1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CodeGenFunction.h"
15 #include "CGCXXABI.h"
16 #include "CGObjCRuntime.h"
17 #include "CGOpenCLRuntime.h"
18 #include "CodeGenModule.h"
19 #include "TargetInfo.h"
20 #include "clang/AST/ASTContext.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/Basic/TargetBuiltins.h"
23 #include "clang/Basic/TargetInfo.h"
24 #include "clang/CodeGen/CGFunctionInfo.h"
25 #include "llvm/ADT/StringExtras.h"
26 #include "llvm/IR/CallSite.h"
27 #include "llvm/IR/DataLayout.h"
28 #include "llvm/IR/InlineAsm.h"
29 #include "llvm/IR/Intrinsics.h"
30 #include "llvm/IR/MDBuilder.h"
31 #include <sstream>
32 
33 using namespace clang;
34 using namespace CodeGen;
35 using namespace llvm;
36 
37 /// getBuiltinLibFunction - Given a builtin id for a function like
38 /// "__builtin_fabsf", return a Function* for "fabsf".
39 llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
40                                                   unsigned BuiltinID) {
41   assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
42 
43   // Get the name, skip over the __builtin_ prefix (if necessary).
44   StringRef Name;
45   GlobalDecl D(FD);
46 
47   // If the builtin has been declared explicitly with an assembler label,
48   // use the mangled name. This differs from the plain label on platforms
49   // that prefix labels.
50   if (FD->hasAttr<AsmLabelAttr>())
51     Name = getMangledName(D);
52   else
53     Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
54 
55   llvm::FunctionType *Ty =
56     cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
57 
58   return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
59 }
60 
61 /// Emit the conversions required to turn the given value into an
62 /// integer of the given size.
63 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
64                         QualType T, llvm::IntegerType *IntType) {
65   V = CGF.EmitToMemory(V, T);
66 
67   if (V->getType()->isPointerTy())
68     return CGF.Builder.CreatePtrToInt(V, IntType);
69 
70   assert(V->getType() == IntType);
71   return V;
72 }
73 
74 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
75                           QualType T, llvm::Type *ResultType) {
76   V = CGF.EmitFromMemory(V, T);
77 
78   if (ResultType->isPointerTy())
79     return CGF.Builder.CreateIntToPtr(V, ResultType);
80 
81   assert(V->getType() == ResultType);
82   return V;
83 }
84 
85 /// Utility to insert an atomic instruction based on Instrinsic::ID
86 /// and the expression node.
87 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
88                                     llvm::AtomicRMWInst::BinOp Kind,
89                                     const CallExpr *E) {
90   QualType T = E->getType();
91   assert(E->getArg(0)->getType()->isPointerType());
92   assert(CGF.getContext().hasSameUnqualifiedType(T,
93                                   E->getArg(0)->getType()->getPointeeType()));
94   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
95 
96   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
97   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
98 
99   llvm::IntegerType *IntType =
100     llvm::IntegerType::get(CGF.getLLVMContext(),
101                            CGF.getContext().getTypeSize(T));
102   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
103 
104   llvm::Value *Args[2];
105   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
106   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
107   llvm::Type *ValueType = Args[1]->getType();
108   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
109 
110   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
111       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
112   return EmitFromInt(CGF, Result, T, ValueType);
113 }
114 
115 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
116   Value *Val = CGF.EmitScalarExpr(E->getArg(0));
117   Value *Address = CGF.EmitScalarExpr(E->getArg(1));
118 
119   // Convert the type of the pointer to a pointer to the stored type.
120   Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
121   Value *BC = CGF.Builder.CreateBitCast(
122       Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
123   LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
124   LV.setNontemporal(true);
125   CGF.EmitStoreOfScalar(Val, LV, false);
126   return nullptr;
127 }
128 
129 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
130   Value *Address = CGF.EmitScalarExpr(E->getArg(0));
131 
132   LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
133   LV.setNontemporal(true);
134   return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
135 }
136 
137 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
138                                llvm::AtomicRMWInst::BinOp Kind,
139                                const CallExpr *E) {
140   return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
141 }
142 
143 /// Utility to insert an atomic instruction based Instrinsic::ID and
144 /// the expression node, where the return value is the result of the
145 /// operation.
146 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
147                                    llvm::AtomicRMWInst::BinOp Kind,
148                                    const CallExpr *E,
149                                    Instruction::BinaryOps Op,
150                                    bool Invert = false) {
151   QualType T = E->getType();
152   assert(E->getArg(0)->getType()->isPointerType());
153   assert(CGF.getContext().hasSameUnqualifiedType(T,
154                                   E->getArg(0)->getType()->getPointeeType()));
155   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
156 
157   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
158   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
159 
160   llvm::IntegerType *IntType =
161     llvm::IntegerType::get(CGF.getLLVMContext(),
162                            CGF.getContext().getTypeSize(T));
163   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
164 
165   llvm::Value *Args[2];
166   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
167   llvm::Type *ValueType = Args[1]->getType();
168   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
169   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
170 
171   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
172       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
173   Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
174   if (Invert)
175     Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
176                                      llvm::ConstantInt::get(IntType, -1));
177   Result = EmitFromInt(CGF, Result, T, ValueType);
178   return RValue::get(Result);
179 }
180 
181 /// @brief Utility to insert an atomic cmpxchg instruction.
182 ///
183 /// @param CGF The current codegen function.
184 /// @param E   Builtin call expression to convert to cmpxchg.
185 ///            arg0 - address to operate on
186 ///            arg1 - value to compare with
187 ///            arg2 - new value
188 /// @param ReturnBool Specifies whether to return success flag of
189 ///                   cmpxchg result or the old value.
190 ///
191 /// @returns result of cmpxchg, according to ReturnBool
192 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
193                                      bool ReturnBool) {
194   QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
195   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
196   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
197 
198   llvm::IntegerType *IntType = llvm::IntegerType::get(
199       CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
200   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
201 
202   Value *Args[3];
203   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
204   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
205   llvm::Type *ValueType = Args[1]->getType();
206   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
207   Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
208 
209   Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
210       Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
211       llvm::AtomicOrdering::SequentiallyConsistent);
212   if (ReturnBool)
213     // Extract boolean success flag and zext it to int.
214     return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
215                                   CGF.ConvertType(E->getType()));
216   else
217     // Extract old value and emit it using the same type as compare value.
218     return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
219                        ValueType);
220 }
221 
222 // Emit a simple mangled intrinsic that has 1 argument and a return type
223 // matching the argument type.
224 static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
225                                const CallExpr *E,
226                                unsigned IntrinsicID) {
227   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
228 
229   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
230   return CGF.Builder.CreateCall(F, Src0);
231 }
232 
233 // Emit an intrinsic that has 2 operands of the same type as its result.
234 static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
235                                 const CallExpr *E,
236                                 unsigned IntrinsicID) {
237   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
238   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
239 
240   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
241   return CGF.Builder.CreateCall(F, { Src0, Src1 });
242 }
243 
244 // Emit an intrinsic that has 3 operands of the same type as its result.
245 static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
246                                  const CallExpr *E,
247                                  unsigned IntrinsicID) {
248   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
249   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
250   llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
251 
252   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
253   return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
254 }
255 
256 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
257 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
258                                const CallExpr *E,
259                                unsigned IntrinsicID) {
260   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
261   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
262 
263   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
264   return CGF.Builder.CreateCall(F, {Src0, Src1});
265 }
266 
267 /// EmitFAbs - Emit a call to @llvm.fabs().
268 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
269   Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
270   llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
271   Call->setDoesNotAccessMemory();
272   return Call;
273 }
274 
275 /// Emit the computation of the sign bit for a floating point value. Returns
276 /// the i1 sign bit value.
277 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
278   LLVMContext &C = CGF.CGM.getLLVMContext();
279 
280   llvm::Type *Ty = V->getType();
281   int Width = Ty->getPrimitiveSizeInBits();
282   llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
283   V = CGF.Builder.CreateBitCast(V, IntTy);
284   if (Ty->isPPC_FP128Ty()) {
285     // We want the sign bit of the higher-order double. The bitcast we just
286     // did works as if the double-double was stored to memory and then
287     // read as an i128. The "store" will put the higher-order double in the
288     // lower address in both little- and big-Endian modes, but the "load"
289     // will treat those bits as a different part of the i128: the low bits in
290     // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
291     // we need to shift the high bits down to the low before truncating.
292     Width >>= 1;
293     if (CGF.getTarget().isBigEndian()) {
294       Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
295       V = CGF.Builder.CreateLShr(V, ShiftCst);
296     }
297     // We are truncating value in order to extract the higher-order
298     // double, which we will be using to extract the sign from.
299     IntTy = llvm::IntegerType::get(C, Width);
300     V = CGF.Builder.CreateTrunc(V, IntTy);
301   }
302   Value *Zero = llvm::Constant::getNullValue(IntTy);
303   return CGF.Builder.CreateICmpSLT(V, Zero);
304 }
305 
306 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn,
307                               const CallExpr *E, llvm::Value *calleeValue) {
308   return CGF.EmitCall(E->getCallee()->getType(), calleeValue, E,
309                       ReturnValueSlot(), Fn);
310 }
311 
312 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
313 /// depending on IntrinsicID.
314 ///
315 /// \arg CGF The current codegen function.
316 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
317 /// \arg X The first argument to the llvm.*.with.overflow.*.
318 /// \arg Y The second argument to the llvm.*.with.overflow.*.
319 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
320 /// \returns The result (i.e. sum/product) returned by the intrinsic.
321 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
322                                           const llvm::Intrinsic::ID IntrinsicID,
323                                           llvm::Value *X, llvm::Value *Y,
324                                           llvm::Value *&Carry) {
325   // Make sure we have integers of the same width.
326   assert(X->getType() == Y->getType() &&
327          "Arguments must be the same type. (Did you forget to make sure both "
328          "arguments have the same integer width?)");
329 
330   llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
331   llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
332   Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
333   return CGF.Builder.CreateExtractValue(Tmp, 0);
334 }
335 
336 static Value *emitRangedBuiltin(CodeGenFunction &CGF,
337                                 unsigned IntrinsicID,
338                                 int low, int high) {
339     llvm::MDBuilder MDHelper(CGF.getLLVMContext());
340     llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
341     Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
342     llvm::Instruction *Call = CGF.Builder.CreateCall(F);
343     Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
344     return Call;
345 }
346 
namespace {
/// Describes an integer type by its width and whether it is signed.
struct WidthAndSignedness {
  unsigned Width; // Width of the integer type.
  bool Signed;    // True when the integer type is signed.
};
} // end anonymous namespace
353 
354 static WidthAndSignedness
355 getIntegerWidthAndSignedness(const clang::ASTContext &context,
356                              const clang::QualType Type) {
357   assert(Type->isIntegerType() && "Given type is not an integer.");
358   unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
359   bool Signed = Type->isSignedIntegerType();
360   return {Width, Signed};
361 }
362 
363 // Given one or more integer types, this function produces an integer type that
364 // encompasses them: any value in one of the given types could be expressed in
365 // the encompassing type.
366 static struct WidthAndSignedness
367 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
368   assert(Types.size() > 0 && "Empty list of types.");
369 
370   // If any of the given types is signed, we must return a signed type.
371   bool Signed = false;
372   for (const auto &Type : Types) {
373     Signed |= Type.Signed;
374   }
375 
376   // The encompassing type must have a width greater than or equal to the width
377   // of the specified types.  Aditionally, if the encompassing type is signed,
378   // its width must be strictly greater than the width of any unsigned types
379   // given.
380   unsigned Width = 0;
381   for (const auto &Type : Types) {
382     unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
383     if (Width < MinWidth) {
384       Width = MinWidth;
385     }
386   }
387 
388   return {Width, Signed};
389 }
390 
391 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
392   llvm::Type *DestType = Int8PtrTy;
393   if (ArgValue->getType() != DestType)
394     ArgValue =
395         Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
396 
397   Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
398   return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
399 }
400 
/// Checks if using the result of __builtin_object_size(p, @p From) in place of
/// __builtin_object_size(p, @p To) is correct.
///
/// Accepted pairs are: identical types, From=0 with To=1, and From=3 with
/// To=2.
static bool areBOSTypesCompatible(int From, int To) {
  // Note: Our __builtin_object_size implementation currently treats Type=0 and
  // Type=2 identically. Encoding this implementation detail here may make
  // improving __builtin_object_size difficult in the future, so it's omitted.
  if (From == To)
    return true;
  if (From == 0)
    return To == 1;
  if (From == 3)
    return To == 2;
  return false;
}
409 
410 static llvm::Value *
411 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
412   return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
413 }
414 
415 llvm::Value *
416 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
417                                                  llvm::IntegerType *ResType) {
418   uint64_t ObjectSize;
419   if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
420     return emitBuiltinObjectSize(E, Type, ResType);
421   return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
422 }
423 
424 /// Returns a Value corresponding to the size of the given expression.
425 /// This Value may be either of the following:
426 ///   - A llvm::Argument (if E is a param with the pass_object_size attribute on
427 ///     it)
428 ///   - A call to the @llvm.objectsize intrinsic
429 llvm::Value *
430 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
431                                        llvm::IntegerType *ResType) {
432   // We need to reference an argument if the pointer is a parameter with the
433   // pass_object_size attribute.
434   if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
435     auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
436     auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
437     if (Param != nullptr && PS != nullptr &&
438         areBOSTypesCompatible(PS->getType(), Type)) {
439       auto Iter = SizeArguments.find(Param);
440       assert(Iter != SizeArguments.end());
441 
442       const ImplicitParamDecl *D = Iter->second;
443       auto DIter = LocalDeclMap.find(D);
444       assert(DIter != LocalDeclMap.end());
445 
446       return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
447                               getContext().getSizeType(), E->getLocStart());
448     }
449   }
450 
451   // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
452   // evaluate E for side-effects. In either case, we shouldn't lower to
453   // @llvm.objectsize.
454   if (Type == 3 || E->HasSideEffects(getContext()))
455     return getDefaultBuiltinObjectSizeResult(Type, ResType);
456 
457   // LLVM only supports 0 and 2, make sure that we pass along that
458   // as a boolean.
459   auto *CI = ConstantInt::get(Builder.getInt1Ty(), (Type & 2) >> 1);
460   // FIXME: Get right address space.
461   llvm::Type *Tys[] = {ResType, Builder.getInt8PtrTy(0)};
462   Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys);
463   return Builder.CreateCall(F, {EmitScalarExpr(E), CI});
464 }
465 
466 // Many of MSVC builtins are on both x64 and ARM; to avoid repeating code, we
467 // handle them here.
468 enum class CodeGenFunction::MSVCIntrin {
469   _BitScanForward,
470   _BitScanReverse,
471   _InterlockedAnd,
472   _InterlockedDecrement,
473   _InterlockedExchange,
474   _InterlockedExchangeAdd,
475   _InterlockedExchangeSub,
476   _InterlockedIncrement,
477   _InterlockedOr,
478   _InterlockedXor,
479 };
480 
481 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
482   const CallExpr *E) {
483   switch (BuiltinID) {
484   case MSVCIntrin::_BitScanForward:
485   case MSVCIntrin::_BitScanReverse: {
486     Value *ArgValue = EmitScalarExpr(E->getArg(1));
487 
488     llvm::Type *ArgType = ArgValue->getType();
489     llvm::Type *IndexType =
490       EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType();
491     llvm::Type *ResultType = ConvertType(E->getType());
492 
493     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
494     Value *ResZero = llvm::Constant::getNullValue(ResultType);
495     Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
496 
497     BasicBlock *Begin = Builder.GetInsertBlock();
498     BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
499     Builder.SetInsertPoint(End);
500     PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
501 
502     Builder.SetInsertPoint(Begin);
503     Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
504     BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
505     Builder.CreateCondBr(IsZero, End, NotZero);
506     Result->addIncoming(ResZero, Begin);
507 
508     Builder.SetInsertPoint(NotZero);
509     Address IndexAddress = EmitPointerWithAlignment(E->getArg(0));
510 
511     if (BuiltinID == MSVCIntrin::_BitScanForward) {
512       Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
513       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
514       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
515       Builder.CreateStore(ZeroCount, IndexAddress, false);
516     } else {
517       unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
518       Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
519 
520       Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
521       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
522       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
523       Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
524       Builder.CreateStore(Index, IndexAddress, false);
525     }
526     Builder.CreateBr(End);
527     Result->addIncoming(ResOne, NotZero);
528 
529     Builder.SetInsertPoint(End);
530     return Result;
531   }
532   case MSVCIntrin::_InterlockedAnd:
533     return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
534   case MSVCIntrin::_InterlockedExchange:
535     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
536   case MSVCIntrin::_InterlockedExchangeAdd:
537     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
538   case MSVCIntrin::_InterlockedExchangeSub:
539     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
540   case MSVCIntrin::_InterlockedOr:
541     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
542   case MSVCIntrin::_InterlockedXor:
543     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
544 
545   case MSVCIntrin::_InterlockedDecrement: {
546     llvm::Type *IntTy = ConvertType(E->getType());
547     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
548       AtomicRMWInst::Sub,
549       EmitScalarExpr(E->getArg(0)),
550       ConstantInt::get(IntTy, 1),
551       llvm::AtomicOrdering::SequentiallyConsistent);
552     return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1));
553   }
554   case MSVCIntrin::_InterlockedIncrement: {
555     llvm::Type *IntTy = ConvertType(E->getType());
556     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
557       AtomicRMWInst::Add,
558       EmitScalarExpr(E->getArg(0)),
559       ConstantInt::get(IntTy, 1),
560       llvm::AtomicOrdering::SequentiallyConsistent);
561     return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1));
562   }
563   }
564   llvm_unreachable("Incorrect MSVC intrinsic!");
565 }
566 
567 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
568                                         unsigned BuiltinID, const CallExpr *E,
569                                         ReturnValueSlot ReturnValue) {
570   // See if we can constant fold this builtin.  If so, don't emit it at all.
571   Expr::EvalResult Result;
572   if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
573       !Result.hasSideEffects()) {
574     if (Result.Val.isInt())
575       return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
576                                                 Result.Val.getInt()));
577     if (Result.Val.isFloat())
578       return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
579                                                Result.Val.getFloat()));
580   }
581 
582   switch (BuiltinID) {
583   default: break;  // Handle intrinsics and libm functions below.
584   case Builtin::BI__builtin___CFStringMakeConstantString:
585   case Builtin::BI__builtin___NSStringMakeConstantString:
586     return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr));
587   case Builtin::BI__builtin_stdarg_start:
588   case Builtin::BI__builtin_va_start:
589   case Builtin::BI__va_start:
590   case Builtin::BI__builtin_va_end:
591     return RValue::get(
592         EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
593                            ? EmitScalarExpr(E->getArg(0))
594                            : EmitVAListRef(E->getArg(0)).getPointer(),
595                        BuiltinID != Builtin::BI__builtin_va_end));
596   case Builtin::BI__builtin_va_copy: {
597     Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
598     Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
599 
600     llvm::Type *Type = Int8PtrTy;
601 
602     DstPtr = Builder.CreateBitCast(DstPtr, Type);
603     SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
604     return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
605                                           {DstPtr, SrcPtr}));
606   }
607   case Builtin::BI__builtin_abs:
608   case Builtin::BI__builtin_labs:
609   case Builtin::BI__builtin_llabs: {
610     Value *ArgValue = EmitScalarExpr(E->getArg(0));
611 
612     Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
613     Value *CmpResult =
614     Builder.CreateICmpSGE(ArgValue,
615                           llvm::Constant::getNullValue(ArgValue->getType()),
616                                                             "abscond");
617     Value *Result =
618       Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
619 
620     return RValue::get(Result);
621   }
622   case Builtin::BI__builtin_fabs:
623   case Builtin::BI__builtin_fabsf:
624   case Builtin::BI__builtin_fabsl: {
625     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
626   }
627   case Builtin::BI__builtin_fmod:
628   case Builtin::BI__builtin_fmodf:
629   case Builtin::BI__builtin_fmodl: {
630     Value *Arg1 = EmitScalarExpr(E->getArg(0));
631     Value *Arg2 = EmitScalarExpr(E->getArg(1));
632     Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
633     return RValue::get(Result);
634   }
635   case Builtin::BI__builtin_copysign:
636   case Builtin::BI__builtin_copysignf:
637   case Builtin::BI__builtin_copysignl: {
638     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
639   }
640   case Builtin::BI__builtin_ceil:
641   case Builtin::BI__builtin_ceilf:
642   case Builtin::BI__builtin_ceill: {
643     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
644   }
645   case Builtin::BI__builtin_floor:
646   case Builtin::BI__builtin_floorf:
647   case Builtin::BI__builtin_floorl: {
648     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
649   }
650   case Builtin::BI__builtin_trunc:
651   case Builtin::BI__builtin_truncf:
652   case Builtin::BI__builtin_truncl: {
653     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
654   }
655   case Builtin::BI__builtin_rint:
656   case Builtin::BI__builtin_rintf:
657   case Builtin::BI__builtin_rintl: {
658     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
659   }
660   case Builtin::BI__builtin_nearbyint:
661   case Builtin::BI__builtin_nearbyintf:
662   case Builtin::BI__builtin_nearbyintl: {
663     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
664   }
665   case Builtin::BI__builtin_round:
666   case Builtin::BI__builtin_roundf:
667   case Builtin::BI__builtin_roundl: {
668     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
669   }
670   case Builtin::BI__builtin_fmin:
671   case Builtin::BI__builtin_fminf:
672   case Builtin::BI__builtin_fminl: {
673     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
674   }
675   case Builtin::BI__builtin_fmax:
676   case Builtin::BI__builtin_fmaxf:
677   case Builtin::BI__builtin_fmaxl: {
678     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
679   }
680   case Builtin::BI__builtin_conj:
681   case Builtin::BI__builtin_conjf:
682   case Builtin::BI__builtin_conjl: {
683     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
684     Value *Real = ComplexVal.first;
685     Value *Imag = ComplexVal.second;
686     Value *Zero =
687       Imag->getType()->isFPOrFPVectorTy()
688         ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
689         : llvm::Constant::getNullValue(Imag->getType());
690 
691     Imag = Builder.CreateFSub(Zero, Imag, "sub");
692     return RValue::getComplex(std::make_pair(Real, Imag));
693   }
694   case Builtin::BI__builtin_creal:
695   case Builtin::BI__builtin_crealf:
696   case Builtin::BI__builtin_creall:
697   case Builtin::BIcreal:
698   case Builtin::BIcrealf:
699   case Builtin::BIcreall: {
700     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
701     return RValue::get(ComplexVal.first);
702   }
703 
704   case Builtin::BI__builtin_cimag:
705   case Builtin::BI__builtin_cimagf:
706   case Builtin::BI__builtin_cimagl:
707   case Builtin::BIcimag:
708   case Builtin::BIcimagf:
709   case Builtin::BIcimagl: {
710     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
711     return RValue::get(ComplexVal.second);
712   }
713 
714   case Builtin::BI__builtin_ctzs:
715   case Builtin::BI__builtin_ctz:
716   case Builtin::BI__builtin_ctzl:
717   case Builtin::BI__builtin_ctzll: {
        // Count trailing zeros: a single call to the cttz intrinsic, overloaded
        // on the LLVM type of the argument.
718     Value *ArgValue = EmitScalarExpr(E->getArg(0));
719 
720     llvm::Type *ArgType = ArgValue->getType();
721     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
722 
723     llvm::Type *ResultType = ConvertType(E->getType());
        // The i1 flag operand is taken from the target's isCLZForZeroUndef()
        // query; the clz cases consult the same query.
724     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
725     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
        // Convert to the builtin's declared result type when the call's type
        // differs from it (signed integer cast).
726     if (Result->getType() != ResultType)
727       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
728                                      "cast");
729     return RValue::get(Result);
730   }
731   case Builtin::BI__builtin_clzs:
732   case Builtin::BI__builtin_clz:
733   case Builtin::BI__builtin_clzl:
734   case Builtin::BI__builtin_clzll: {
        // Count leading zeros: a single call to the ctlz intrinsic, overloaded
        // on the LLVM type of the argument.
735     Value *ArgValue = EmitScalarExpr(E->getArg(0));
736 
737     llvm::Type *ArgType = ArgValue->getType();
738     Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
739 
740     llvm::Type *ResultType = ConvertType(E->getType());
        // The i1 flag operand is taken from the target's isCLZForZeroUndef()
        // query.
741     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
742     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
        // Convert to the builtin's declared result type when the call's type
        // differs from it (signed integer cast).
743     if (Result->getType() != ResultType)
744       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
745                                      "cast");
746     return RValue::get(Result);
747   }
748   case Builtin::BI__builtin_ffs:
749   case Builtin::BI__builtin_ffsl:
750   case Builtin::BI__builtin_ffsll: {
751     // ffs(x) -> x ? cttz(x) + 1 : 0
752     Value *ArgValue = EmitScalarExpr(E->getArg(0));
753 
754     llvm::Type *ArgType = ArgValue->getType();
755     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
756 
757     llvm::Type *ResultType = ConvertType(E->getType());
        // cttz is called with its i1 flag operand fixed to true; a zero input
        // never uses that result because the select below substitutes 0 for it.
758     Value *Tmp =
759         Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
760                           llvm::ConstantInt::get(ArgType, 1));
761     Value *Zero = llvm::Constant::getNullValue(ArgType);
762     Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
763     Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
        // Convert to the builtin's declared result type when it differs from the
        // argument type the arithmetic was done in.
764     if (Result->getType() != ResultType)
765       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
766                                      "cast");
767     return RValue::get(Result);
768   }
769   case Builtin::BI__builtin_parity:
770   case Builtin::BI__builtin_parityl:
771   case Builtin::BI__builtin_parityll: {
772     // parity(x) -> ctpop(x) & 1
773     Value *ArgValue = EmitScalarExpr(E->getArg(0));
774 
775     llvm::Type *ArgType = ArgValue->getType();
776     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
777 
778     llvm::Type *ResultType = ConvertType(E->getType());
        // Population count, then keep only its lowest bit.
779     Value *Tmp = Builder.CreateCall(F, ArgValue);
780     Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
        // Convert to the builtin's declared result type when it differs from the
        // argument type the arithmetic was done in.
781     if (Result->getType() != ResultType)
782       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
783                                      "cast");
784     return RValue::get(Result);
785   }
786   case Builtin::BI__popcnt16:
787   case Builtin::BI__popcnt:
788   case Builtin::BI__popcnt64:
789   case Builtin::BI__builtin_popcount:
790   case Builtin::BI__builtin_popcountl:
791   case Builtin::BI__builtin_popcountll: {
        // Population count: a single call to the ctpop intrinsic, overloaded on
        // the LLVM type of the argument. The __popcnt* and __builtin_popcount*
        // spellings share this lowering.
792     Value *ArgValue = EmitScalarExpr(E->getArg(0));
793 
794     llvm::Type *ArgType = ArgValue->getType();
795     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
796 
797     llvm::Type *ResultType = ConvertType(E->getType());
798     Value *Result = Builder.CreateCall(F, ArgValue);
        // Convert to the builtin's declared result type when the call's type
        // differs from it (signed integer cast).
799     if (Result->getType() != ResultType)
800       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
801                                      "cast");
802     return RValue::get(Result);
803   }
804   case Builtin::BI_rotr8:
805   case Builtin::BI_rotr16:
806   case Builtin::BI_rotr:
807   case Builtin::BI_lrotr:
808   case Builtin::BI_rotr64: {
        // Rotate right, open-coded as
        //   s = Shift & (width - 1);  s == 0 ? Val : (Val >> s) | (Val << (width - s))
        // where width is the bit width of the first argument's type.
809     Value *Val = EmitScalarExpr(E->getArg(0));
810     Value *Shift = EmitScalarExpr(E->getArg(1));
811 
812     llvm::Type *ArgType = Val->getType();
        // Bring the shift amount to the value's type (unsigned conversion).
813     Shift = Builder.CreateIntCast(Shift, ArgType, false);
814     unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
815     Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
816     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
817 
        // Reduce the shift amount modulo the bit width.
818     Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
819     Shift = Builder.CreateAnd(Shift, Mask);
820     Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift);
821 
822     Value *RightShifted = Builder.CreateLShr(Val, Shift);
823     Value *LeftShifted = Builder.CreateShl(Val, LeftShift);
824     Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
825 
        // With a masked shift of 0, LeftShift equals the full bit width, so the
        // unrotated value is selected instead of Rotated in that case.
826     Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
827     Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
828     return RValue::get(Result);
829   }
830   case Builtin::BI_rotl8:
831   case Builtin::BI_rotl16:
832   case Builtin::BI_rotl:
833   case Builtin::BI_lrotl:
834   case Builtin::BI_rotl64: {
        // Rotate left, open-coded as
        //   s = Shift & (width - 1);  s == 0 ? Val : (Val << s) | (Val >> (width - s))
        // where width is the bit width of the first argument's type.
835     Value *Val = EmitScalarExpr(E->getArg(0));
836     Value *Shift = EmitScalarExpr(E->getArg(1));
837 
838     llvm::Type *ArgType = Val->getType();
        // Bring the shift amount to the value's type (unsigned conversion).
839     Shift = Builder.CreateIntCast(Shift, ArgType, false);
840     unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
841     Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
842     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
843 
        // Reduce the shift amount modulo the bit width.
844     Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
845     Shift = Builder.CreateAnd(Shift, Mask);
846     Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift);
847 
848     Value *LeftShifted = Builder.CreateShl(Val, Shift);
849     Value *RightShifted = Builder.CreateLShr(Val, RightShift);
850     Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
851 
        // With a masked shift of 0, RightShift equals the full bit width, so the
        // unrotated value is selected instead of Rotated in that case.
852     Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
853     Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
854     return RValue::get(Result);
855   }
856   case Builtin::BI__builtin_unpredictable: {
857     // Always return the argument of __builtin_unpredictable. LLVM does not
858     // handle this builtin. Metadata for this builtin should be added directly
859     // to instructions such as branches or switches that use it.
        // Consequently this case emits nothing beyond the argument expression.
860     return RValue::get(EmitScalarExpr(E->getArg(0)));
861   }
862   case Builtin::BI__builtin_expect: {
        // Lowered to the expect intrinsic (overloaded on the argument type),
        // except at optimization level 0 where the first argument is returned
        // as-is.
863     Value *ArgValue = EmitScalarExpr(E->getArg(0));
864     llvm::Type *ArgType = ArgValue->getType();
865 
866     Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
867     // Don't generate llvm.expect on -O0 as the backend won't use it for
868     // anything.
869     // Note, we still IRGen ExpectedValue because it could have side-effects.
870     if (CGM.getCodeGenOpts().OptimizationLevel == 0)
871       return RValue::get(ArgValue);
872 
873     Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
874     Value *Result =
875         Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
876     return RValue::get(Result);
877   }
878   case Builtin::BI__builtin_assume_aligned: {
        // Records an alignment assumption on the pointer argument and returns
        // that pointer unchanged. Arguments: (pointer, alignment[, offset]).
879     Value *PtrValue = EmitScalarExpr(E->getArg(0));
        // The offset is optional; nullptr is passed on when it is absent.
880     Value *OffsetValue =
881       (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
882 
        // The alignment must have been emitted as a ConstantInt; cast<> asserts
        // on anything else. NOTE(review): presumably guaranteed by earlier
        // checking of the builtin's arguments -- confirm.
883     Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
884     ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
        // The 64-bit zero-extended value is narrowed to unsigned here.
885     unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
886 
887     EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
888     return RValue::get(PtrValue);
889   }
890   case Builtin::BI__assume:
891   case Builtin::BI__builtin_assume: {
        // An argument with side effects is not evaluated at all: nothing is
        // emitted and a null RValue is returned.
892     if (E->getArg(0)->HasSideEffects(getContext()))
893       return RValue::get(nullptr);
894 
        // Otherwise the condition is emitted and handed to the assume intrinsic.
895     Value *ArgValue = EmitScalarExpr(E->getArg(0));
896     Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
897     return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
898   }
899   case Builtin::BI__builtin_bswap16:
900   case Builtin::BI__builtin_bswap32:
901   case Builtin::BI__builtin_bswap64: {
        // All widths go through the shared unary-builtin helper with the bswap
        // intrinsic.
902     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
903   }
904   case Builtin::BI__builtin_bitreverse8:
905   case Builtin::BI__builtin_bitreverse16:
906   case Builtin::BI__builtin_bitreverse32:
907   case Builtin::BI__builtin_bitreverse64: {
        // All widths go through the shared unary-builtin helper with the
        // bitreverse intrinsic.
908     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
909   }
910   case Builtin::BI__builtin_object_size: {
        // The second argument is evaluated as a known integer constant here and
        // passed on as the "Type" selector; the result type is the builtin's
        // (integer) return type.
911     unsigned Type =
912         E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
913     auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
914 
915     // We pass this builtin onto the optimizer so that it can figure out the
916     // object size in more complex cases.
917     return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType));
918   }
919   case Builtin::BI__builtin_prefetch: {
        // Lowered to the prefetch intrinsic with operands
        // (address, rw, locality, 1). The rw and locality arguments are
        // optional and default to 0 and 3 respectively when omitted.
920     Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
921     // FIXME: Technically these constants should be of type 'int', yes?
922     RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
923       llvm::ConstantInt::get(Int32Ty, 0);
924     Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
925       llvm::ConstantInt::get(Int32Ty, 3);
        // Fourth operand is always the constant 1.
926     Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
927     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
928     return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
929   }
930   case Builtin::BI__builtin_readcyclecounter: {
        // Direct, argument-less call to the readcyclecounter intrinsic.
931     Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
932     return RValue::get(Builder.CreateCall(F));
933   }
934   case Builtin::BI__builtin___clear_cache: {
        // Forwards the (begin, end) arguments to the clear_cache intrinsic.
935     Value *Begin = EmitScalarExpr(E->getArg(0));
936     Value *End = EmitScalarExpr(E->getArg(1));
937     Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
938     return RValue::get(Builder.CreateCall(F, {Begin, End}));
939   }
      // __builtin_trap and __debugbreak both go through EmitTrapCall and differ
      // only in the intrinsic requested (trap vs. debugtrap).
940   case Builtin::BI__builtin_trap:
941     return RValue::get(EmitTrapCall(Intrinsic::trap));
942   case Builtin::BI__debugbreak:
943     return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
944   case Builtin::BI__builtin_unreachable: {
        // With the Unreachable sanitizer enabled, emit a check whose condition
        // is the constant false (so it always reports "builtin_unreachable" at
        // this call's location); otherwise emit a plain unreachable instruction.
945     if (SanOpts.has(SanitizerKind::Unreachable)) {
946       SanitizerScope SanScope(this);
947       EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
948                                SanitizerKind::Unreachable),
949                 "builtin_unreachable", EmitCheckSourceLocation(E->getExprLoc()),
950                 None);
951     } else
952       Builder.CreateUnreachable();
953 
954     // We do need to preserve an insertion point.
955     EmitBlock(createBasicBlock("unreachable.cont"));
956 
        // The builtin produces no value.
957     return RValue::get(nullptr);
958   }
959 
960   case Builtin::BI__builtin_powi:
961   case Builtin::BI__builtin_powif:
962   case Builtin::BI__builtin_powil: {
        // Lowered to the powi intrinsic, overloaded on the LLVM type of the
        // base (first) argument; the exponent is passed through as emitted.
963     Value *Base = EmitScalarExpr(E->getArg(0));
964     Value *Exponent = EmitScalarExpr(E->getArg(1));
965     llvm::Type *ArgType = Base->getType();
966     Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
967     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
968   }
969 
970   case Builtin::BI__builtin_isgreater:
971   case Builtin::BI__builtin_isgreaterequal:
972   case Builtin::BI__builtin_isless:
973   case Builtin::BI__builtin_islessequal:
974   case Builtin::BI__builtin_islessgreater:
975   case Builtin::BI__builtin_isunordered: {
976     // Ordered comparisons: we know the arguments to these are matching scalar
977     // floating point values.
        // Each builtin maps to exactly one fcmp predicate (OGT, OGE, OLT, OLE,
        // ONE, and -- for isunordered -- UNO); the i1 result is then widened to
        // the builtin's return type.
978     Value *LHS = EmitScalarExpr(E->getArg(0));
979     Value *RHS = EmitScalarExpr(E->getArg(1));
980 
        // LHS is reused below to hold the comparison result.
981     switch (BuiltinID) {
982     default: llvm_unreachable("Unknown ordered comparison");
983     case Builtin::BI__builtin_isgreater:
984       LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
985       break;
986     case Builtin::BI__builtin_isgreaterequal:
987       LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
988       break;
989     case Builtin::BI__builtin_isless:
990       LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
991       break;
992     case Builtin::BI__builtin_islessequal:
993       LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
994       break;
995     case Builtin::BI__builtin_islessgreater:
996       LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
997       break;
998     case Builtin::BI__builtin_isunordered:
999       LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
1000       break;
1001     }
1002     // ZExt bool to int type.
1003     return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
1004   }
1005   case Builtin::BI__builtin_isnan: {
         // isnan(x) --> x unordered-compared with itself, zero-extended to the
         // builtin's return type.
1006     Value *V = EmitScalarExpr(E->getArg(0));
1007     V = Builder.CreateFCmpUNO(V, V, "cmp");
1008     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1009   }
1010 
1011   case Builtin::BIfinite:
1012   case Builtin::BI__finite:
1013   case Builtin::BIfinitef:
1014   case Builtin::BI__finitef:
1015   case Builtin::BIfinitel:
1016   case Builtin::BI__finitel:
1017   case Builtin::BI__builtin_isinf:
1018   case Builtin::BI__builtin_isfinite: {
1019     // isinf(x)    --> fabs(x) == infinity
1020     // isfinite(x) --> fabs(x) != infinity
1021     // x != NaN via the ordered compare in either case.
1022     Value *V = EmitScalarExpr(E->getArg(0));
1023     Value *Fabs = EmitFAbs(*this, V);
1024     Constant *Infinity = ConstantFP::getInfinity(V->getType());
         // Only __builtin_isinf selects the "equal" predicate; every other
         // builtin in this group (the finite family and __builtin_isfinite)
         // uses "not equal".
1025     CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
1026                                   ? CmpInst::FCMP_OEQ
1027                                   : CmpInst::FCMP_ONE;
1028     Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
         // Widen the i1 comparison result to the builtin's return type.
1029     return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
1030   }
1031 
1032   case Builtin::BI__builtin_isinf_sign: {
1033     // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
1034     Value *Arg = EmitScalarExpr(E->getArg(0));
1035     Value *AbsArg = EmitFAbs(*this, Arg);
1036     Value *IsInf = Builder.CreateFCmpOEQ(
1037         AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
1038     Value *IsNeg = EmitSignBit(*this, Arg);
1039 
         // Constants are built in the builtin's (integer) return type, then two
         // selects pick -1 / 1 by sign and fall back to 0 when not infinite.
1040     llvm::Type *IntTy = ConvertType(E->getType());
1041     Value *Zero = Constant::getNullValue(IntTy);
1042     Value *One = ConstantInt::get(IntTy, 1);
1043     Value *NegativeOne = ConstantInt::get(IntTy, -1);
1044     Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
1045     Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
1046     return RValue::get(Result);
1047   }
1048 
1049   case Builtin::BI__builtin_isnormal: {
1050     // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
1051     Value *V = EmitScalarExpr(E->getArg(0));
         // Ordered self-compare: the first of the three conjuncts.
1052     Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
1053 
1054     Value *Abs = EmitFAbs(*this, V);
         // The two range checks use the unordered predicates (ULT / UGE); they
         // are AND-ed with the ordered Eq test above.
1055     Value *IsLessThanInf =
1056       Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
         // Smallest normalized value for the argument's floating-point
         // semantics (the "float_min" of the formula above).
1057     APFloat Smallest = APFloat::getSmallestNormalized(
1058                    getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
1059     Value *IsNormal =
1060       Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
1061                             "isnormal");
1062     V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
1063     V = Builder.CreateAnd(V, IsNormal, "and");
         // Widen the i1 result to the builtin's return type.
1064     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1065   }
1066 
1067   case Builtin::BI__builtin_fpclassify: {
         // Argument layout as used below: args 0..4 are the result values for
         // NaN, infinite, normal, subnormal and zero respectively; arg 5 is the
         // floating-point value being classified.
         //
         // Emitted control flow is a chain of tests
         //   zero? -> NaN? -> infinite? -> normal/subnormal
         // where each positive test branches to a common end block whose PHI
         // collects the selected result.
1068     Value *V = EmitScalarExpr(E->getArg(5));
1069     llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
1070 
1071     // Create Result
1072     BasicBlock *Begin = Builder.GetInsertBlock();
1073     BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
1074     Builder.SetInsertPoint(End);
         // Four incoming edges are reserved: zero, NaN, infinite, and the final
         // normal/subnormal select.
1075     PHINode *Result =
1076       Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
1077                         "fpclassify_result");
1078 
1079     // if (V==0) return FP_ZERO
1080     Builder.SetInsertPoint(Begin);
1081     Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
1082                                           "iszero");
1083     Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
1084     BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
1085     Builder.CreateCondBr(IsZero, End, NotZero);
1086     Result->addIncoming(ZeroLiteral, Begin);
1087 
1088     // if (V != V) return FP_NAN
1089     Builder.SetInsertPoint(NotZero);
1090     Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
1091     Value *NanLiteral = EmitScalarExpr(E->getArg(0));
1092     BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
1093     Builder.CreateCondBr(IsNan, End, NotNan);
1094     Result->addIncoming(NanLiteral, NotZero);
1095 
1096     // if (fabs(V) == infinity) return FP_INFINITE
1097     Builder.SetInsertPoint(NotNan);
1098     Value *VAbs = EmitFAbs(*this, V);
1099     Value *IsInf =
1100       Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
1101                             "isinf");
1102     Value *InfLiteral = EmitScalarExpr(E->getArg(1));
1103     BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
1104     Builder.CreateCondBr(IsInf, End, NotInf);
1105     Result->addIncoming(InfLiteral, NotNan);
1106 
1107     // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
1108     Builder.SetInsertPoint(NotInf);
1109     APFloat Smallest = APFloat::getSmallestNormalized(
1110         getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
1111     Value *IsNormal =
1112       Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
1113                             "isnormal");
         // The last two outcomes share one edge into the PHI via a select.
1114     Value *NormalResult =
1115       Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
1116                            EmitScalarExpr(E->getArg(3)));
1117     Builder.CreateBr(End);
1118     Result->addIncoming(NormalResult, NotInf);
1119 
1120     // return Result
         // Leave the builder positioned in the end block for subsequent code.
1121     Builder.SetInsertPoint(End);
1122     return RValue::get(Result);
1123   }
1124 
1125   case Builtin::BIalloca:
1126   case Builtin::BI_alloca:
1127   case Builtin::BI__builtin_alloca: {
         // Dynamic stack allocation: an i8 array alloca whose element count is
         // the requested size in bytes.
1128     Value *Size = EmitScalarExpr(E->getArg(0));
         // Do not leave the alignment unspecified: for an i8 alloca that only
         // guarantees byte alignment. GCC documents alloca memory as aligned to
         // __BIGGEST_ALIGNMENT__; use the target's "suitable" alignment
         // (reported in bits, converted to bytes here).
         unsigned SuitableAlignmentInBytes =
             getContext()
                 .toCharUnitsFromBits(getTarget().getSuitableAlign())
                 .getQuantity();
         AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
         AI->setAlignment(SuitableAlignmentInBytes);
         return RValue::get(AI);
1130   }
1131   case Builtin::BIbzero:
1132   case Builtin::BI__builtin_bzero: {
         // bzero(dest, n): emitted as a non-volatile memset of zero bytes over
         // dest, preceded by a non-null argument check on dest (parameter 0).
1133     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1134     Value *SizeVal = EmitScalarExpr(E->getArg(1));
1135     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1136                         E->getArg(0)->getExprLoc(), FD, 0);
1137     Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
         // The value produced for the call expression is the destination pointer.
1138     return RValue::get(Dest.getPointer());
1139   }
1140   case Builtin::BImemcpy:
1141   case Builtin::BI__builtin_memcpy: {
         // memcpy(dest, src, n): emitted as a non-volatile memcpy, preceded by
         // non-null argument checks on dest (parameter 0) and src (parameter 1).
         // Both pointers are emitted together with their known alignment.
1142     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1143     Address Src = EmitPointerWithAlignment(E->getArg(1));
1144     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1145     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1146                         E->getArg(0)->getExprLoc(), FD, 0);
1147     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1148                         E->getArg(1)->getExprLoc(), FD, 1);
1149     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
         // The call evaluates to the destination pointer.
1150     return RValue::get(Dest.getPointer());
1151   }
1152 
1153   case Builtin::BI__builtin___memcpy_chk: {
1154     // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
         // If either size fails to evaluate as an integer constant, or the copy
         // size exceeds the destination size (unsigned compare), nothing is
         // emitted here: 'break' leaves the switch for whatever handling
         // follows it.
1155     llvm::APSInt Size, DstSize;
1156     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1157         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1158       break;
1159     if (Size.ugt(DstSize))
1160       break;
1161     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1162     Address Src = EmitPointerWithAlignment(E->getArg(1));
         // The size operand is the folded constant, not a re-emitted expression.
1163     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1164     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1165     return RValue::get(Dest.getPointer());
1166   }
1167 
1168   case Builtin::BI__builtin_objc_memmove_collectable: {
         // Delegates the (dest, src, size) move to the Objective-C runtime's
         // EmitGCMemmoveCollectable hook and evaluates to the destination
         // pointer.
1169     Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
1170     Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
1171     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1172     CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
1173                                                   DestAddr, SrcAddr, SizeVal);
1174     return RValue::get(DestAddr.getPointer());
1175   }
1176 
1177   case Builtin::BI__builtin___memmove_chk: {
1178     // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
         // If either size fails to evaluate as an integer constant, or the move
         // size exceeds the destination size (unsigned compare), nothing is
         // emitted here: 'break' leaves the switch for whatever handling
         // follows it.
1179     llvm::APSInt Size, DstSize;
1180     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1181         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1182       break;
1183     if (Size.ugt(DstSize))
1184       break;
1185     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1186     Address Src = EmitPointerWithAlignment(E->getArg(1));
         // The size operand is the folded constant, not a re-emitted expression.
1187     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1188     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1189     return RValue::get(Dest.getPointer());
1190   }
1191 
1192   case Builtin::BImemmove:
1193   case Builtin::BI__builtin_memmove: {
         // memmove(dest, src, n): emitted as a non-volatile memmove, preceded by
         // non-null argument checks on dest (parameter 0) and src (parameter 1).
1194     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1195     Address Src = EmitPointerWithAlignment(E->getArg(1));
1196     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1197     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1198                         E->getArg(0)->getExprLoc(), FD, 0);
1199     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1200                         E->getArg(1)->getExprLoc(), FD, 1);
1201     Builder.CreateMemMove(Dest, Src, SizeVal, false);
         // The call evaluates to the destination pointer.
1202     return RValue::get(Dest.getPointer());
1203   }
1204   case Builtin::BImemset:
1205   case Builtin::BI__builtin_memset: {
         // memset(dest, c, n): emitted as a non-volatile memset, preceded by a
         // non-null argument check on dest (parameter 0).
1206     Address Dest = EmitPointerWithAlignment(E->getArg(0));
         // The fill value is truncated to a single byte (i8).
1207     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1208                                          Builder.getInt8Ty());
1209     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1210     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1211                         E->getArg(0)->getExprLoc(), FD, 0);
1212     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
         // The call evaluates to the destination pointer.
1213     return RValue::get(Dest.getPointer());
1214   }
1215   case Builtin::BI__builtin___memset_chk: {
1216     // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
         // If either size fails to evaluate as an integer constant, or the fill
         // size exceeds the destination size (unsigned compare), nothing is
         // emitted here: 'break' leaves the switch for whatever handling
         // follows it.
1217     llvm::APSInt Size, DstSize;
1218     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1219         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1220       break;
1221     if (Size.ugt(DstSize))
1222       break;
1223     Address Dest = EmitPointerWithAlignment(E->getArg(0));
         // The fill value is truncated to a single byte (i8).
1224     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1225                                          Builder.getInt8Ty());
         // The size operand is the folded constant, not a re-emitted expression.
1226     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1227     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1228     return RValue::get(Dest.getPointer());
1229   }
1230   case Builtin::BI__builtin_dwarf_cfa: {
         // Calls the eh_dwarf_cfa intrinsic with a single i32 offset operand,
         // currently always 0 (see the TODO below).
1231     // The offset in bytes from the first argument to the CFA.
1232     //
1233     // Why on earth is this in the frontend?  Is there any reason at
1234     // all that the backend can't reasonably determine this while
1235     // lowering llvm.eh.dwarf.cfa()?
1236     //
1237     // TODO: If there's a satisfactory reason, add a target hook for
1238     // this instead of hard-coding 0, which is correct for most targets.
1239     int32_t Offset = 0;
1240 
1241     Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
1242     return RValue::get(Builder.CreateCall(F,
1243                                       llvm::ConstantInt::get(Int32Ty, Offset)));
1244   }
1245   case Builtin::BI__builtin_return_address: {
         // The depth argument is emitted as a constant expression of type
         // 'unsigned int' and passed to the returnaddress intrinsic.
1246     Value *Depth =
1247         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1248     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1249     return RValue::get(Builder.CreateCall(F, Depth));
1250   }
1251   case Builtin::BI_ReturnAddress: {
         // Same intrinsic as __builtin_return_address, with the depth fixed to
         // the i32 constant 0.
1252     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1253     return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
1254   }
1255   case Builtin::BI__builtin_frame_address: {
         // The depth argument is emitted as a constant expression of type
         // 'unsigned int' and passed to the frameaddress intrinsic.
1256     Value *Depth =
1257         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1258     Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
1259     return RValue::get(Builder.CreateCall(F, Depth));
1260   }
1261   case Builtin::BI__builtin_extract_return_addr: {
         // Target-specific: the emitted address is handed to the target hooks'
         // decodeReturnAddress and its result is returned.
1262     Value *Address = EmitScalarExpr(E->getArg(0));
1263     Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
1264     return RValue::get(Result);
1265   }
1266   case Builtin::BI__builtin_frob_return_addr: {
         // Target-specific counterpart of __builtin_extract_return_addr: the
         // emitted address is handed to the target hooks' encodeReturnAddress.
1267     Value *Address = EmitScalarExpr(E->getArg(0));
1268     Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
1269     return RValue::get(Result);
1270   }
1271   case Builtin::BI__builtin_dwarf_sp_column: {
         // Evaluates to a compile-time constant: the column number reported by
         // the target hooks' getDwarfEHStackPointer, in the builtin's integer
         // return type.
1272     llvm::IntegerType *Ty
1273       = cast<llvm::IntegerType>(ConvertType(E->getType()));
1274     int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
         // A column of -1 is treated as "not supported on this target": report
         // it and produce an undef value of the result type.
1275     if (Column == -1) {
1276       CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
1277       return RValue::get(llvm::UndefValue::get(Ty));
1278     }
1279     return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
1280   }
1281   case Builtin::BI__builtin_init_dwarf_reg_size_table: {
         // Target-specific: the table address is passed to the target hooks'
         // initDwarfEHRegSizeTable. A true return from the hook is reported as
         // "unsupported". Either way the call expression evaluates to undef of
         // its own type.
1282     Value *Address = EmitScalarExpr(E->getArg(0));
1283     if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
1284       CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
1285     return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1286   }
1287   case Builtin::BI__builtin_eh_return: {
         // Calls the eh_return intrinsic variant matching the width of the
         // integer (first) argument, then terminates the block with unreachable.
1288     Value *Int = EmitScalarExpr(E->getArg(0));
1289     Value *Ptr = EmitScalarExpr(E->getArg(1));
1290 
1291     llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
1292     assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
1293            "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
         // Anything that is not 32 bits wide selects the i64 variant (the assert
         // above restricts that to exactly 64 in asserting builds).
1294     Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
1295                                   ? Intrinsic::eh_return_i32
1296                                   : Intrinsic::eh_return_i64);
1297     Builder.CreateCall(F, {Int, Ptr});
1298     Builder.CreateUnreachable();
1299 
1300     // We do need to preserve an insertion point.
1301     EmitBlock(createBasicBlock("builtin_eh_return.cont"));
1302 
         // The builtin produces no value.
1303     return RValue::get(nullptr);
1304   }
1305   case Builtin::BI__builtin_unwind_init: {
         // Direct, argument-less call to the eh_unwind_init intrinsic.
1306     Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
1307     return RValue::get(Builder.CreateCall(F));
1308   }
1309   case Builtin::BI__builtin_extend_pointer: {
1310     // Extends a pointer to the size of an _Unwind_Word, which is
1311     // uint64_t on all platforms.  Generally this gets poked into a
1312     // register and eventually used as an address, so if the
1313     // addressing registers are wider than pointers and the platform
1314     // doesn't implicitly ignore high-order bits when doing
1315     // addressing, we need to make sure we zext / sext based on
1316     // the platform's expectations.
1317     //
1318     // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
1319 
1320     // Cast the pointer to intptr_t.
1321     Value *Ptr = EmitScalarExpr(E->getArg(0));
1322     Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
1323 
1324     // If that's 64 bits, we're done.
1325     if (IntPtrTy->getBitWidth() == 64)
1326       return RValue::get(Result);
1327 
1328     // Otherwise, ask the codegen data what to do.
         // The target hooks decide between sign- and zero-extension to i64.
1329     if (getTargetHooks().extendPointerWithSExt())
1330       return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
1331     else
1332       return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
1333   }
1334   case Builtin::BI__builtin_setjmp: {
         // Fills two slots of the jump buffer by hand (slot 0: frame address,
         // slot 2: saved stack pointer) and then calls the eh_sjlj_setjmp
         // intrinsic on the buffer; the intrinsic's result is the builtin's
         // value.
1335     // Buffer is a void**.
1336     Address Buf = EmitPointerWithAlignment(E->getArg(0));
1337 
1338     // Store the frame pointer to the setjmp buffer.
1339     Value *FrameAddr =
1340       Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1341                          ConstantInt::get(Int32Ty, 0));
1342     Builder.CreateStore(FrameAddr, Buf);
1343 
1344     // Store the stack pointer to the setjmp buffer.
1345     Value *StackAddr =
1346         Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
         // Element index 2 of the buffer, with pointer-sized elements.
1347     Address StackSaveSlot =
1348       Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
1349     Builder.CreateStore(StackAddr, StackSaveSlot);
1350 
1351     // Call LLVM's EH setjmp, which is lightweight.
1352     Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
         // The intrinsic is called with the buffer bitcast to Int8PtrTy.
1353     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1354     return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
1355   }
1356   case Builtin::BI__builtin_longjmp: {
         // Only the buffer (first) argument is used here; it is bitcast to
         // Int8PtrTy and passed to the eh_sjlj_longjmp intrinsic.
1357     Value *Buf = EmitScalarExpr(E->getArg(0));
1358     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1359 
1360     // Call LLVM's EH longjmp, which is lightweight.
1361     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
1362 
1363     // longjmp doesn't return; mark this as unreachable.
1364     Builder.CreateUnreachable();
1365 
1366     // We do need to preserve an insertion point.
1367     EmitBlock(createBasicBlock("longjmp.cont"));
1368 
         // The builtin produces no value.
1369     return RValue::get(nullptr);
1370   }
       // The size-less __sync_* spellings are not expected to reach codegen
       // (only the _1/_2/_4/_8/_16 forms are lowered in the cases that follow);
       // hitting one of these is reported as an internal error.
1371   case Builtin::BI__sync_fetch_and_add:
1372   case Builtin::BI__sync_fetch_and_sub:
1373   case Builtin::BI__sync_fetch_and_or:
1374   case Builtin::BI__sync_fetch_and_and:
1375   case Builtin::BI__sync_fetch_and_xor:
1376   case Builtin::BI__sync_fetch_and_nand:
1377   case Builtin::BI__sync_add_and_fetch:
1378   case Builtin::BI__sync_sub_and_fetch:
1379   case Builtin::BI__sync_and_and_fetch:
1380   case Builtin::BI__sync_or_and_fetch:
1381   case Builtin::BI__sync_xor_and_fetch:
1382   case Builtin::BI__sync_nand_and_fetch:
1383   case Builtin::BI__sync_val_compare_and_swap:
1384   case Builtin::BI__sync_bool_compare_and_swap:
1385   case Builtin::BI__sync_lock_test_and_set:
1386   case Builtin::BI__sync_lock_release:
1387   case Builtin::BI__sync_swap:
1388     llvm_unreachable("Shouldn't make it through sema");
       // __sync_fetch_and_<op>_N: every operand size (1, 2, 4, 8, 16) of a given
       // operation is handled by one EmitBinaryAtomic call with the matching
       // AtomicRMWInst operation (Add, Sub, Or, And, Xor, Nand).
1389   case Builtin::BI__sync_fetch_and_add_1:
1390   case Builtin::BI__sync_fetch_and_add_2:
1391   case Builtin::BI__sync_fetch_and_add_4:
1392   case Builtin::BI__sync_fetch_and_add_8:
1393   case Builtin::BI__sync_fetch_and_add_16:
1394     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
1395   case Builtin::BI__sync_fetch_and_sub_1:
1396   case Builtin::BI__sync_fetch_and_sub_2:
1397   case Builtin::BI__sync_fetch_and_sub_4:
1398   case Builtin::BI__sync_fetch_and_sub_8:
1399   case Builtin::BI__sync_fetch_and_sub_16:
1400     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
1401   case Builtin::BI__sync_fetch_and_or_1:
1402   case Builtin::BI__sync_fetch_and_or_2:
1403   case Builtin::BI__sync_fetch_and_or_4:
1404   case Builtin::BI__sync_fetch_and_or_8:
1405   case Builtin::BI__sync_fetch_and_or_16:
1406     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
1407   case Builtin::BI__sync_fetch_and_and_1:
1408   case Builtin::BI__sync_fetch_and_and_2:
1409   case Builtin::BI__sync_fetch_and_and_4:
1410   case Builtin::BI__sync_fetch_and_and_8:
1411   case Builtin::BI__sync_fetch_and_and_16:
1412     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
1413   case Builtin::BI__sync_fetch_and_xor_1:
1414   case Builtin::BI__sync_fetch_and_xor_2:
1415   case Builtin::BI__sync_fetch_and_xor_4:
1416   case Builtin::BI__sync_fetch_and_xor_8:
1417   case Builtin::BI__sync_fetch_and_xor_16:
1418     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
1419   case Builtin::BI__sync_fetch_and_nand_1:
1420   case Builtin::BI__sync_fetch_and_nand_2:
1421   case Builtin::BI__sync_fetch_and_nand_4:
1422   case Builtin::BI__sync_fetch_and_nand_8:
1423   case Builtin::BI__sync_fetch_and_nand_16:
1424     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
1425 
1426   // Clang extensions: not overloaded yet.
1427   case Builtin::BI__sync_fetch_and_min:
1428     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
1429   case Builtin::BI__sync_fetch_and_max:
1430     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
1431   case Builtin::BI__sync_fetch_and_umin:
1432     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
1433   case Builtin::BI__sync_fetch_and_umax:
1434     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
1435 
1436   case Builtin::BI__sync_add_and_fetch_1:
1437   case Builtin::BI__sync_add_and_fetch_2:
1438   case Builtin::BI__sync_add_and_fetch_4:
1439   case Builtin::BI__sync_add_and_fetch_8:
1440   case Builtin::BI__sync_add_and_fetch_16:
1441     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
1442                                 llvm::Instruction::Add);
1443   case Builtin::BI__sync_sub_and_fetch_1:
1444   case Builtin::BI__sync_sub_and_fetch_2:
1445   case Builtin::BI__sync_sub_and_fetch_4:
1446   case Builtin::BI__sync_sub_and_fetch_8:
1447   case Builtin::BI__sync_sub_and_fetch_16:
1448     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
1449                                 llvm::Instruction::Sub);
1450   case Builtin::BI__sync_and_and_fetch_1:
1451   case Builtin::BI__sync_and_and_fetch_2:
1452   case Builtin::BI__sync_and_and_fetch_4:
1453   case Builtin::BI__sync_and_and_fetch_8:
1454   case Builtin::BI__sync_and_and_fetch_16:
1455     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
1456                                 llvm::Instruction::And);
1457   case Builtin::BI__sync_or_and_fetch_1:
1458   case Builtin::BI__sync_or_and_fetch_2:
1459   case Builtin::BI__sync_or_and_fetch_4:
1460   case Builtin::BI__sync_or_and_fetch_8:
1461   case Builtin::BI__sync_or_and_fetch_16:
1462     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
1463                                 llvm::Instruction::Or);
1464   case Builtin::BI__sync_xor_and_fetch_1:
1465   case Builtin::BI__sync_xor_and_fetch_2:
1466   case Builtin::BI__sync_xor_and_fetch_4:
1467   case Builtin::BI__sync_xor_and_fetch_8:
1468   case Builtin::BI__sync_xor_and_fetch_16:
1469     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
1470                                 llvm::Instruction::Xor);
1471   case Builtin::BI__sync_nand_and_fetch_1:
1472   case Builtin::BI__sync_nand_and_fetch_2:
1473   case Builtin::BI__sync_nand_and_fetch_4:
1474   case Builtin::BI__sync_nand_and_fetch_8:
1475   case Builtin::BI__sync_nand_and_fetch_16:
1476     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
1477                                 llvm::Instruction::And, true);
1478 
1479   case Builtin::BI__sync_val_compare_and_swap_1:
1480   case Builtin::BI__sync_val_compare_and_swap_2:
1481   case Builtin::BI__sync_val_compare_and_swap_4:
1482   case Builtin::BI__sync_val_compare_and_swap_8:
1483   case Builtin::BI__sync_val_compare_and_swap_16:
1484     return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
1485 
1486   case Builtin::BI__sync_bool_compare_and_swap_1:
1487   case Builtin::BI__sync_bool_compare_and_swap_2:
1488   case Builtin::BI__sync_bool_compare_and_swap_4:
1489   case Builtin::BI__sync_bool_compare_and_swap_8:
1490   case Builtin::BI__sync_bool_compare_and_swap_16:
1491     return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
1492 
1493   case Builtin::BI__sync_swap_1:
1494   case Builtin::BI__sync_swap_2:
1495   case Builtin::BI__sync_swap_4:
1496   case Builtin::BI__sync_swap_8:
1497   case Builtin::BI__sync_swap_16:
1498     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1499 
1500   case Builtin::BI__sync_lock_test_and_set_1:
1501   case Builtin::BI__sync_lock_test_and_set_2:
1502   case Builtin::BI__sync_lock_test_and_set_4:
1503   case Builtin::BI__sync_lock_test_and_set_8:
1504   case Builtin::BI__sync_lock_test_and_set_16:
1505     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1506 
1507   case Builtin::BI__sync_lock_release_1:
1508   case Builtin::BI__sync_lock_release_2:
1509   case Builtin::BI__sync_lock_release_4:
1510   case Builtin::BI__sync_lock_release_8:
1511   case Builtin::BI__sync_lock_release_16: {
1512     Value *Ptr = EmitScalarExpr(E->getArg(0));
1513     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
1514     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
1515     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
1516                                              StoreSize.getQuantity() * 8);
1517     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
1518     llvm::StoreInst *Store =
1519       Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
1520                                  StoreSize);
1521     Store->setAtomic(llvm::AtomicOrdering::Release);
1522     return RValue::get(nullptr);
1523   }
1524 
1525   case Builtin::BI__sync_synchronize: {
1526     // We assume this is supposed to correspond to a C++0x-style
1527     // sequentially-consistent fence (i.e. this is only usable for
    // synchronization, not device I/O or anything like that). This intrinsic
1529     // is really badly designed in the sense that in theory, there isn't
1530     // any way to safely use it... but in practice, it mostly works
1531     // to use it with non-atomic loads and stores to get acquire/release
1532     // semantics.
1533     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
1534     return RValue::get(nullptr);
1535   }
1536 
1537   case Builtin::BI__builtin_nontemporal_load:
1538     return RValue::get(EmitNontemporalLoad(*this, E));
1539   case Builtin::BI__builtin_nontemporal_store:
1540     return RValue::get(EmitNontemporalStore(*this, E));
1541   case Builtin::BI__c11_atomic_is_lock_free:
1542   case Builtin::BI__atomic_is_lock_free: {
1543     // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
1544     // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
1545     // _Atomic(T) is always properly-aligned.
1546     const char *LibCallName = "__atomic_is_lock_free";
1547     CallArgList Args;
1548     Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
1549              getContext().getSizeType());
1550     if (BuiltinID == Builtin::BI__atomic_is_lock_free)
1551       Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
1552                getContext().VoidPtrTy);
1553     else
1554       Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
1555                getContext().VoidPtrTy);
1556     const CGFunctionInfo &FuncInfo =
1557         CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
1558     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
1559     llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
1560     return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
1561   }
1562 
1563   case Builtin::BI__atomic_test_and_set: {
1564     // Look at the argument type to determine whether this is a volatile
1565     // operation. The parameter type is always volatile.
1566     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1567     bool Volatile =
1568         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1569 
1570     Value *Ptr = EmitScalarExpr(E->getArg(0));
1571     unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1572     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1573     Value *NewVal = Builder.getInt8(1);
1574     Value *Order = EmitScalarExpr(E->getArg(1));
1575     if (isa<llvm::ConstantInt>(Order)) {
1576       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1577       AtomicRMWInst *Result = nullptr;
1578       switch (ord) {
1579       case 0:  // memory_order_relaxed
1580       default: // invalid order
1581         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1582                                          llvm::AtomicOrdering::Monotonic);
1583         break;
1584       case 1: // memory_order_consume
1585       case 2: // memory_order_acquire
1586         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1587                                          llvm::AtomicOrdering::Acquire);
1588         break;
1589       case 3: // memory_order_release
1590         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1591                                          llvm::AtomicOrdering::Release);
1592         break;
1593       case 4: // memory_order_acq_rel
1594 
1595         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1596                                          llvm::AtomicOrdering::AcquireRelease);
1597         break;
1598       case 5: // memory_order_seq_cst
1599         Result = Builder.CreateAtomicRMW(
1600             llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1601             llvm::AtomicOrdering::SequentiallyConsistent);
1602         break;
1603       }
1604       Result->setVolatile(Volatile);
1605       return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1606     }
1607 
1608     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1609 
1610     llvm::BasicBlock *BBs[5] = {
1611       createBasicBlock("monotonic", CurFn),
1612       createBasicBlock("acquire", CurFn),
1613       createBasicBlock("release", CurFn),
1614       createBasicBlock("acqrel", CurFn),
1615       createBasicBlock("seqcst", CurFn)
1616     };
1617     llvm::AtomicOrdering Orders[5] = {
1618         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
1619         llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
1620         llvm::AtomicOrdering::SequentiallyConsistent};
1621 
1622     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1623     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1624 
1625     Builder.SetInsertPoint(ContBB);
1626     PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
1627 
1628     for (unsigned i = 0; i < 5; ++i) {
1629       Builder.SetInsertPoint(BBs[i]);
1630       AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1631                                                    Ptr, NewVal, Orders[i]);
1632       RMW->setVolatile(Volatile);
1633       Result->addIncoming(RMW, BBs[i]);
1634       Builder.CreateBr(ContBB);
1635     }
1636 
1637     SI->addCase(Builder.getInt32(0), BBs[0]);
1638     SI->addCase(Builder.getInt32(1), BBs[1]);
1639     SI->addCase(Builder.getInt32(2), BBs[1]);
1640     SI->addCase(Builder.getInt32(3), BBs[2]);
1641     SI->addCase(Builder.getInt32(4), BBs[3]);
1642     SI->addCase(Builder.getInt32(5), BBs[4]);
1643 
1644     Builder.SetInsertPoint(ContBB);
1645     return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1646   }
1647 
1648   case Builtin::BI__atomic_clear: {
1649     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1650     bool Volatile =
1651         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1652 
1653     Address Ptr = EmitPointerWithAlignment(E->getArg(0));
1654     unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
1655     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1656     Value *NewVal = Builder.getInt8(0);
1657     Value *Order = EmitScalarExpr(E->getArg(1));
1658     if (isa<llvm::ConstantInt>(Order)) {
1659       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1660       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1661       switch (ord) {
1662       case 0:  // memory_order_relaxed
1663       default: // invalid order
1664         Store->setOrdering(llvm::AtomicOrdering::Monotonic);
1665         break;
1666       case 3:  // memory_order_release
1667         Store->setOrdering(llvm::AtomicOrdering::Release);
1668         break;
1669       case 5:  // memory_order_seq_cst
1670         Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
1671         break;
1672       }
1673       return RValue::get(nullptr);
1674     }
1675 
1676     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1677 
1678     llvm::BasicBlock *BBs[3] = {
1679       createBasicBlock("monotonic", CurFn),
1680       createBasicBlock("release", CurFn),
1681       createBasicBlock("seqcst", CurFn)
1682     };
1683     llvm::AtomicOrdering Orders[3] = {
1684         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
1685         llvm::AtomicOrdering::SequentiallyConsistent};
1686 
1687     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1688     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1689 
1690     for (unsigned i = 0; i < 3; ++i) {
1691       Builder.SetInsertPoint(BBs[i]);
1692       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1693       Store->setOrdering(Orders[i]);
1694       Builder.CreateBr(ContBB);
1695     }
1696 
1697     SI->addCase(Builder.getInt32(0), BBs[0]);
1698     SI->addCase(Builder.getInt32(3), BBs[1]);
1699     SI->addCase(Builder.getInt32(5), BBs[2]);
1700 
1701     Builder.SetInsertPoint(ContBB);
1702     return RValue::get(nullptr);
1703   }
1704 
1705   case Builtin::BI__atomic_thread_fence:
1706   case Builtin::BI__atomic_signal_fence:
1707   case Builtin::BI__c11_atomic_thread_fence:
1708   case Builtin::BI__c11_atomic_signal_fence: {
1709     llvm::SynchronizationScope Scope;
1710     if (BuiltinID == Builtin::BI__atomic_signal_fence ||
1711         BuiltinID == Builtin::BI__c11_atomic_signal_fence)
1712       Scope = llvm::SingleThread;
1713     else
1714       Scope = llvm::CrossThread;
1715     Value *Order = EmitScalarExpr(E->getArg(0));
1716     if (isa<llvm::ConstantInt>(Order)) {
1717       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1718       switch (ord) {
1719       case 0:  // memory_order_relaxed
1720       default: // invalid order
1721         break;
1722       case 1:  // memory_order_consume
1723       case 2:  // memory_order_acquire
1724         Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
1725         break;
1726       case 3:  // memory_order_release
1727         Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
1728         break;
1729       case 4:  // memory_order_acq_rel
1730         Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
1731         break;
1732       case 5:  // memory_order_seq_cst
1733         Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
1734                             Scope);
1735         break;
1736       }
1737       return RValue::get(nullptr);
1738     }
1739 
1740     llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
1741     AcquireBB = createBasicBlock("acquire", CurFn);
1742     ReleaseBB = createBasicBlock("release", CurFn);
1743     AcqRelBB = createBasicBlock("acqrel", CurFn);
1744     SeqCstBB = createBasicBlock("seqcst", CurFn);
1745     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1746 
1747     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1748     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
1749 
1750     Builder.SetInsertPoint(AcquireBB);
1751     Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
1752     Builder.CreateBr(ContBB);
1753     SI->addCase(Builder.getInt32(1), AcquireBB);
1754     SI->addCase(Builder.getInt32(2), AcquireBB);
1755 
1756     Builder.SetInsertPoint(ReleaseBB);
1757     Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
1758     Builder.CreateBr(ContBB);
1759     SI->addCase(Builder.getInt32(3), ReleaseBB);
1760 
1761     Builder.SetInsertPoint(AcqRelBB);
1762     Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
1763     Builder.CreateBr(ContBB);
1764     SI->addCase(Builder.getInt32(4), AcqRelBB);
1765 
1766     Builder.SetInsertPoint(SeqCstBB);
1767     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Scope);
1768     Builder.CreateBr(ContBB);
1769     SI->addCase(Builder.getInt32(5), SeqCstBB);
1770 
1771     Builder.SetInsertPoint(ContBB);
1772     return RValue::get(nullptr);
1773   }
1774 
1775     // Library functions with special handling.
1776   case Builtin::BIsqrt:
1777   case Builtin::BIsqrtf:
1778   case Builtin::BIsqrtl: {
1779     // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
1780     // in finite- or unsafe-math mode (the intrinsic has different semantics
1781     // for handling negative numbers compared to the library function, so
1782     // -fmath-errno=0 is not enough).
1783     if (!FD->hasAttr<ConstAttr>())
1784       break;
1785     if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
1786           CGM.getCodeGenOpts().NoNaNsFPMath))
1787       break;
1788     Value *Arg0 = EmitScalarExpr(E->getArg(0));
1789     llvm::Type *ArgType = Arg0->getType();
1790     Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
1791     return RValue::get(Builder.CreateCall(F, Arg0));
1792   }
1793 
1794   case Builtin::BI__builtin_pow:
1795   case Builtin::BI__builtin_powf:
1796   case Builtin::BI__builtin_powl:
1797   case Builtin::BIpow:
1798   case Builtin::BIpowf:
1799   case Builtin::BIpowl: {
1800     // Transform a call to pow* into a @llvm.pow.* intrinsic call.
1801     if (!FD->hasAttr<ConstAttr>())
1802       break;
1803     Value *Base = EmitScalarExpr(E->getArg(0));
1804     Value *Exponent = EmitScalarExpr(E->getArg(1));
1805     llvm::Type *ArgType = Base->getType();
1806     Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
1807     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1808   }
1809 
1810   case Builtin::BIfma:
1811   case Builtin::BIfmaf:
1812   case Builtin::BIfmal:
1813   case Builtin::BI__builtin_fma:
1814   case Builtin::BI__builtin_fmaf:
1815   case Builtin::BI__builtin_fmal: {
1816     // Rewrite fma to intrinsic.
1817     Value *FirstArg = EmitScalarExpr(E->getArg(0));
1818     llvm::Type *ArgType = FirstArg->getType();
1819     Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
1820     return RValue::get(
1821         Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
1822                                EmitScalarExpr(E->getArg(2))}));
1823   }
1824 
1825   case Builtin::BI__builtin_signbit:
1826   case Builtin::BI__builtin_signbitf:
1827   case Builtin::BI__builtin_signbitl: {
1828     return RValue::get(
1829         Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
1830                            ConvertType(E->getType())));
1831   }
1832   case Builtin::BI__builtin_annotation: {
1833     llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
1834     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
1835                                       AnnVal->getType());
1836 
1837     // Get the annotation string, go through casts. Sema requires this to be a
1838     // non-wide string literal, potentially casted, so the cast<> is safe.
1839     const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
1840     StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
1841     return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
1842   }
1843   case Builtin::BI__builtin_addcb:
1844   case Builtin::BI__builtin_addcs:
1845   case Builtin::BI__builtin_addc:
1846   case Builtin::BI__builtin_addcl:
1847   case Builtin::BI__builtin_addcll:
1848   case Builtin::BI__builtin_subcb:
1849   case Builtin::BI__builtin_subcs:
1850   case Builtin::BI__builtin_subc:
1851   case Builtin::BI__builtin_subcl:
1852   case Builtin::BI__builtin_subcll: {
1853 
1854     // We translate all of these builtins from expressions of the form:
1855     //   int x = ..., y = ..., carryin = ..., carryout, result;
1856     //   result = __builtin_addc(x, y, carryin, &carryout);
1857     //
1858     // to LLVM IR of the form:
1859     //
1860     //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
1861     //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
1862     //   %carry1 = extractvalue {i32, i1} %tmp1, 1
1863     //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
1864     //                                                       i32 %carryin)
1865     //   %result = extractvalue {i32, i1} %tmp2, 0
1866     //   %carry2 = extractvalue {i32, i1} %tmp2, 1
1867     //   %tmp3 = or i1 %carry1, %carry2
1868     //   %tmp4 = zext i1 %tmp3 to i32
1869     //   store i32 %tmp4, i32* %carryout
1870 
1871     // Scalarize our inputs.
1872     llvm::Value *X = EmitScalarExpr(E->getArg(0));
1873     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1874     llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
1875     Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
1876 
1877     // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
1878     llvm::Intrinsic::ID IntrinsicId;
1879     switch (BuiltinID) {
1880     default: llvm_unreachable("Unknown multiprecision builtin id.");
1881     case Builtin::BI__builtin_addcb:
1882     case Builtin::BI__builtin_addcs:
1883     case Builtin::BI__builtin_addc:
1884     case Builtin::BI__builtin_addcl:
1885     case Builtin::BI__builtin_addcll:
1886       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1887       break;
1888     case Builtin::BI__builtin_subcb:
1889     case Builtin::BI__builtin_subcs:
1890     case Builtin::BI__builtin_subc:
1891     case Builtin::BI__builtin_subcl:
1892     case Builtin::BI__builtin_subcll:
1893       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1894       break;
1895     }
1896 
1897     // Construct our resulting LLVM IR expression.
1898     llvm::Value *Carry1;
1899     llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
1900                                               X, Y, Carry1);
1901     llvm::Value *Carry2;
1902     llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
1903                                               Sum1, Carryin, Carry2);
1904     llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
1905                                                X->getType());
1906     Builder.CreateStore(CarryOut, CarryOutPtr);
1907     return RValue::get(Sum2);
1908   }
1909 
1910   case Builtin::BI__builtin_add_overflow:
1911   case Builtin::BI__builtin_sub_overflow:
1912   case Builtin::BI__builtin_mul_overflow: {
1913     const clang::Expr *LeftArg = E->getArg(0);
1914     const clang::Expr *RightArg = E->getArg(1);
1915     const clang::Expr *ResultArg = E->getArg(2);
1916 
1917     clang::QualType ResultQTy =
1918         ResultArg->getType()->castAs<PointerType>()->getPointeeType();
1919 
1920     WidthAndSignedness LeftInfo =
1921         getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
1922     WidthAndSignedness RightInfo =
1923         getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
1924     WidthAndSignedness ResultInfo =
1925         getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
1926     WidthAndSignedness EncompassingInfo =
1927         EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
1928 
1929     llvm::Type *EncompassingLLVMTy =
1930         llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
1931 
1932     llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
1933 
1934     llvm::Intrinsic::ID IntrinsicId;
1935     switch (BuiltinID) {
1936     default:
1937       llvm_unreachable("Unknown overflow builtin id.");
1938     case Builtin::BI__builtin_add_overflow:
1939       IntrinsicId = EncompassingInfo.Signed
1940                         ? llvm::Intrinsic::sadd_with_overflow
1941                         : llvm::Intrinsic::uadd_with_overflow;
1942       break;
1943     case Builtin::BI__builtin_sub_overflow:
1944       IntrinsicId = EncompassingInfo.Signed
1945                         ? llvm::Intrinsic::ssub_with_overflow
1946                         : llvm::Intrinsic::usub_with_overflow;
1947       break;
1948     case Builtin::BI__builtin_mul_overflow:
1949       IntrinsicId = EncompassingInfo.Signed
1950                         ? llvm::Intrinsic::smul_with_overflow
1951                         : llvm::Intrinsic::umul_with_overflow;
1952       break;
1953     }
1954 
1955     llvm::Value *Left = EmitScalarExpr(LeftArg);
1956     llvm::Value *Right = EmitScalarExpr(RightArg);
1957     Address ResultPtr = EmitPointerWithAlignment(ResultArg);
1958 
1959     // Extend each operand to the encompassing type.
1960     Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
1961     Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
1962 
1963     // Perform the operation on the extended values.
1964     llvm::Value *Overflow, *Result;
1965     Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
1966 
1967     if (EncompassingInfo.Width > ResultInfo.Width) {
1968       // The encompassing type is wider than the result type, so we need to
1969       // truncate it.
1970       llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
1971 
1972       // To see if the truncation caused an overflow, we will extend
1973       // the result and then compare it to the original result.
1974       llvm::Value *ResultTruncExt = Builder.CreateIntCast(
1975           ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
1976       llvm::Value *TruncationOverflow =
1977           Builder.CreateICmpNE(Result, ResultTruncExt);
1978 
1979       Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
1980       Result = ResultTrunc;
1981     }
1982 
1983     // Finally, store the result using the pointer.
1984     bool isVolatile =
1985       ResultArg->getType()->getPointeeType().isVolatileQualified();
1986     Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
1987 
1988     return RValue::get(Overflow);
1989   }
1990 
1991   case Builtin::BI__builtin_uadd_overflow:
1992   case Builtin::BI__builtin_uaddl_overflow:
1993   case Builtin::BI__builtin_uaddll_overflow:
1994   case Builtin::BI__builtin_usub_overflow:
1995   case Builtin::BI__builtin_usubl_overflow:
1996   case Builtin::BI__builtin_usubll_overflow:
1997   case Builtin::BI__builtin_umul_overflow:
1998   case Builtin::BI__builtin_umull_overflow:
1999   case Builtin::BI__builtin_umulll_overflow:
2000   case Builtin::BI__builtin_sadd_overflow:
2001   case Builtin::BI__builtin_saddl_overflow:
2002   case Builtin::BI__builtin_saddll_overflow:
2003   case Builtin::BI__builtin_ssub_overflow:
2004   case Builtin::BI__builtin_ssubl_overflow:
2005   case Builtin::BI__builtin_ssubll_overflow:
2006   case Builtin::BI__builtin_smul_overflow:
2007   case Builtin::BI__builtin_smull_overflow:
2008   case Builtin::BI__builtin_smulll_overflow: {
2009 
2010     // We translate all of these builtins directly to the relevant llvm IR node.
2011 
2012     // Scalarize our inputs.
2013     llvm::Value *X = EmitScalarExpr(E->getArg(0));
2014     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
2015     Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
2016 
2017     // Decide which of the overflow intrinsics we are lowering to:
2018     llvm::Intrinsic::ID IntrinsicId;
2019     switch (BuiltinID) {
2020     default: llvm_unreachable("Unknown overflow builtin id.");
2021     case Builtin::BI__builtin_uadd_overflow:
2022     case Builtin::BI__builtin_uaddl_overflow:
2023     case Builtin::BI__builtin_uaddll_overflow:
2024       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
2025       break;
2026     case Builtin::BI__builtin_usub_overflow:
2027     case Builtin::BI__builtin_usubl_overflow:
2028     case Builtin::BI__builtin_usubll_overflow:
2029       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
2030       break;
2031     case Builtin::BI__builtin_umul_overflow:
2032     case Builtin::BI__builtin_umull_overflow:
2033     case Builtin::BI__builtin_umulll_overflow:
2034       IntrinsicId = llvm::Intrinsic::umul_with_overflow;
2035       break;
2036     case Builtin::BI__builtin_sadd_overflow:
2037     case Builtin::BI__builtin_saddl_overflow:
2038     case Builtin::BI__builtin_saddll_overflow:
2039       IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
2040       break;
2041     case Builtin::BI__builtin_ssub_overflow:
2042     case Builtin::BI__builtin_ssubl_overflow:
2043     case Builtin::BI__builtin_ssubll_overflow:
2044       IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
2045       break;
2046     case Builtin::BI__builtin_smul_overflow:
2047     case Builtin::BI__builtin_smull_overflow:
2048     case Builtin::BI__builtin_smulll_overflow:
2049       IntrinsicId = llvm::Intrinsic::smul_with_overflow;
2050       break;
2051     }
2052 
2053 
2054     llvm::Value *Carry;
2055     llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
2056     Builder.CreateStore(Sum, SumOutPtr);
2057 
2058     return RValue::get(Carry);
2059   }
2060   case Builtin::BI__builtin_addressof:
2061     return RValue::get(EmitLValue(E->getArg(0)).getPointer());
2062   case Builtin::BI__builtin_operator_new:
2063     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2064                                     E->getArg(0), false);
2065   case Builtin::BI__builtin_operator_delete:
2066     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2067                                     E->getArg(0), true);
2068   case Builtin::BI__noop:
2069     // __noop always evaluates to an integer literal zero.
2070     return RValue::get(ConstantInt::get(IntTy, 0));
2071   case Builtin::BI__builtin_call_with_static_chain: {
2072     const CallExpr *Call = cast<CallExpr>(E->getArg(0));
2073     const Expr *Chain = E->getArg(1);
2074     return EmitCall(Call->getCallee()->getType(),
2075                     EmitScalarExpr(Call->getCallee()), Call, ReturnValue,
2076                     Call->getCalleeDecl(), EmitScalarExpr(Chain));
2077   }
2078   case Builtin::BI_InterlockedExchange8:
2079   case Builtin::BI_InterlockedExchange16:
2080   case Builtin::BI_InterlockedExchange:
2081   case Builtin::BI_InterlockedExchangePointer:
2082     return RValue::get(
2083         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
2084   case Builtin::BI_InterlockedCompareExchangePointer: {
2085     llvm::Type *RTy;
2086     llvm::IntegerType *IntType =
2087       IntegerType::get(getLLVMContext(),
2088                        getContext().getTypeSize(E->getType()));
2089     llvm::Type *IntPtrType = IntType->getPointerTo();
2090 
2091     llvm::Value *Destination =
2092       Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
2093 
2094     llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
2095     RTy = Exchange->getType();
2096     Exchange = Builder.CreatePtrToInt(Exchange, IntType);
2097 
2098     llvm::Value *Comparand =
2099       Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
2100 
2101     auto Result =
2102         Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
2103                                     AtomicOrdering::SequentiallyConsistent,
2104                                     AtomicOrdering::SequentiallyConsistent);
2105     Result->setVolatile(true);
2106 
2107     return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
2108                                                                          0),
2109                                               RTy));
2110   }
2111   case Builtin::BI_InterlockedCompareExchange8:
2112   case Builtin::BI_InterlockedCompareExchange16:
2113   case Builtin::BI_InterlockedCompareExchange:
2114   case Builtin::BI_InterlockedCompareExchange64: {
2115     AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
2116         EmitScalarExpr(E->getArg(0)),
2117         EmitScalarExpr(E->getArg(2)),
2118         EmitScalarExpr(E->getArg(1)),
2119         AtomicOrdering::SequentiallyConsistent,
2120         AtomicOrdering::SequentiallyConsistent);
2121       CXI->setVolatile(true);
2122       return RValue::get(Builder.CreateExtractValue(CXI, 0));
2123   }
2124   case Builtin::BI_InterlockedIncrement16:
2125   case Builtin::BI_InterlockedIncrement:
2126     return RValue::get(
2127         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
2128   case Builtin::BI_InterlockedDecrement16:
2129   case Builtin::BI_InterlockedDecrement:
2130     return RValue::get(
2131         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
2132   case Builtin::BI_InterlockedAnd8:
2133   case Builtin::BI_InterlockedAnd16:
2134   case Builtin::BI_InterlockedAnd:
2135     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
2136   case Builtin::BI_InterlockedExchangeAdd8:
2137   case Builtin::BI_InterlockedExchangeAdd16:
2138   case Builtin::BI_InterlockedExchangeAdd:
2139     return RValue::get(
2140         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
2141   case Builtin::BI_InterlockedExchangeSub8:
2142   case Builtin::BI_InterlockedExchangeSub16:
2143   case Builtin::BI_InterlockedExchangeSub:
2144     return RValue::get(
2145         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
2146   case Builtin::BI_InterlockedOr8:
2147   case Builtin::BI_InterlockedOr16:
2148   case Builtin::BI_InterlockedOr:
2149     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
2150   case Builtin::BI_InterlockedXor8:
2151   case Builtin::BI_InterlockedXor16:
2152   case Builtin::BI_InterlockedXor:
2153     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
2154   case Builtin::BI__readfsdword: {
2155     llvm::Type *IntTy = ConvertType(E->getType());
2156     Value *IntToPtr =
2157       Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
2158                              llvm::PointerType::get(IntTy, 257));
2159     LoadInst *Load =
2160         Builder.CreateDefaultAlignedLoad(IntToPtr, /*isVolatile=*/true);
2161     return RValue::get(Load);
2162   }
2163 
2164   case Builtin::BI__exception_code:
2165   case Builtin::BI_exception_code:
2166     return RValue::get(EmitSEHExceptionCode());
2167   case Builtin::BI__exception_info:
2168   case Builtin::BI_exception_info:
2169     return RValue::get(EmitSEHExceptionInfo());
2170   case Builtin::BI__abnormal_termination:
2171   case Builtin::BI_abnormal_termination:
2172     return RValue::get(EmitSEHAbnormalTermination());
2173   case Builtin::BI_setjmpex: {
2174     if (getTarget().getTriple().isOSMSVCRT()) {
2175       llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2176       llvm::AttributeSet ReturnsTwiceAttr =
2177           AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
2178                             llvm::Attribute::ReturnsTwice);
2179       llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
2180           llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2181           "_setjmpex", ReturnsTwiceAttr);
2182       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2183           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2184       llvm::Value *FrameAddr =
2185           Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2186                              ConstantInt::get(Int32Ty, 0));
2187       llvm::Value *Args[] = {Buf, FrameAddr};
2188       llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
2189       CS.setAttributes(ReturnsTwiceAttr);
2190       return RValue::get(CS.getInstruction());
2191     }
2192     break;
2193   }
2194   case Builtin::BI_setjmp: {
2195     if (getTarget().getTriple().isOSMSVCRT()) {
2196       llvm::AttributeSet ReturnsTwiceAttr =
2197           AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
2198                             llvm::Attribute::ReturnsTwice);
2199       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2200           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2201       llvm::CallSite CS;
2202       if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
2203         llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
2204         llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
2205             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
2206             "_setjmp3", ReturnsTwiceAttr);
2207         llvm::Value *Count = ConstantInt::get(IntTy, 0);
2208         llvm::Value *Args[] = {Buf, Count};
2209         CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
2210       } else {
2211         llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2212         llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
2213             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2214             "_setjmp", ReturnsTwiceAttr);
2215         llvm::Value *FrameAddr =
2216             Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2217                                ConstantInt::get(Int32Ty, 0));
2218         llvm::Value *Args[] = {Buf, FrameAddr};
2219         CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
2220       }
2221       CS.setAttributes(ReturnsTwiceAttr);
2222       return RValue::get(CS.getInstruction());
2223     }
2224     break;
2225   }
2226 
2227   case Builtin::BI__GetExceptionInfo: {
2228     if (llvm::GlobalVariable *GV =
2229             CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
2230       return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
2231     break;
2232   }
2233 
2234   case Builtin::BI__builtin_coro_size: {
2235     auto & Context = getContext();
2236     auto SizeTy = Context.getSizeType();
2237     auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
2238     Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
2239     return RValue::get(Builder.CreateCall(F));
2240   }
2241 
2242   case Builtin::BI__builtin_coro_id:
2243     return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
2244   case Builtin::BI__builtin_coro_promise:
2245     return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
2246   case Builtin::BI__builtin_coro_resume:
2247     return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
2248   case Builtin::BI__builtin_coro_frame:
2249     return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
2250   case Builtin::BI__builtin_coro_free:
2251     return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
2252   case Builtin::BI__builtin_coro_destroy:
2253     return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
2254   case Builtin::BI__builtin_coro_done:
2255     return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
2256   case Builtin::BI__builtin_coro_alloc:
2257     return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
2258   case Builtin::BI__builtin_coro_begin:
2259     return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
2260   case Builtin::BI__builtin_coro_end:
2261     return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
2262   case Builtin::BI__builtin_coro_suspend:
2263     return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
2264   case Builtin::BI__builtin_coro_param:
2265     return EmitCoroutineIntrinsic(E, Intrinsic::coro_param);
2266 
2267   // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
2268   case Builtin::BIread_pipe:
2269   case Builtin::BIwrite_pipe: {
2270     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2271           *Arg1 = EmitScalarExpr(E->getArg(1));
2272     CGOpenCLRuntime OpenCLRT(CGM);
2273     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2274     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2275 
2276     // Type of the generic packet parameter.
2277     unsigned GenericAS =
2278         getContext().getTargetAddressSpace(LangAS::opencl_generic);
2279     llvm::Type *I8PTy = llvm::PointerType::get(
2280         llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
2281 
2282     // Testing which overloaded version we should generate the call for.
2283     if (2U == E->getNumArgs()) {
2284       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
2285                                                              : "__write_pipe_2";
2286       // Creating a generic function type to be able to call with any builtin or
2287       // user defined type.
2288       llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
2289       llvm::FunctionType *FTy = llvm::FunctionType::get(
2290           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2291       Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
2292       return RValue::get(
2293           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2294                              {Arg0, BCast, PacketSize, PacketAlign}));
2295     } else {
2296       assert(4 == E->getNumArgs() &&
2297              "Illegal number of parameters to pipe function");
2298       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
2299                                                              : "__write_pipe_4";
2300 
2301       llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
2302                               Int32Ty, Int32Ty};
2303       Value *Arg2 = EmitScalarExpr(E->getArg(2)),
2304             *Arg3 = EmitScalarExpr(E->getArg(3));
2305       llvm::FunctionType *FTy = llvm::FunctionType::get(
2306           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2307       Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
2308       // We know the third argument is an integer type, but we may need to cast
2309       // it to i32.
2310       if (Arg2->getType() != Int32Ty)
2311         Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
2312       return RValue::get(Builder.CreateCall(
2313           CGM.CreateRuntimeFunction(FTy, Name),
2314           {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
2315     }
2316   }
  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
  // functions
2319   case Builtin::BIreserve_read_pipe:
2320   case Builtin::BIreserve_write_pipe:
2321   case Builtin::BIwork_group_reserve_read_pipe:
2322   case Builtin::BIwork_group_reserve_write_pipe:
2323   case Builtin::BIsub_group_reserve_read_pipe:
2324   case Builtin::BIsub_group_reserve_write_pipe: {
2325     // Composing the mangled name for the function.
2326     const char *Name;
2327     if (BuiltinID == Builtin::BIreserve_read_pipe)
2328       Name = "__reserve_read_pipe";
2329     else if (BuiltinID == Builtin::BIreserve_write_pipe)
2330       Name = "__reserve_write_pipe";
2331     else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
2332       Name = "__work_group_reserve_read_pipe";
2333     else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
2334       Name = "__work_group_reserve_write_pipe";
2335     else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
2336       Name = "__sub_group_reserve_read_pipe";
2337     else
2338       Name = "__sub_group_reserve_write_pipe";
2339 
2340     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2341           *Arg1 = EmitScalarExpr(E->getArg(1));
2342     llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
2343     CGOpenCLRuntime OpenCLRT(CGM);
2344     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2345     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2346 
2347     // Building the generic function prototype.
2348     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
2349     llvm::FunctionType *FTy = llvm::FunctionType::get(
2350         ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2351     // We know the second argument is an integer type, but we may need to cast
2352     // it to i32.
2353     if (Arg1->getType() != Int32Ty)
2354       Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
2355     return RValue::get(
2356         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2357                            {Arg0, Arg1, PacketSize, PacketAlign}));
2358   }
2359   // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
2360   // functions
2361   case Builtin::BIcommit_read_pipe:
2362   case Builtin::BIcommit_write_pipe:
2363   case Builtin::BIwork_group_commit_read_pipe:
2364   case Builtin::BIwork_group_commit_write_pipe:
2365   case Builtin::BIsub_group_commit_read_pipe:
2366   case Builtin::BIsub_group_commit_write_pipe: {
2367     const char *Name;
2368     if (BuiltinID == Builtin::BIcommit_read_pipe)
2369       Name = "__commit_read_pipe";
2370     else if (BuiltinID == Builtin::BIcommit_write_pipe)
2371       Name = "__commit_write_pipe";
2372     else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
2373       Name = "__work_group_commit_read_pipe";
2374     else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
2375       Name = "__work_group_commit_write_pipe";
2376     else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
2377       Name = "__sub_group_commit_read_pipe";
2378     else
2379       Name = "__sub_group_commit_write_pipe";
2380 
2381     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2382           *Arg1 = EmitScalarExpr(E->getArg(1));
2383     CGOpenCLRuntime OpenCLRT(CGM);
2384     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2385     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2386 
2387     // Building the generic function prototype.
2388     llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
2389     llvm::FunctionType *FTy =
2390         llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
2391                                 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2392 
2393     return RValue::get(
2394         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2395                            {Arg0, Arg1, PacketSize, PacketAlign}));
2396   }
2397   // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
2398   case Builtin::BIget_pipe_num_packets:
2399   case Builtin::BIget_pipe_max_packets: {
2400     const char *Name;
2401     if (BuiltinID == Builtin::BIget_pipe_num_packets)
2402       Name = "__get_pipe_num_packets";
2403     else
2404       Name = "__get_pipe_max_packets";
2405 
2406     // Building the generic function prototype.
2407     Value *Arg0 = EmitScalarExpr(E->getArg(0));
2408     CGOpenCLRuntime OpenCLRT(CGM);
2409     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2410     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2411     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
2412     llvm::FunctionType *FTy = llvm::FunctionType::get(
2413         Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2414 
2415     return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2416                                           {Arg0, PacketSize, PacketAlign}));
2417   }
2418 
2419   // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
2420   case Builtin::BIto_global:
2421   case Builtin::BIto_local:
2422   case Builtin::BIto_private: {
2423     auto Arg0 = EmitScalarExpr(E->getArg(0));
2424     auto NewArgT = llvm::PointerType::get(Int8Ty,
2425       CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2426     auto NewRetT = llvm::PointerType::get(Int8Ty,
2427       CGM.getContext().getTargetAddressSpace(
2428         E->getType()->getPointeeType().getAddressSpace()));
2429     auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
2430     llvm::Value *NewArg;
2431     if (Arg0->getType()->getPointerAddressSpace() !=
2432         NewArgT->getPointerAddressSpace())
2433       NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
2434     else
2435       NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
2436     auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
2437     auto NewCall =
2438         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
2439     return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
2440       ConvertType(E->getType())));
2441   }
2442 
2443   // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
2444   // It contains four different overload formats specified in Table 6.13.17.1.
2445   case Builtin::BIenqueue_kernel: {
2446     StringRef Name; // Generated function call name
2447     unsigned NumArgs = E->getNumArgs();
2448 
2449     llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
2450     llvm::Type *RangeTy = ConvertType(getContext().OCLNDRangeTy);
2451 
2452     llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
2453     llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
2454     llvm::Value *Range = EmitScalarExpr(E->getArg(2));
2455 
2456     if (NumArgs == 4) {
2457       // The most basic form of the call with parameters:
2458       // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
2459       Name = "__enqueue_kernel_basic";
2460       llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, Int8PtrTy};
2461       llvm::FunctionType *FTy = llvm::FunctionType::get(
2462           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false);
2463 
2464       llvm::Value *Block =
2465           Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy);
2466 
2467       return RValue::get(Builder.CreateCall(
2468           CGM.CreateRuntimeFunction(FTy, Name), {Queue, Flags, Range, Block}));
2469     }
2470     assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
2471 
2472     // Could have events and/or vaargs.
2473     if (E->getArg(3)->getType()->isBlockPointerType()) {
2474       // No events passed, but has variadic arguments.
2475       Name = "__enqueue_kernel_vaargs";
2476       llvm::Value *Block =
2477           Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy);
2478       // Create a vector of the arguments, as well as a constant value to
2479       // express to the runtime the number of variadic arguments.
2480       std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block,
2481                                          ConstantInt::get(IntTy, NumArgs - 4)};
2482       std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, Int8PtrTy,
2483                                           IntTy};
2484 
2485       // Add the variadics.
2486       for (unsigned I = 4; I < NumArgs; ++I) {
2487         llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I));
2488         unsigned TypeSizeInBytes =
2489             getContext()
2490                 .getTypeSizeInChars(E->getArg(I)->getType())
2491                 .getQuantity();
2492         Args.push_back(TypeSizeInBytes < 4
2493                            ? Builder.CreateZExt(ArgSize, Int32Ty)
2494                            : ArgSize);
2495       }
2496 
2497       llvm::FunctionType *FTy = llvm::FunctionType::get(
2498           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2499       return RValue::get(
2500           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2501                              llvm::ArrayRef<llvm::Value *>(Args)));
2502     }
2503     // Any calls now have event arguments passed.
2504     if (NumArgs >= 7) {
2505       llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
2506       unsigned AS4 =
2507           E->getArg(4)->getType()->isArrayType()
2508               ? E->getArg(4)->getType().getAddressSpace()
2509               : E->getArg(4)->getType()->getPointeeType().getAddressSpace();
2510       llvm::Type *EventPtrAS4Ty =
2511           EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS4));
2512       unsigned AS5 =
2513           E->getArg(5)->getType()->getPointeeType().getAddressSpace();
2514       llvm::Type *EventPtrAS5Ty =
2515           EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS5));
2516 
2517       llvm::Value *NumEvents = EmitScalarExpr(E->getArg(3));
2518       llvm::Value *EventList =
2519           E->getArg(4)->getType()->isArrayType()
2520               ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
2521               : EmitScalarExpr(E->getArg(4));
2522       llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
2523       llvm::Value *Block =
2524           Builder.CreateBitCast(EmitScalarExpr(E->getArg(6)), Int8PtrTy);
2525 
2526       std::vector<llvm::Type *> ArgTys = {
2527           QueueTy,       Int32Ty,       RangeTy,  Int32Ty,
2528           EventPtrAS4Ty, EventPtrAS5Ty, Int8PtrTy};
2529       std::vector<llvm::Value *> Args = {Queue,     Flags,    Range, NumEvents,
2530                                          EventList, ClkEvent, Block};
2531 
2532       if (NumArgs == 7) {
2533         // Has events but no variadics.
2534         Name = "__enqueue_kernel_basic_events";
2535         llvm::FunctionType *FTy = llvm::FunctionType::get(
2536             Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2537         return RValue::get(
2538             Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2539                                llvm::ArrayRef<llvm::Value *>(Args)));
2540       }
2541       // Has event info and variadics
2542       // Pass the number of variadics to the runtime function too.
2543       Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
2544       ArgTys.push_back(Int32Ty);
2545       Name = "__enqueue_kernel_events_vaargs";
2546 
2547       // Add the variadics.
2548       for (unsigned I = 7; I < NumArgs; ++I) {
2549         llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I));
2550         unsigned TypeSizeInBytes =
2551             getContext()
2552                 .getTypeSizeInChars(E->getArg(I)->getType())
2553                 .getQuantity();
2554         Args.push_back(TypeSizeInBytes < 4
2555                            ? Builder.CreateZExt(ArgSize, Int32Ty)
2556                            : ArgSize);
2557       }
2558       llvm::FunctionType *FTy = llvm::FunctionType::get(
2559           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2560       return RValue::get(
2561           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2562                              llvm::ArrayRef<llvm::Value *>(Args)));
2563     }
2564   }
2565   // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
2566   // parameter.
2567   case Builtin::BIget_kernel_work_group_size: {
2568     Value *Arg = EmitScalarExpr(E->getArg(0));
2569     Arg = Builder.CreateBitCast(Arg, Int8PtrTy);
2570     return RValue::get(
2571         Builder.CreateCall(CGM.CreateRuntimeFunction(
2572                                llvm::FunctionType::get(IntTy, Int8PtrTy, false),
2573                                "__get_kernel_work_group_size_impl"),
2574                            Arg));
2575   }
2576   case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
2577     Value *Arg = EmitScalarExpr(E->getArg(0));
2578     Arg = Builder.CreateBitCast(Arg, Int8PtrTy);
2579     return RValue::get(Builder.CreateCall(
2580         CGM.CreateRuntimeFunction(
2581             llvm::FunctionType::get(IntTy, Int8PtrTy, false),
2582             "__get_kernel_preferred_work_group_multiple_impl"),
2583         Arg));
2584   }
2585   case Builtin::BIprintf:
2586     if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice)
2587       return EmitCUDADevicePrintfCallExpr(E, ReturnValue);
2588     break;
2589   case Builtin::BI__builtin_canonicalize:
2590   case Builtin::BI__builtin_canonicalizef:
2591   case Builtin::BI__builtin_canonicalizel:
2592     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
2593 
2594   case Builtin::BI__builtin_thread_pointer: {
2595     if (!getContext().getTargetInfo().isTLSSupported())
2596       CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
2597     // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
2598     break;
2599   }
2600   }
2601 
2602   // If this is an alias for a lib function (e.g. __builtin_sin), emit
2603   // the call using the normal call path, but using the unmangled
2604   // version of the function name.
2605   if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
2606     return emitLibraryCall(*this, FD, E,
2607                            CGM.getBuiltinLibFunction(FD, BuiltinID));
2608 
2609   // If this is a predefined lib function (e.g. malloc), emit the call
2610   // using exactly the normal call path.
2611   if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
2612     return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee()));
2613 
2614   // Check that a call to a target specific builtin has the correct target
2615   // features.
2616   // This is down here to avoid non-target specific builtins, however, if
2617   // generic builtins start to require generic target features then we
2618   // can move this up to the beginning of the function.
2619   checkTargetFeatures(E, FD);
2620 
2621   // See if we have a target specific intrinsic.
2622   const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
2623   Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
2624   StringRef Prefix =
2625       llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
2626   if (!Prefix.empty()) {
2627     IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name);
    // NOTE: we don't need to perform a compatibility flag check here since the
    // intrinsics are declared in Builtins*.def via LANGBUILTIN, which filters
    // the MS builtins via ALL_MS_LANGUAGES, and are filtered earlier.
2631     if (IntrinsicID == Intrinsic::not_intrinsic)
2632       IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
2633   }
2634 
2635   if (IntrinsicID != Intrinsic::not_intrinsic) {
2636     SmallVector<Value*, 16> Args;
2637 
2638     // Find out if any arguments are required to be integer constant
2639     // expressions.
2640     unsigned ICEArguments = 0;
2641     ASTContext::GetBuiltinTypeError Error;
2642     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
2643     assert(Error == ASTContext::GE_None && "Should not codegen an error");
2644 
2645     Function *F = CGM.getIntrinsic(IntrinsicID);
2646     llvm::FunctionType *FTy = F->getFunctionType();
2647 
2648     for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
2649       Value *ArgValue;
2650       // If this is a normal argument, just emit it as a scalar.
2651       if ((ICEArguments & (1 << i)) == 0) {
2652         ArgValue = EmitScalarExpr(E->getArg(i));
2653       } else {
2654         // If this is required to be a constant, constant fold it so that we
2655         // know that the generated intrinsic gets a ConstantInt.
2656         llvm::APSInt Result;
2657         bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
2658         assert(IsConst && "Constant arg isn't actually constant?");
2659         (void)IsConst;
2660         ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
2661       }
2662 
2663       // If the intrinsic arg type is different from the builtin arg type
2664       // we need to do a bit cast.
2665       llvm::Type *PTy = FTy->getParamType(i);
2666       if (PTy != ArgValue->getType()) {
2667         assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
2668                "Must be able to losslessly bit cast to param");
2669         ArgValue = Builder.CreateBitCast(ArgValue, PTy);
2670       }
2671 
2672       Args.push_back(ArgValue);
2673     }
2674 
2675     Value *V = Builder.CreateCall(F, Args);
2676     QualType BuiltinRetType = E->getType();
2677 
2678     llvm::Type *RetTy = VoidTy;
2679     if (!BuiltinRetType->isVoidType())
2680       RetTy = ConvertType(BuiltinRetType);
2681 
2682     if (RetTy != V->getType()) {
2683       assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
2684              "Must be able to losslessly bit cast result type");
2685       V = Builder.CreateBitCast(V, RetTy);
2686     }
2687 
2688     return RValue::get(V);
2689   }
2690 
2691   // See if we have a target specific builtin that needs to be lowered.
2692   if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
2693     return RValue::get(V);
2694 
2695   ErrorUnsupported(E, "builtin function");
2696 
2697   // Unknown builtin, for now just dump it out and return undef.
2698   return GetUndefRValue(E->getType());
2699 }
2700 
2701 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
2702                                         unsigned BuiltinID, const CallExpr *E,
2703                                         llvm::Triple::ArchType Arch) {
2704   switch (Arch) {
2705   case llvm::Triple::arm:
2706   case llvm::Triple::armeb:
2707   case llvm::Triple::thumb:
2708   case llvm::Triple::thumbeb:
2709     return CGF->EmitARMBuiltinExpr(BuiltinID, E);
2710   case llvm::Triple::aarch64:
2711   case llvm::Triple::aarch64_be:
2712     return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
2713   case llvm::Triple::x86:
2714   case llvm::Triple::x86_64:
2715     return CGF->EmitX86BuiltinExpr(BuiltinID, E);
2716   case llvm::Triple::ppc:
2717   case llvm::Triple::ppc64:
2718   case llvm::Triple::ppc64le:
2719     return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
2720   case llvm::Triple::r600:
2721   case llvm::Triple::amdgcn:
2722     return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
2723   case llvm::Triple::systemz:
2724     return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
2725   case llvm::Triple::nvptx:
2726   case llvm::Triple::nvptx64:
2727     return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
2728   case llvm::Triple::wasm32:
2729   case llvm::Triple::wasm64:
2730     return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
2731   default:
2732     return nullptr;
2733   }
2734 }
2735 
2736 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
2737                                               const CallExpr *E) {
2738   if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
2739     assert(getContext().getAuxTargetInfo() && "Missing aux target info");
2740     return EmitTargetArchBuiltinExpr(
2741         this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
2742         getContext().getAuxTargetInfo()->getTriple().getArch());
2743   }
2744 
2745   return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
2746                                    getTarget().getTriple().getArch());
2747 }
2748 
2749 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
2750                                      NeonTypeFlags TypeFlags,
2751                                      bool V1Ty=false) {
2752   int IsQuad = TypeFlags.isQuad();
2753   switch (TypeFlags.getEltType()) {
2754   case NeonTypeFlags::Int8:
2755   case NeonTypeFlags::Poly8:
2756     return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
2757   case NeonTypeFlags::Int16:
2758   case NeonTypeFlags::Poly16:
2759   case NeonTypeFlags::Float16:
2760     return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
2761   case NeonTypeFlags::Int32:
2762     return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
2763   case NeonTypeFlags::Int64:
2764   case NeonTypeFlags::Poly64:
2765     return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
2766   case NeonTypeFlags::Poly128:
2767     // FIXME: i128 and f128 doesn't get fully support in Clang and llvm.
2768     // There is a lot of i128 and f128 API missing.
2769     // so we use v16i8 to represent poly128 and get pattern matched.
2770     return llvm::VectorType::get(CGF->Int8Ty, 16);
2771   case NeonTypeFlags::Float32:
2772     return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
2773   case NeonTypeFlags::Float64:
2774     return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
2775   }
2776   llvm_unreachable("Unknown vector element type!");
2777 }
2778 
2779 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
2780                                           NeonTypeFlags IntTypeFlags) {
2781   int IsQuad = IntTypeFlags.isQuad();
2782   switch (IntTypeFlags.getEltType()) {
2783   case NeonTypeFlags::Int32:
2784     return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
2785   case NeonTypeFlags::Int64:
2786     return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
2787   default:
2788     llvm_unreachable("Type can't be converted to floating-point!");
2789   }
2790 }
2791 
2792 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
2793   unsigned nElts = V->getType()->getVectorNumElements();
2794   Value* SV = llvm::ConstantVector::getSplat(nElts, C);
2795   return Builder.CreateShuffleVector(V, V, SV, "lane");
2796 }
2797 
2798 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
2799                                      const char *name,
2800                                      unsigned shift, bool rightshift) {
2801   unsigned j = 0;
2802   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
2803        ai != ae; ++ai, ++j)
2804     if (shift > 0 && shift == j)
2805       Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
2806     else
2807       Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
2808 
2809   return Builder.CreateCall(F, Ops, name);
2810 }
2811 
2812 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
2813                                             bool neg) {
2814   int SV = cast<ConstantInt>(V)->getSExtValue();
2815   return ConstantInt::get(Ty, neg ? -SV : SV);
2816 }
2817 
2818 // \brief Right-shift a vector by a constant.
2819 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
2820                                           llvm::Type *Ty, bool usgn,
2821                                           const char *name) {
2822   llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
2823 
2824   int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
2825   int EltSize = VTy->getScalarSizeInBits();
2826 
2827   Vec = Builder.CreateBitCast(Vec, Ty);
2828 
2829   // lshr/ashr are undefined when the shift amount is equal to the vector
2830   // element size.
2831   if (ShiftAmt == EltSize) {
2832     if (usgn) {
2833       // Right-shifting an unsigned value by its size yields 0.
2834       return llvm::ConstantAggregateZero::get(VTy);
2835     } else {
2836       // Right-shifting a signed value by its size is equivalent
2837       // to a shift of size-1.
2838       --ShiftAmt;
2839       Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
2840     }
2841   }
2842 
2843   Shift = EmitNeonShiftVector(Shift, Ty, false);
2844   if (usgn)
2845     return Builder.CreateLShr(Vec, Shift, name);
2846   else
2847     return Builder.CreateAShr(Vec, Shift, name);
2848 }
2849 
/// Bit flags that are OR-ed together to form the TypeModifier value of the
/// NEONMAP table rows in this file. How each flag is interpreted is up to the
/// code that consumes those tables.
enum {
  // Independent single-bit flags.
  AddRetType = 0x001,
  Add1ArgType = 0x002,
  Add2ArgTypes = 0x004,

  VectorizeRetType = 0x008,
  VectorizeArgTypes = 0x010,

  InventFloatType = 0x020,
  UnsignedAlts = 0x040,

  Use64BitVectors = 0x080,
  Use128BitVectors = 0x100,

  // Shorthands for combinations the tables use repeatedly.
  Vectorize1ArgType = VectorizeArgTypes | Add1ArgType,
  VectorRet = VectorizeRetType | AddRetType,
  VectorRetGetArgs01 = VectorRet | Add2ArgTypes | VectorizeArgTypes,
  FpCmpzModifiers = VectorRet | Add1ArgType | InventFloatType
};
2871 
namespace {
/// One row of a NEON builtin table. Rows are written with the NEONMAP0/1/2
/// macros and compare by BuiltinID only.
struct NeonIntrinsicInfo {
  const char *NameHint;      // Stringified builtin base name.
  unsigned BuiltinID;        // NEON::BI__builtin_neon_<name> value.
  unsigned LLVMIntrinsic;    // Intrinsic ID, or 0 for NEONMAP0 rows.
  unsigned AltLLVMIntrinsic; // Second intrinsic ID; 0 unless NEONMAP2.
  unsigned TypeModifier;     // OR-ed modifier flags, 0 when none.

  /// True when this row's builtin ID is strictly below \p RHSBuiltinID.
  bool operator<(unsigned RHSBuiltinID) const {
    return RHSBuiltinID > BuiltinID;
  }
  /// Orders two rows by their builtin IDs.
  bool operator<(const NeonIntrinsicInfo &TE) const {
    return *this < TE.BuiltinID;
  }
};
} // end anonymous namespace
2888 
// Each NEONMAPn macro expands to the brace initializer of one
// NeonIntrinsicInfo row: { name string, builtin ID, intrinsic, alternate
// intrinsic, type modifier }.

// Row with no LLVM intrinsic and no modifiers.
#define NEONMAP0(Base)                                                         \
  { #Base, NEON::BI__builtin_neon_##Base, 0, 0, 0 }

// Row with a single LLVM intrinsic.
#define NEONMAP1(Base, Intr, Modifier)                                         \
  { #Base, NEON::BI__builtin_neon_##Base, Intrinsic::Intr, 0, Modifier }

// Row with a primary and an alternate LLVM intrinsic.
#define NEONMAP2(Base, Intr, AltIntr, Modifier)                                \
  { #Base, NEON::BI__builtin_neon_##Base, Intrinsic::Intr,                     \
    Intrinsic::AltIntr, Modifier }
2900 
// Table of NEON builtins whose intrinsic columns use the arm_neon_* IDs (plus
// the generic ctlz/ctpop). Each NEONMAPn row records the builtin ID, up to
// two LLVM intrinsic IDs and a TypeModifier flag set; NEONMAP0 rows name a
// builtin without attaching any intrinsic to it.
// NOTE(review): rows appear to be ordered by builtin name; presumably the
// lookup code relies on the table being sorted by BuiltinID (see
// NeonIntrinsicInfo::operator<) - confirm before inserting new rows.
static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vabs_v, arm_neon_vabs, 0),
  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
  NEONMAP0(vaddhn_v),
  NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
  NEONMAP1(vaeseq_v, arm_neon_aese, 0),
  NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
  NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vcage_v, arm_neon_vacge, 0),
  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcale_v, arm_neon_vacge, 0),
  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvt_s32_v),
  NEONMAP0(vcvt_s64_v),
  NEONMAP0(vcvt_u32_v),
  NEONMAP0(vcvt_u64_v),
  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_s32_v),
  NEONMAP0(vcvtq_s64_v),
  NEONMAP0(vcvtq_u32_v),
  NEONMAP0(vcvtq_u64_v),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP0(vld1_dup_v),
  NEONMAP1(vld1_v, arm_neon_vld1, 0),
  NEONMAP0(vld1q_dup_v),
  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2_v, arm_neon_vld2, 0),
  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3_v, arm_neon_vld3, 0),
  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4_v, arm_neon_vld4, 0),
  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP0(vmull_v),
  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
  NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
  NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
  NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
  NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
  NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
  NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
  NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
  NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
  NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
  NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
  NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
  NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
  NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
  NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
  NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP1(vst1_v, arm_neon_vst1, 0),
  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2_v, arm_neon_vst2, 0),
  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3_v, arm_neon_vst3, 0),
  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4_v, arm_neon_vst4, 0),
  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtrn_v),
  NEONMAP0(vtrnq_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
  NEONMAP0(vuzp_v),
  NEONMAP0(vuzpq_v),
  NEONMAP0(vzip_v),
  NEONMAP0(vzipq_v)
};
3117 
// Table of NEON builtins whose intrinsic columns use the aarch64_neon_* and
// aarch64_crypto_* IDs (plus the generic ctlz/ctpop). Each NEONMAPn row
// records the builtin ID, up to two LLVM intrinsic IDs and a TypeModifier
// flag set; NEONMAP0 rows name a builtin without attaching any intrinsic.
// NOTE(review): rows appear to be ordered by builtin name; presumably the
// lookup code relies on the table being sorted by BuiltinID (see
// NeonIntrinsicInfo::operator<) - confirm before inserting new rows.
static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
  NEONMAP0(vaddhn_v),
  NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
  NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
  NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
};
3232 
3233 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
3234   NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
3235   NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
3236   NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
3237   NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3238   NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3239   NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3240   NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3241   NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3242   NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3243   NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3244   NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3245   NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
3246   NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3247   NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
3248   NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3249   NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3250   NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3251   NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3252   NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3253   NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3254   NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3255   NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3256   NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3257   NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3258   NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3259   NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3260   NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3261   NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3262   NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3263   NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3264   NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3265   NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3266   NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3267   NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3268   NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3269   NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3270   NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3271   NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3272   NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3273   NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3274   NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3275   NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3276   NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3277   NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3278   NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3279   NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3280   NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3281   NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3282   NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
3283   NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3284   NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3285   NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3286   NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3287   NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3288   NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3289   NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3290   NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3291   NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3292   NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3293   NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3294   NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3295   NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3296   NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3297   NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3298   NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3299   NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3300   NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3301   NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3302   NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3303   NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
3304   NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
3305   NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
3306   NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3307   NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3308   NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3309   NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3310   NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3311   NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3312   NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3313   NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3314   NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3315   NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3316   NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3317   NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
3318   NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3319   NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
3320   NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3321   NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3322   NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
3323   NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
3324   NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3325   NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3326   NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
3327   NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
3328   NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
3329   NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
3330   NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
3331   NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
3332   NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
3333   NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
3334   NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3335   NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3336   NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3337   NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3338   NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
3339   NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3340   NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3341   NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3342   NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
3343   NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3344   NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
3345   NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
3346   NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
3347   NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3348   NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3349   NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
3350   NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
3351   NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3352   NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3353   NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
3354   NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
3355   NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
3356   NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
3357   NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3358   NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3359   NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3360   NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3361   NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
3362   NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3363   NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3364   NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3365   NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3366   NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3367   NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3368   NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
3369   NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
3370   NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3371   NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3372   NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3373   NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3374   NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
3375   NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
3376   NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
3377   NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
3378   NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3379   NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3380   NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
3381   NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
3382   NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
3383   NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3384   NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3385   NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3386   NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3387   NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
3388   NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3389   NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3390   NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3391   NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3392   NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
3393   NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
3394   NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3395   NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3396   NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
3397   NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
3398   NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
3399   NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
3400   NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
3401   NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
3402   NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
3403   NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
3404   NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
3405   NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
3406   NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
3407   NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
3408   NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
3409   NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
3410   NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
3411   NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
3412   NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
3413   NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
3414   NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
3415   NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
3416   NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3417   NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
3418   NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3419   NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
3420   NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
3421   NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
3422   NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3423   NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
3424   NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3425   NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
3426 };
3427 
3428 #undef NEONMAP0
3429 #undef NEONMAP1
3430 #undef NEONMAP2
3431 
// One-shot flags, one per intrinsic table, all starting out false.
// NOTE(review): presumably each is passed as the MapProvenSorted argument of
// findNeonIntrinsicInMap() so that a table's sortedness is asserted only on
// its first lookup -- confirm at the call sites.
static bool NEONSIMDIntrinsicsProvenSorted = false;

static bool AArch64SIMDIntrinsicsProvenSorted = false;
static bool AArch64SISDIntrinsicsProvenSorted = false;
3436 
3437 
3438 static const NeonIntrinsicInfo *
3439 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
3440                        unsigned BuiltinID, bool &MapProvenSorted) {
3441 
3442 #ifndef NDEBUG
3443   if (!MapProvenSorted) {
3444     assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
3445     MapProvenSorted = true;
3446   }
3447 #endif
3448 
3449   const NeonIntrinsicInfo *Builtin =
3450       std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
3451 
3452   if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
3453     return Builtin;
3454 
3455   return nullptr;
3456 }
3457 
3458 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
3459                                                    unsigned Modifier,
3460                                                    llvm::Type *ArgType,
3461                                                    const CallExpr *E) {
3462   int VectorSize = 0;
3463   if (Modifier & Use64BitVectors)
3464     VectorSize = 64;
3465   else if (Modifier & Use128BitVectors)
3466     VectorSize = 128;
3467 
3468   // Return type.
3469   SmallVector<llvm::Type *, 3> Tys;
3470   if (Modifier & AddRetType) {
3471     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
3472     if (Modifier & VectorizeRetType)
3473       Ty = llvm::VectorType::get(
3474           Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
3475 
3476     Tys.push_back(Ty);
3477   }
3478 
3479   // Arguments.
3480   if (Modifier & VectorizeArgTypes) {
3481     int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
3482     ArgType = llvm::VectorType::get(ArgType, Elts);
3483   }
3484 
3485   if (Modifier & (Add1ArgType | Add2ArgTypes))
3486     Tys.push_back(ArgType);
3487 
3488   if (Modifier & Add2ArgTypes)
3489     Tys.push_back(ArgType);
3490 
3491   if (Modifier & InventFloatType)
3492     Tys.push_back(FloatTy);
3493 
3494   return CGM.getIntrinsic(IntrinsicID, Tys);
3495 }
3496 
3497 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
3498                                             const NeonIntrinsicInfo &SISDInfo,
3499                                             SmallVectorImpl<Value *> &Ops,
3500                                             const CallExpr *E) {
3501   unsigned BuiltinID = SISDInfo.BuiltinID;
3502   unsigned int Int = SISDInfo.LLVMIntrinsic;
3503   unsigned Modifier = SISDInfo.TypeModifier;
3504   const char *s = SISDInfo.NameHint;
3505 
3506   switch (BuiltinID) {
3507   case NEON::BI__builtin_neon_vcled_s64:
3508   case NEON::BI__builtin_neon_vcled_u64:
3509   case NEON::BI__builtin_neon_vcles_f32:
3510   case NEON::BI__builtin_neon_vcled_f64:
3511   case NEON::BI__builtin_neon_vcltd_s64:
3512   case NEON::BI__builtin_neon_vcltd_u64:
3513   case NEON::BI__builtin_neon_vclts_f32:
3514   case NEON::BI__builtin_neon_vcltd_f64:
3515   case NEON::BI__builtin_neon_vcales_f32:
3516   case NEON::BI__builtin_neon_vcaled_f64:
3517   case NEON::BI__builtin_neon_vcalts_f32:
3518   case NEON::BI__builtin_neon_vcaltd_f64:
3519     // Only one direction of comparisons actually exist, cmle is actually a cmge
3520     // with swapped operands. The table gives us the right intrinsic but we
3521     // still need to do the swap.
3522     std::swap(Ops[0], Ops[1]);
3523     break;
3524   }
3525 
3526   assert(Int && "Generic code assumes a valid intrinsic");
3527 
3528   // Determine the type(s) of this overloaded AArch64 intrinsic.
3529   const Expr *Arg = E->getArg(0);
3530   llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
3531   Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
3532 
3533   int j = 0;
3534   ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
3535   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3536        ai != ae; ++ai, ++j) {
3537     llvm::Type *ArgTy = ai->getType();
3538     if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
3539              ArgTy->getPrimitiveSizeInBits())
3540       continue;
3541 
3542     assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
3543     // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
3544     // it before inserting.
3545     Ops[j] =
3546         CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
3547     Ops[j] =
3548         CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
3549   }
3550 
3551   Value *Result = CGF.EmitNeonCall(F, Ops, s);
3552   llvm::Type *ResultType = CGF.ConvertType(E->getType());
3553   if (ResultType->getPrimitiveSizeInBits() <
3554       Result->getType()->getPrimitiveSizeInBits())
3555     return CGF.Builder.CreateExtractElement(Result, C0);
3556 
3557   return CGF.Builder.CreateBitCast(Result, ResultType, s);
3558 }
3559 
/// Emit IR for a NEON builtin call, with lowering selected by \p BuiltinID.
///
/// The last argument of \p E must be an integer constant; it is decoded as a
/// NeonTypeFlags value that selects the vector type the builtin operates on.
/// Builtins listed in the switch below get dedicated lowering. Any other
/// builtin (and the few cases that 'break' out of the switch) is emitted as a
/// call to the intrinsic found by LookupNeonLLVMIntrinsic(), with the result
/// bitcast to the builtin's result type.
///
/// \param BuiltinID the builtin being emitted.
/// \param LLVMIntrinsic primary intrinsic ID for this builtin.
/// \param AltLLVMIntrinsic alternate intrinsic ID; which of the two is used
///        depends on \p Modifier and on the individual case below.
/// \param NameHint name given to emitted values in the cases that do not use
///        a fixed name.
/// \param Modifier type-modifier flags (e.g. UnsignedAlts, plus the flags
///        consumed by LookupNeonLLVMIntrinsic()).
/// \param E the builtin call expression.
/// \param Ops operand values for the call; modified in place by many cases.
/// \param PtrOp0 address whose alignment is appended as an operand by the
///        vld1 and vst* cases; also the address loaded from by vld1_dup.
/// \param PtrOp1 address whose alignment is passed by the vld2/3/4 cases.
/// \returns the emitted value, or nullptr if the type argument is not an
///          integer constant or GetNeonType() yields no type.
Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
    unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
    const char *NameHint, unsigned Modifier, const CallExpr *E,
    SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
  // Get the last argument, which specifies the vector type.
  llvm::APSInt NeonTypeConst;
  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
    return nullptr;

  // Determine the type of this overloaded NEON intrinsic.
  NeonTypeFlags Type(NeonTypeConst.getZExtValue());
  bool Usgn = Type.isUnsigned();
  bool Quad = Type.isQuad();

  llvm::VectorType *VTy = GetNeonType(this, Type);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  // Helper: the alignment of an address as an i32 constant.
  auto getAlignmentValue32 = [&](Address addr) -> Value* {
    return Builder.getInt32(addr.getAlignment().getQuantity());
  };

  // Default intrinsic choice: the primary one, unless the entry is flagged
  // UnsignedAlts and the type is signed, in which case the alternate is used.
  // Several cases below override or bypass this choice.
  unsigned Int = LLVMIntrinsic;
  if ((Modifier & UnsignedAlts) && !Usgn)
    Int = AltLLVMIntrinsic;

  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_vabs_v:
  case NEON::BI__builtin_neon_vabsq_v:
    // Floating-point vectors use the generic fabs intrinsic.
    if (VTy->getElementType()->isFloatingPointTy())
      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
  case NEON::BI__builtin_neon_vaddhn_v: {
    // The inputs have elements twice as wide as the result's.
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %sum = add <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");

    // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
  }
  case NEON::BI__builtin_neon_vcale_v:
  case NEON::BI__builtin_neon_vcaleq_v:
  case NEON::BI__builtin_neon_vcalt_v:
  case NEON::BI__builtin_neon_vcaltq_v:
    // The "less" forms are emitted as the "greater" forms with the operands
    // exchanged.
    std::swap(Ops[0], Ops[1]);
    // FALLTHROUGH (intentional): continue into the vcage/vcagt lowering.
  case NEON::BI__builtin_neon_vcage_v:
  case NEON::BI__builtin_neon_vcageq_v:
  case NEON::BI__builtin_neon_vcagt_v:
  case NEON::BI__builtin_neon_vcagtq_v: {
    // The intrinsic is overloaded on the result vector type and on a
    // floating-point vector type with the same element width and count.
    llvm::Type *VecFlt = llvm::VectorType::get(
        VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
        VTy->getNumElements());
    llvm::Type *Tys[] = { VTy, VecFlt };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vclz_v:
  case NEON::BI__builtin_neon_vclzq_v:
    // We generate target-independent intrinsic, which needs a second argument
    // for whether or not clz of zero is undefined; on ARM it isn't.
    Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
    break;
  case NEON::BI__builtin_neon_vcvt_f32_v:
  case NEON::BI__builtin_neon_vcvtq_f32_v:
    // Integer -> float32 conversion, emitted as a plain uitofp/sitofp.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_n_f32_v:
  case NEON::BI__builtin_neon_vcvt_n_f64_v:
  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
    // Note the selection here is the reverse of the UnsignedAlts default
    // above: unsigned types use the primary intrinsic, signed the alternate.
    Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_n_s32_v:
  case NEON::BI__builtin_neon_vcvt_n_u32_v:
  case NEON::BI__builtin_neon_vcvt_n_s64_v:
  case NEON::BI__builtin_neon_vcvt_n_u64_v:
  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_s32_v:
  case NEON::BI__builtin_neon_vcvt_u32_v:
  case NEON::BI__builtin_neon_vcvt_s64_v:
  case NEON::BI__builtin_neon_vcvt_u64_v:
  case NEON::BI__builtin_neon_vcvtq_s32_v:
  case NEON::BI__builtin_neon_vcvtq_u32_v:
  case NEON::BI__builtin_neon_vcvtq_s64_v:
  case NEON::BI__builtin_neon_vcvtq_u64_v: {
    // Float -> integer conversion, emitted as a plain fptoui/fptosi.
    Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
    return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v:
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v:
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v:
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    // These all call the table's primary intrinsic, overloaded on the integer
    // result type and the matching floating-point source type.
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vext_v:
  case NEON::BI__builtin_neon_vextq_v: {
    // Emitted as a shuffle of the two inputs using the consecutive indices
    // CV, CV+1, ..., where CV is the constant third operand.
    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
    SmallVector<uint32_t, 16> Indices;
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
      Indices.push_back(i+CV);

    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
  }
  case NEON::BI__builtin_neon_vfma_v:
  case NEON::BI__builtin_neon_vfmaq_v: {
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);

    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
  }
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    // The intrinsic takes the alignment as a trailing operand.
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
  }
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v:
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v:
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v: {
    // Call the intrinsic on Ops[1] (plus alignment), then store its result
    // through Ops[0] after casting Ops[0] to a pointer to the result type.
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    Value *Align = getAlignmentValue32(PtrOp1);
    Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    // Load a single element, insert it into lane 0 of an undef vector, and
    // splat that lane across the vector.
    Value *V = UndefValue::get(Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
    LoadInst *Ld = Builder.CreateLoad(PtrOp0);
    llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v:
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v:
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    // Cast Ops[2] up to (but not including) the last operand to the vector
    // type; the last operand is left untouched.
    for (unsigned I = 2; I < Ops.size() - 1; ++I)
      Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
    Ops.push_back(getAlignmentValue32(PtrOp1));
    // Everything except Ops[0] is passed to the intrinsic; the result is then
    // stored through Ops[0].
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vmovl_v: {
    // Widen each element: zero-extend if unsigned, sign-extend otherwise.
    llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
    if (Usgn)
      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
  }
  case NEON::BI__builtin_neon_vmovn_v: {
    // Narrow each element by truncation.
    llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
  }
  case NEON::BI__builtin_neon_vmull_v:
    // FIXME: the integer vmull operations could be emitted in terms of pure
    // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
    // hoisting the exts outside loops. Until global ISel comes along that can
    // see through such movement this leads to bad CodeGen. So we need an
    // intrinsic for now.
    Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
    Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  case NEON::BI__builtin_neon_vpadal_v:
  case NEON::BI__builtin_neon_vpadalq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy =
      llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vpaddl_v:
  case NEON::BI__builtin_neon_vpaddlq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
  }
  case NEON::BI__builtin_neon_vqdmlal_v:
  case NEON::BI__builtin_neon_vqdmlsl_v: {
    // Two-step lowering: first call the primary intrinsic on every operand
    // after Ops[0], then call the alternate intrinsic on Ops[0] and that
    // intermediate result.
    SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
    Ops.resize(2);
    return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vqshl_n_v:
  case NEON::BI__builtin_neon_vqshlq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
                        1, false);
  case NEON::BI__builtin_neon_vqshlu_n_v:
  case NEON::BI__builtin_neon_vqshluq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
                        1, false);
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:
  case NEON::BI__builtin_neon_vrsqrte_v:
  case NEON::BI__builtin_neon_vrsqrteq_v:
    // Floating-point types use the primary intrinsic; all other types use
    // the alternate.
    Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);

  case NEON::BI__builtin_neon_vrshr_n_v:
  case NEON::BI__builtin_neon_vrshrq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
                        1, true);
  case NEON::BI__builtin_neon_vshl_n_v:
  case NEON::BI__builtin_neon_vshlq_n_v:
    // Shift left by an immediate, emitted as a plain 'shl'.
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
    return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
                             "vshl_n");
  case NEON::BI__builtin_neon_vshll_n_v: {
    // Extend each element (zext if unsigned, sext otherwise), then shift
    // left.
    llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    if (Usgn)
      Ops[0] = Builder.CreateZExt(Ops[0], VTy);
    else
      Ops[0] = Builder.CreateSExt(Ops[0], VTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
    return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
  }
  case NEON::BI__builtin_neon_vshrn_n_v: {
    // Shift right in the wide type (lshr if unsigned, ashr otherwise), then
    // truncate to the narrow result type.
    llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
    if (Usgn)
      Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
    return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
  }
  case NEON::BI__builtin_neon_vshr_n_v:
  case NEON::BI__builtin_neon_vshrq_n_v:
    return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v:
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v:
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v:
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v:
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v:
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v: {
    llvm::Type *Tys[] = {Int8PtrTy, Ty};
    // The store intrinsics take the alignment as a trailing operand.
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
  }
  case NEON::BI__builtin_neon_vsubhn_v: {
    // The inputs have elements twice as wide as the result's.
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %diff = sub <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");

    // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
  }
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    // Ops[0] is treated as a pointer to the result vectors; two shuffles of
    // Ops[1] and Ops[2] are stored to its elements 0 and 1.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      // Index pairs (i+vi, i+e+vi) for even i.
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(i+vi);
        Indices.push_back(i+e+vi);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    // The value returned is the second store instruction.
    return SV;
  }
  case NEON::BI__builtin_neon_vtst_v:
  case NEON::BI__builtin_neon_vtstq_v: {
    // (a & b) != 0, sign-extended so that true lanes become all-ones.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                ConstantAggregateZero::get(Ty));
    return Builder.CreateSExt(Ops[0], Ty, "vtst");
  }
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    // Same store-two-shuffles structure as vtrn above, with indices 2*i+vi
    // (even positions for vi == 0, odd positions for vi == 1).
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(2*i+vi);

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    // Same store-two-shuffles structure as vtrn above, with index pairs
    // (k, k+e) where k = (i + vi*e) / 2.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back((i + vi*e) >> 1);
        Indices.push_back(((i + vi*e) >> 1)+e);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  }

  // Generic path: reached by the default case and by the cases above that
  // 'break' instead of returning.
  assert(Int && "Expected valid intrinsic number");

  // Determine the type(s) of this overloaded AArch64 intrinsic.
  Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);

  Value *Result = EmitNeonCall(F, Ops, NameHint);
  llvm::Type *ResultType = ConvertType(E->getType());
  // AArch64 intrinsic one-element vector type cast to
  // scalar type expected by the builtin
  return Builder.CreateBitCast(Result, ResultType, NameHint);
}
3985 
3986 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
3987     Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
3988     const CmpInst::Predicate Ip, const Twine &Name) {
3989   llvm::Type *OTy = Op->getType();
3990 
3991   // FIXME: this is utterly horrific. We should not be looking at previous
3992   // codegen context to find out what needs doing. Unfortunately TableGen
3993   // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
3994   // (etc).
3995   if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
3996     OTy = BI->getOperand(0)->getType();
3997 
3998   Op = Builder.CreateBitCast(Op, OTy);
3999   if (OTy->getScalarType()->isFloatingPointTy()) {
4000     Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
4001   } else {
4002     Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
4003   }
4004   return Builder.CreateSExt(Op, Ty, Name);
4005 }
4006 
4007 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
4008                                  Value *ExtOp, Value *IndexOp,
4009                                  llvm::Type *ResTy, unsigned IntID,
4010                                  const char *Name) {
4011   SmallVector<Value *, 2> TblOps;
4012   if (ExtOp)
4013     TblOps.push_back(ExtOp);
4014 
4015   // Build a vector containing sequential number like (0, 1, 2, ..., 15)
4016   SmallVector<uint32_t, 16> Indices;
4017   llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
4018   for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
4019     Indices.push_back(2*i);
4020     Indices.push_back(2*i+1);
4021   }
4022 
4023   int PairPos = 0, End = Ops.size() - 1;
4024   while (PairPos < End) {
4025     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4026                                                      Ops[PairPos+1], Indices,
4027                                                      Name));
4028     PairPos += 2;
4029   }
4030 
4031   // If there's an odd number of 64-bit lookup table, fill the high 64-bit
4032   // of the 128-bit lookup table with zero.
4033   if (PairPos == End) {
4034     Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
4035     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4036                                                      ZeroTbl, Indices, Name));
4037   }
4038 
4039   Function *TblF;
4040   TblOps.push_back(IndexOp);
4041   TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
4042 
4043   return CGF.EmitNeonCall(TblF, TblOps, Name);
4044 }
4045 
4046 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
4047   unsigned Value;
4048   switch (BuiltinID) {
4049   default:
4050     return nullptr;
4051   case ARM::BI__builtin_arm_nop:
4052     Value = 0;
4053     break;
4054   case ARM::BI__builtin_arm_yield:
4055   case ARM::BI__yield:
4056     Value = 1;
4057     break;
4058   case ARM::BI__builtin_arm_wfe:
4059   case ARM::BI__wfe:
4060     Value = 2;
4061     break;
4062   case ARM::BI__builtin_arm_wfi:
4063   case ARM::BI__wfi:
4064     Value = 3;
4065     break;
4066   case ARM::BI__builtin_arm_sev:
4067   case ARM::BI__sev:
4068     Value = 4;
4069     break;
4070   case ARM::BI__builtin_arm_sevl:
4071   case ARM::BI__sevl:
4072     Value = 5;
4073     break;
4074   }
4075 
4076   return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
4077                             llvm::ConstantInt::get(Int32Ty, Value));
4078 }
4079 
4080 // Generates the IR for the read/write special register builtin,
4081 // ValueType is the type of the value that is to be written or read,
4082 // RegisterType is the type of the register being written to or read from.
4083 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
4084                                          const CallExpr *E,
4085                                          llvm::Type *RegisterType,
4086                                          llvm::Type *ValueType,
4087                                          bool IsRead,
4088                                          StringRef SysReg = "") {
4089   // write and register intrinsics only support 32 and 64 bit operations.
4090   assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
4091           && "Unsupported size for register.");
4092 
4093   CodeGen::CGBuilderTy &Builder = CGF.Builder;
4094   CodeGen::CodeGenModule &CGM = CGF.CGM;
4095   LLVMContext &Context = CGM.getLLVMContext();
4096 
4097   if (SysReg.empty()) {
4098     const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
4099     SysReg = cast<StringLiteral>(SysRegStrExpr)->getString();
4100   }
4101 
4102   llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
4103   llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4104   llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4105 
4106   llvm::Type *Types[] = { RegisterType };
4107 
4108   bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
4109   assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
4110             && "Can't fit 64-bit value in 32-bit register");
4111 
4112   if (IsRead) {
4113     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
4114     llvm::Value *Call = Builder.CreateCall(F, Metadata);
4115 
4116     if (MixedTypes)
4117       // Read into 64 bit register and then truncate result to 32 bit.
4118       return Builder.CreateTrunc(Call, ValueType);
4119 
4120     if (ValueType->isPointerTy())
4121       // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
4122       return Builder.CreateIntToPtr(Call, ValueType);
4123 
4124     return Call;
4125   }
4126 
4127   llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
4128   llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
4129   if (MixedTypes) {
4130     // Extend 32 bit write value to 64 bit to pass to write.
4131     ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
4132     return Builder.CreateCall(F, { Metadata, ArgValue });
4133   }
4134 
4135   if (ValueType->isPointerTy()) {
4136     // Have VoidPtrTy ArgValue but want to return an i32/i64.
4137     ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
4138     return Builder.CreateCall(F, { Metadata, ArgValue });
4139   }
4140 
4141   return Builder.CreateCall(F, { Metadata, ArgValue });
4142 }
4143 
4144 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
4145 /// argument that specifies the vector type.
4146 static bool HasExtraNeonArgument(unsigned BuiltinID) {
4147   switch (BuiltinID) {
4148   default: break;
4149   case NEON::BI__builtin_neon_vget_lane_i8:
4150   case NEON::BI__builtin_neon_vget_lane_i16:
4151   case NEON::BI__builtin_neon_vget_lane_i32:
4152   case NEON::BI__builtin_neon_vget_lane_i64:
4153   case NEON::BI__builtin_neon_vget_lane_f32:
4154   case NEON::BI__builtin_neon_vgetq_lane_i8:
4155   case NEON::BI__builtin_neon_vgetq_lane_i16:
4156   case NEON::BI__builtin_neon_vgetq_lane_i32:
4157   case NEON::BI__builtin_neon_vgetq_lane_i64:
4158   case NEON::BI__builtin_neon_vgetq_lane_f32:
4159   case NEON::BI__builtin_neon_vset_lane_i8:
4160   case NEON::BI__builtin_neon_vset_lane_i16:
4161   case NEON::BI__builtin_neon_vset_lane_i32:
4162   case NEON::BI__builtin_neon_vset_lane_i64:
4163   case NEON::BI__builtin_neon_vset_lane_f32:
4164   case NEON::BI__builtin_neon_vsetq_lane_i8:
4165   case NEON::BI__builtin_neon_vsetq_lane_i16:
4166   case NEON::BI__builtin_neon_vsetq_lane_i32:
4167   case NEON::BI__builtin_neon_vsetq_lane_i64:
4168   case NEON::BI__builtin_neon_vsetq_lane_f32:
4169   case NEON::BI__builtin_neon_vsha1h_u32:
4170   case NEON::BI__builtin_neon_vsha1cq_u32:
4171   case NEON::BI__builtin_neon_vsha1pq_u32:
4172   case NEON::BI__builtin_neon_vsha1mq_u32:
4173   case ARM::BI_MoveToCoprocessor:
4174   case ARM::BI_MoveToCoprocessor2:
4175     return false;
4176   }
4177   return true;
4178 }
4179 
4180 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
4181                                            const CallExpr *E) {
4182   if (auto Hint = GetValueForARMHint(BuiltinID))
4183     return Hint;
4184 
4185   if (BuiltinID == ARM::BI__emit) {
4186     bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
4187     llvm::FunctionType *FTy =
4188         llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
4189 
4190     APSInt Value;
4191     if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
4192       llvm_unreachable("Sema will ensure that the parameter is constant");
4193 
4194     uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
4195 
4196     llvm::InlineAsm *Emit =
4197         IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
4198                                  /*SideEffects=*/true)
4199                 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
4200                                  /*SideEffects=*/true);
4201 
4202     return Builder.CreateCall(Emit);
4203   }
4204 
4205   if (BuiltinID == ARM::BI__builtin_arm_dbg) {
4206     Value *Option = EmitScalarExpr(E->getArg(0));
4207     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
4208   }
4209 
4210   if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
4211     Value *Address = EmitScalarExpr(E->getArg(0));
4212     Value *RW      = EmitScalarExpr(E->getArg(1));
4213     Value *IsData  = EmitScalarExpr(E->getArg(2));
4214 
4215     // Locality is not supported on ARM target
4216     Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
4217 
4218     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
4219     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
4220   }
4221 
4222   if (BuiltinID == ARM::BI__builtin_arm_rbit) {
4223     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_rbit),
4224                                                EmitScalarExpr(E->getArg(0)),
4225                               "rbit");
4226   }
4227 
4228   if (BuiltinID == ARM::BI__clear_cache) {
4229     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4230     const FunctionDecl *FD = E->getDirectCallee();
4231     Value *Ops[2];
4232     for (unsigned i = 0; i < 2; i++)
4233       Ops[i] = EmitScalarExpr(E->getArg(i));
4234     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4235     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4236     StringRef Name = FD->getName();
4237     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4238   }
4239 
4240   if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
4241       BuiltinID == ARM::BI__builtin_arm_mcrr2) {
4242     Function *F;
4243 
4244     switch (BuiltinID) {
4245     default: llvm_unreachable("unexpected builtin");
4246     case ARM::BI__builtin_arm_mcrr:
4247       F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
4248       break;
4249     case ARM::BI__builtin_arm_mcrr2:
4250       F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
4251       break;
4252     }
4253 
4254     // MCRR{2} instruction has 5 operands but
4255     // the intrinsic has 4 because Rt and Rt2
4256     // are represented as a single unsigned 64
4257     // bit integer in the intrinsic definition
4258     // but internally it's represented as 2 32
4259     // bit integers.
4260 
4261     Value *Coproc = EmitScalarExpr(E->getArg(0));
4262     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4263     Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
4264     Value *CRm = EmitScalarExpr(E->getArg(3));
4265 
4266     Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4267     Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
4268     Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
4269     Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
4270 
4271     return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
4272   }
4273 
4274   if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
4275       BuiltinID == ARM::BI__builtin_arm_mrrc2) {
4276     Function *F;
4277 
4278     switch (BuiltinID) {
4279     default: llvm_unreachable("unexpected builtin");
4280     case ARM::BI__builtin_arm_mrrc:
4281       F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
4282       break;
4283     case ARM::BI__builtin_arm_mrrc2:
4284       F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
4285       break;
4286     }
4287 
4288     Value *Coproc = EmitScalarExpr(E->getArg(0));
4289     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4290     Value *CRm  = EmitScalarExpr(E->getArg(2));
4291     Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
4292 
4293     // Returns an unsigned 64 bit integer, represented
4294     // as two 32 bit integers.
4295 
4296     Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
4297     Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
4298     Rt = Builder.CreateZExt(Rt, Int64Ty);
4299     Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
4300 
4301     Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
4302     RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
4303     RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
4304 
4305     return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
4306   }
4307 
4308   if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
4309       ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
4310         BuiltinID == ARM::BI__builtin_arm_ldaex) &&
4311        getContext().getTypeSize(E->getType()) == 64) ||
4312       BuiltinID == ARM::BI__ldrexd) {
4313     Function *F;
4314 
4315     switch (BuiltinID) {
4316     default: llvm_unreachable("unexpected builtin");
4317     case ARM::BI__builtin_arm_ldaex:
4318       F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
4319       break;
4320     case ARM::BI__builtin_arm_ldrexd:
4321     case ARM::BI__builtin_arm_ldrex:
4322     case ARM::BI__ldrexd:
4323       F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
4324       break;
4325     }
4326 
4327     Value *LdPtr = EmitScalarExpr(E->getArg(0));
4328     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4329                                     "ldrexd");
4330 
4331     Value *Val0 = Builder.CreateExtractValue(Val, 1);
4332     Value *Val1 = Builder.CreateExtractValue(Val, 0);
4333     Val0 = Builder.CreateZExt(Val0, Int64Ty);
4334     Val1 = Builder.CreateZExt(Val1, Int64Ty);
4335 
4336     Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
4337     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4338     Val = Builder.CreateOr(Val, Val1);
4339     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4340   }
4341 
4342   if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
4343       BuiltinID == ARM::BI__builtin_arm_ldaex) {
4344     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4345 
4346     QualType Ty = E->getType();
4347     llvm::Type *RealResTy = ConvertType(Ty);
4348     llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
4349                                                   getContext().getTypeSize(Ty));
4350     LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
4351 
4352     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
4353                                        ? Intrinsic::arm_ldaex
4354                                        : Intrinsic::arm_ldrex,
4355                                    LoadAddr->getType());
4356     Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
4357 
4358     if (RealResTy->isPointerTy())
4359       return Builder.CreateIntToPtr(Val, RealResTy);
4360     else {
4361       Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4362       return Builder.CreateBitCast(Val, RealResTy);
4363     }
4364   }
4365 
4366   if (BuiltinID == ARM::BI__builtin_arm_strexd ||
4367       ((BuiltinID == ARM::BI__builtin_arm_stlex ||
4368         BuiltinID == ARM::BI__builtin_arm_strex) &&
4369        getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
4370     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4371                                        ? Intrinsic::arm_stlexd
4372                                        : Intrinsic::arm_strexd);
4373     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, nullptr);
4374 
4375     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4376     Value *Val = EmitScalarExpr(E->getArg(0));
4377     Builder.CreateStore(Val, Tmp);
4378 
4379     Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
4380     Val = Builder.CreateLoad(LdPtr);
4381 
4382     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4383     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4384     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
4385     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
4386   }
4387 
4388   if (BuiltinID == ARM::BI__builtin_arm_strex ||
4389       BuiltinID == ARM::BI__builtin_arm_stlex) {
4390     Value *StoreVal = EmitScalarExpr(E->getArg(0));
4391     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4392 
4393     QualType Ty = E->getArg(0)->getType();
4394     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4395                                                  getContext().getTypeSize(Ty));
4396     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4397 
4398     if (StoreVal->getType()->isPointerTy())
4399       StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
4400     else {
4401       StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
4402       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
4403     }
4404 
4405     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4406                                        ? Intrinsic::arm_stlex
4407                                        : Intrinsic::arm_strex,
4408                                    StoreAddr->getType());
4409     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
4410   }
4411 
4412   switch (BuiltinID) {
4413   case ARM::BI__iso_volatile_load8:
4414   case ARM::BI__iso_volatile_load16:
4415   case ARM::BI__iso_volatile_load32:
4416   case ARM::BI__iso_volatile_load64: {
4417     Value *Ptr = EmitScalarExpr(E->getArg(0));
4418     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4419     CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
4420     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4421                                              LoadSize.getQuantity() * 8);
4422     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4423     llvm::LoadInst *Load =
4424       Builder.CreateAlignedLoad(Ptr, LoadSize);
4425     Load->setVolatile(true);
4426     return Load;
4427   }
4428   case ARM::BI__iso_volatile_store8:
4429   case ARM::BI__iso_volatile_store16:
4430   case ARM::BI__iso_volatile_store32:
4431   case ARM::BI__iso_volatile_store64: {
4432     Value *Ptr = EmitScalarExpr(E->getArg(0));
4433     Value *Value = EmitScalarExpr(E->getArg(1));
4434     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4435     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
4436     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4437                                              StoreSize.getQuantity() * 8);
4438     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4439     llvm::StoreInst *Store =
4440       Builder.CreateAlignedStore(Value, Ptr,
4441                                  StoreSize);
4442     Store->setVolatile(true);
4443     return Store;
4444   }
4445   }
4446 
4447   if (BuiltinID == ARM::BI__builtin_arm_clrex) {
4448     Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
4449     return Builder.CreateCall(F);
4450   }
4451 
4452   // CRC32
4453   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4454   switch (BuiltinID) {
4455   case ARM::BI__builtin_arm_crc32b:
4456     CRCIntrinsicID = Intrinsic::arm_crc32b; break;
4457   case ARM::BI__builtin_arm_crc32cb:
4458     CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
4459   case ARM::BI__builtin_arm_crc32h:
4460     CRCIntrinsicID = Intrinsic::arm_crc32h; break;
4461   case ARM::BI__builtin_arm_crc32ch:
4462     CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
4463   case ARM::BI__builtin_arm_crc32w:
4464   case ARM::BI__builtin_arm_crc32d:
4465     CRCIntrinsicID = Intrinsic::arm_crc32w; break;
4466   case ARM::BI__builtin_arm_crc32cw:
4467   case ARM::BI__builtin_arm_crc32cd:
4468     CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
4469   }
4470 
4471   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4472     Value *Arg0 = EmitScalarExpr(E->getArg(0));
4473     Value *Arg1 = EmitScalarExpr(E->getArg(1));
4474 
    // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
4476     // intrinsics, hence we need different codegen for these cases.
4477     if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
4478         BuiltinID == ARM::BI__builtin_arm_crc32cd) {
4479       Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4480       Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
4481       Value *Arg1b = Builder.CreateLShr(Arg1, C1);
4482       Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
4483 
4484       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4485       Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
4486       return Builder.CreateCall(F, {Res, Arg1b});
4487     } else {
4488       Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
4489 
4490       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4491       return Builder.CreateCall(F, {Arg0, Arg1});
4492     }
4493   }
4494 
4495   if (BuiltinID == ARM::BI__builtin_arm_rsr ||
4496       BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4497       BuiltinID == ARM::BI__builtin_arm_rsrp ||
4498       BuiltinID == ARM::BI__builtin_arm_wsr ||
4499       BuiltinID == ARM::BI__builtin_arm_wsr64 ||
4500       BuiltinID == ARM::BI__builtin_arm_wsrp) {
4501 
4502     bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
4503                   BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4504                   BuiltinID == ARM::BI__builtin_arm_rsrp;
4505 
4506     bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
4507                             BuiltinID == ARM::BI__builtin_arm_wsrp;
4508 
4509     bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4510                    BuiltinID == ARM::BI__builtin_arm_wsr64;
4511 
4512     llvm::Type *ValueType;
4513     llvm::Type *RegisterType;
4514     if (IsPointerBuiltin) {
4515       ValueType = VoidPtrTy;
4516       RegisterType = Int32Ty;
4517     } else if (Is64Bit) {
4518       ValueType = RegisterType = Int64Ty;
4519     } else {
4520       ValueType = RegisterType = Int32Ty;
4521     }
4522 
4523     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
4524   }
4525 
4526   // Find out if any arguments are required to be integer constant
4527   // expressions.
4528   unsigned ICEArguments = 0;
4529   ASTContext::GetBuiltinTypeError Error;
4530   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4531   assert(Error == ASTContext::GE_None && "Should not codegen an error");
4532 
4533   auto getAlignmentValue32 = [&](Address addr) -> Value* {
4534     return Builder.getInt32(addr.getAlignment().getQuantity());
4535   };
4536 
4537   Address PtrOp0 = Address::invalid();
4538   Address PtrOp1 = Address::invalid();
4539   SmallVector<Value*, 4> Ops;
4540   bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
4541   unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
4542   for (unsigned i = 0, e = NumArgs; i != e; i++) {
4543     if (i == 0) {
4544       switch (BuiltinID) {
4545       case NEON::BI__builtin_neon_vld1_v:
4546       case NEON::BI__builtin_neon_vld1q_v:
4547       case NEON::BI__builtin_neon_vld1q_lane_v:
4548       case NEON::BI__builtin_neon_vld1_lane_v:
4549       case NEON::BI__builtin_neon_vld1_dup_v:
4550       case NEON::BI__builtin_neon_vld1q_dup_v:
4551       case NEON::BI__builtin_neon_vst1_v:
4552       case NEON::BI__builtin_neon_vst1q_v:
4553       case NEON::BI__builtin_neon_vst1q_lane_v:
4554       case NEON::BI__builtin_neon_vst1_lane_v:
4555       case NEON::BI__builtin_neon_vst2_v:
4556       case NEON::BI__builtin_neon_vst2q_v:
4557       case NEON::BI__builtin_neon_vst2_lane_v:
4558       case NEON::BI__builtin_neon_vst2q_lane_v:
4559       case NEON::BI__builtin_neon_vst3_v:
4560       case NEON::BI__builtin_neon_vst3q_v:
4561       case NEON::BI__builtin_neon_vst3_lane_v:
4562       case NEON::BI__builtin_neon_vst3q_lane_v:
4563       case NEON::BI__builtin_neon_vst4_v:
4564       case NEON::BI__builtin_neon_vst4q_v:
4565       case NEON::BI__builtin_neon_vst4_lane_v:
4566       case NEON::BI__builtin_neon_vst4q_lane_v:
4567         // Get the alignment for the argument in addition to the value;
4568         // we'll use it later.
4569         PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
4570         Ops.push_back(PtrOp0.getPointer());
4571         continue;
4572       }
4573     }
4574     if (i == 1) {
4575       switch (BuiltinID) {
4576       case NEON::BI__builtin_neon_vld2_v:
4577       case NEON::BI__builtin_neon_vld2q_v:
4578       case NEON::BI__builtin_neon_vld3_v:
4579       case NEON::BI__builtin_neon_vld3q_v:
4580       case NEON::BI__builtin_neon_vld4_v:
4581       case NEON::BI__builtin_neon_vld4q_v:
4582       case NEON::BI__builtin_neon_vld2_lane_v:
4583       case NEON::BI__builtin_neon_vld2q_lane_v:
4584       case NEON::BI__builtin_neon_vld3_lane_v:
4585       case NEON::BI__builtin_neon_vld3q_lane_v:
4586       case NEON::BI__builtin_neon_vld4_lane_v:
4587       case NEON::BI__builtin_neon_vld4q_lane_v:
4588       case NEON::BI__builtin_neon_vld2_dup_v:
4589       case NEON::BI__builtin_neon_vld3_dup_v:
4590       case NEON::BI__builtin_neon_vld4_dup_v:
4591         // Get the alignment for the argument in addition to the value;
4592         // we'll use it later.
4593         PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
4594         Ops.push_back(PtrOp1.getPointer());
4595         continue;
4596       }
4597     }
4598 
4599     if ((ICEArguments & (1 << i)) == 0) {
4600       Ops.push_back(EmitScalarExpr(E->getArg(i)));
4601     } else {
4602       // If this is required to be a constant, constant fold it so that we know
4603       // that the generated intrinsic gets a ConstantInt.
4604       llvm::APSInt Result;
4605       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
4606       assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
4607       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
4608     }
4609   }
4610 
4611   switch (BuiltinID) {
4612   default: break;
4613 
4614   case NEON::BI__builtin_neon_vget_lane_i8:
4615   case NEON::BI__builtin_neon_vget_lane_i16:
4616   case NEON::BI__builtin_neon_vget_lane_i32:
4617   case NEON::BI__builtin_neon_vget_lane_i64:
4618   case NEON::BI__builtin_neon_vget_lane_f32:
4619   case NEON::BI__builtin_neon_vgetq_lane_i8:
4620   case NEON::BI__builtin_neon_vgetq_lane_i16:
4621   case NEON::BI__builtin_neon_vgetq_lane_i32:
4622   case NEON::BI__builtin_neon_vgetq_lane_i64:
4623   case NEON::BI__builtin_neon_vgetq_lane_f32:
4624     return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
4625 
4626   case NEON::BI__builtin_neon_vset_lane_i8:
4627   case NEON::BI__builtin_neon_vset_lane_i16:
4628   case NEON::BI__builtin_neon_vset_lane_i32:
4629   case NEON::BI__builtin_neon_vset_lane_i64:
4630   case NEON::BI__builtin_neon_vset_lane_f32:
4631   case NEON::BI__builtin_neon_vsetq_lane_i8:
4632   case NEON::BI__builtin_neon_vsetq_lane_i16:
4633   case NEON::BI__builtin_neon_vsetq_lane_i32:
4634   case NEON::BI__builtin_neon_vsetq_lane_i64:
4635   case NEON::BI__builtin_neon_vsetq_lane_f32:
4636     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4637 
4638   case NEON::BI__builtin_neon_vsha1h_u32:
4639     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
4640                         "vsha1h");
4641   case NEON::BI__builtin_neon_vsha1cq_u32:
4642     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
4643                         "vsha1h");
4644   case NEON::BI__builtin_neon_vsha1pq_u32:
4645     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
4646                         "vsha1h");
4647   case NEON::BI__builtin_neon_vsha1mq_u32:
4648     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
4649                         "vsha1h");
4650 
4651   // The ARM _MoveToCoprocessor builtins put the input register value as
4652   // the first argument, but the LLVM intrinsic expects it as the third one.
4653   case ARM::BI_MoveToCoprocessor:
4654   case ARM::BI_MoveToCoprocessor2: {
4655     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
4656                                    Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
4657     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
4658                                   Ops[3], Ops[4], Ops[5]});
4659   }
4660   case ARM::BI_BitScanForward:
4661   case ARM::BI_BitScanForward64:
4662     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
4663   case ARM::BI_BitScanReverse:
4664   case ARM::BI_BitScanReverse64:
4665     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
4666 
4667   case ARM::BI_InterlockedAnd64:
4668     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
4669   case ARM::BI_InterlockedExchange64:
4670     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
4671   case ARM::BI_InterlockedExchangeAdd64:
4672     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
4673   case ARM::BI_InterlockedExchangeSub64:
4674     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
4675   case ARM::BI_InterlockedOr64:
4676     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
4677   case ARM::BI_InterlockedXor64:
4678     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
4679   case ARM::BI_InterlockedDecrement64:
4680     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
4681   case ARM::BI_InterlockedIncrement64:
4682     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
4683   }
4684 
4685   // Get the last argument, which specifies the vector type.
4686   assert(HasExtraArg);
4687   llvm::APSInt Result;
4688   const Expr *Arg = E->getArg(E->getNumArgs()-1);
4689   if (!Arg->isIntegerConstantExpr(Result, getContext()))
4690     return nullptr;
4691 
4692   if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
4693       BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
4694     // Determine the overloaded type of this builtin.
4695     llvm::Type *Ty;
4696     if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
4697       Ty = FloatTy;
4698     else
4699       Ty = DoubleTy;
4700 
4701     // Determine whether this is an unsigned conversion or not.
4702     bool usgn = Result.getZExtValue() == 1;
4703     unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
4704 
4705     // Call the appropriate intrinsic.
4706     Function *F = CGM.getIntrinsic(Int, Ty);
4707     return Builder.CreateCall(F, Ops, "vcvtr");
4708   }
4709 
4710   // Determine the type of this overloaded NEON intrinsic.
4711   NeonTypeFlags Type(Result.getZExtValue());
4712   bool usgn = Type.isUnsigned();
4713   bool rightShift = false;
4714 
4715   llvm::VectorType *VTy = GetNeonType(this, Type);
4716   llvm::Type *Ty = VTy;
4717   if (!Ty)
4718     return nullptr;
4719 
4720   // Many NEON builtins have identical semantics and uses in ARM and
4721   // AArch64. Emit these in a single function.
4722   auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
4723   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
4724       IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
4725   if (Builtin)
4726     return EmitCommonNeonBuiltinExpr(
4727         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
4728         Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);
4729 
4730   unsigned Int;
4731   switch (BuiltinID) {
4732   default: return nullptr;
4733   case NEON::BI__builtin_neon_vld1q_lane_v:
4734     // Handle 64-bit integer elements as a special case.  Use shuffles of
4735     // one-element vectors to avoid poor code for i64 in the backend.
4736     if (VTy->getElementType()->isIntegerTy(64)) {
4737       // Extract the other lane.
4738       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4739       uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
4740       Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
4741       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
4742       // Load the value as a one-element vector.
4743       Ty = llvm::VectorType::get(VTy->getElementType(), 1);
4744       llvm::Type *Tys[] = {Ty, Int8PtrTy};
4745       Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
4746       Value *Align = getAlignmentValue32(PtrOp0);
4747       Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
4748       // Combine them.
4749       uint32_t Indices[] = {1 - Lane, Lane};
4750       SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
4751       return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
4752     }
4753     // fall through
4754   case NEON::BI__builtin_neon_vld1_lane_v: {
4755     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4756     PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
4757     Value *Ld = Builder.CreateLoad(PtrOp0);
4758     return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
4759   }
4760   case NEON::BI__builtin_neon_vld2_dup_v:
4761   case NEON::BI__builtin_neon_vld3_dup_v:
4762   case NEON::BI__builtin_neon_vld4_dup_v: {
4763     // Handle 64-bit elements as a special-case.  There is no "dup" needed.
4764     if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
4765       switch (BuiltinID) {
4766       case NEON::BI__builtin_neon_vld2_dup_v:
4767         Int = Intrinsic::arm_neon_vld2;
4768         break;
4769       case NEON::BI__builtin_neon_vld3_dup_v:
4770         Int = Intrinsic::arm_neon_vld3;
4771         break;
4772       case NEON::BI__builtin_neon_vld4_dup_v:
4773         Int = Intrinsic::arm_neon_vld4;
4774         break;
4775       default: llvm_unreachable("unknown vld_dup intrinsic?");
4776       }
4777       llvm::Type *Tys[] = {Ty, Int8PtrTy};
4778       Function *F = CGM.getIntrinsic(Int, Tys);
4779       llvm::Value *Align = getAlignmentValue32(PtrOp1);
4780       Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
4781       Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4782       Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4783       return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4784     }
4785     switch (BuiltinID) {
4786     case NEON::BI__builtin_neon_vld2_dup_v:
4787       Int = Intrinsic::arm_neon_vld2lane;
4788       break;
4789     case NEON::BI__builtin_neon_vld3_dup_v:
4790       Int = Intrinsic::arm_neon_vld3lane;
4791       break;
4792     case NEON::BI__builtin_neon_vld4_dup_v:
4793       Int = Intrinsic::arm_neon_vld4lane;
4794       break;
4795     default: llvm_unreachable("unknown vld_dup intrinsic?");
4796     }
4797     llvm::Type *Tys[] = {Ty, Int8PtrTy};
4798     Function *F = CGM.getIntrinsic(Int, Tys);
4799     llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
4800 
4801     SmallVector<Value*, 6> Args;
4802     Args.push_back(Ops[1]);
4803     Args.append(STy->getNumElements(), UndefValue::get(Ty));
4804 
4805     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
4806     Args.push_back(CI);
4807     Args.push_back(getAlignmentValue32(PtrOp1));
4808 
4809     Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
4810     // splat lane 0 to all elts in each vector of the result.
4811     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
4812       Value *Val = Builder.CreateExtractValue(Ops[1], i);
4813       Value *Elt = Builder.CreateBitCast(Val, Ty);
4814       Elt = EmitNeonSplat(Elt, CI);
4815       Elt = Builder.CreateBitCast(Elt, Val->getType());
4816       Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
4817     }
4818     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4819     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4820     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4821   }
4822   case NEON::BI__builtin_neon_vqrshrn_n_v:
4823     Int =
4824       usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
4825     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
4826                         1, true);
4827   case NEON::BI__builtin_neon_vqrshrun_n_v:
4828     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
4829                         Ops, "vqrshrun_n", 1, true);
4830   case NEON::BI__builtin_neon_vqshrn_n_v:
4831     Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
4832     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
4833                         1, true);
4834   case NEON::BI__builtin_neon_vqshrun_n_v:
4835     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
4836                         Ops, "vqshrun_n", 1, true);
4837   case NEON::BI__builtin_neon_vrecpe_v:
4838   case NEON::BI__builtin_neon_vrecpeq_v:
4839     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
4840                         Ops, "vrecpe");
4841   case NEON::BI__builtin_neon_vrshrn_n_v:
4842     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
4843                         Ops, "vrshrn_n", 1, true);
4844   case NEON::BI__builtin_neon_vrsra_n_v:
4845   case NEON::BI__builtin_neon_vrsraq_n_v:
4846     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4847     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4848     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
4849     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
4850     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
4851     return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
4852   case NEON::BI__builtin_neon_vsri_n_v:
4853   case NEON::BI__builtin_neon_vsriq_n_v:
4854     rightShift = true;
4855   case NEON::BI__builtin_neon_vsli_n_v:
4856   case NEON::BI__builtin_neon_vsliq_n_v:
4857     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
4858     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
4859                         Ops, "vsli_n");
4860   case NEON::BI__builtin_neon_vsra_n_v:
4861   case NEON::BI__builtin_neon_vsraq_n_v:
4862     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4863     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
4864     return Builder.CreateAdd(Ops[0], Ops[1]);
4865   case NEON::BI__builtin_neon_vst1q_lane_v:
4866     // Handle 64-bit integer elements as a special case.  Use a shuffle to get
4867     // a one-element vector and avoid poor code for i64 in the backend.
4868     if (VTy->getElementType()->isIntegerTy(64)) {
4869       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4870       Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
4871       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
4872       Ops[2] = getAlignmentValue32(PtrOp0);
4873       llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
4874       return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
4875                                                  Tys), Ops);
4876     }
4877     // fall through
4878   case NEON::BI__builtin_neon_vst1_lane_v: {
4879     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4880     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
4881     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4882     auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
4883     return St;
4884   }
4885   case NEON::BI__builtin_neon_vtbl1_v:
4886     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
4887                         Ops, "vtbl1");
4888   case NEON::BI__builtin_neon_vtbl2_v:
4889     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
4890                         Ops, "vtbl2");
4891   case NEON::BI__builtin_neon_vtbl3_v:
4892     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
4893                         Ops, "vtbl3");
4894   case NEON::BI__builtin_neon_vtbl4_v:
4895     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
4896                         Ops, "vtbl4");
4897   case NEON::BI__builtin_neon_vtbx1_v:
4898     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
4899                         Ops, "vtbx1");
4900   case NEON::BI__builtin_neon_vtbx2_v:
4901     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
4902                         Ops, "vtbx2");
4903   case NEON::BI__builtin_neon_vtbx3_v:
4904     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
4905                         Ops, "vtbx3");
4906   case NEON::BI__builtin_neon_vtbx4_v:
4907     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
4908                         Ops, "vtbx4");
4909   }
4910 }
4911 
4912 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
4913                                       const CallExpr *E,
4914                                       SmallVectorImpl<Value *> &Ops) {
4915   unsigned int Int = 0;
4916   const char *s = nullptr;
4917 
4918   switch (BuiltinID) {
4919   default:
4920     return nullptr;
4921   case NEON::BI__builtin_neon_vtbl1_v:
4922   case NEON::BI__builtin_neon_vqtbl1_v:
4923   case NEON::BI__builtin_neon_vqtbl1q_v:
4924   case NEON::BI__builtin_neon_vtbl2_v:
4925   case NEON::BI__builtin_neon_vqtbl2_v:
4926   case NEON::BI__builtin_neon_vqtbl2q_v:
4927   case NEON::BI__builtin_neon_vtbl3_v:
4928   case NEON::BI__builtin_neon_vqtbl3_v:
4929   case NEON::BI__builtin_neon_vqtbl3q_v:
4930   case NEON::BI__builtin_neon_vtbl4_v:
4931   case NEON::BI__builtin_neon_vqtbl4_v:
4932   case NEON::BI__builtin_neon_vqtbl4q_v:
4933     break;
4934   case NEON::BI__builtin_neon_vtbx1_v:
4935   case NEON::BI__builtin_neon_vqtbx1_v:
4936   case NEON::BI__builtin_neon_vqtbx1q_v:
4937   case NEON::BI__builtin_neon_vtbx2_v:
4938   case NEON::BI__builtin_neon_vqtbx2_v:
4939   case NEON::BI__builtin_neon_vqtbx2q_v:
4940   case NEON::BI__builtin_neon_vtbx3_v:
4941   case NEON::BI__builtin_neon_vqtbx3_v:
4942   case NEON::BI__builtin_neon_vqtbx3q_v:
4943   case NEON::BI__builtin_neon_vtbx4_v:
4944   case NEON::BI__builtin_neon_vqtbx4_v:
4945   case NEON::BI__builtin_neon_vqtbx4q_v:
4946     break;
4947   }
4948 
4949   assert(E->getNumArgs() >= 3);
4950 
4951   // Get the last argument, which specifies the vector type.
4952   llvm::APSInt Result;
4953   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
4954   if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
4955     return nullptr;
4956 
4957   // Determine the type of this overloaded NEON intrinsic.
4958   NeonTypeFlags Type(Result.getZExtValue());
4959   llvm::VectorType *Ty = GetNeonType(&CGF, Type);
4960   if (!Ty)
4961     return nullptr;
4962 
4963   CodeGen::CGBuilderTy &Builder = CGF.Builder;
4964 
4965   // AArch64 scalar builtins are not overloaded, they do not have an extra
4966   // argument that specifies the vector type, need to handle each case.
4967   switch (BuiltinID) {
4968   case NEON::BI__builtin_neon_vtbl1_v: {
4969     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
4970                               Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
4971                               "vtbl1");
4972   }
4973   case NEON::BI__builtin_neon_vtbl2_v: {
4974     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
4975                               Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
4976                               "vtbl1");
4977   }
4978   case NEON::BI__builtin_neon_vtbl3_v: {
4979     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
4980                               Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
4981                               "vtbl2");
4982   }
4983   case NEON::BI__builtin_neon_vtbl4_v: {
4984     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
4985                               Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
4986                               "vtbl2");
4987   }
4988   case NEON::BI__builtin_neon_vtbx1_v: {
4989     Value *TblRes =
4990         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
4991                            Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
4992 
4993     llvm::Constant *EightV = ConstantInt::get(Ty, 8);
4994     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
4995     CmpRes = Builder.CreateSExt(CmpRes, Ty);
4996 
4997     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
4998     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
4999     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5000   }
5001   case NEON::BI__builtin_neon_vtbx2_v: {
5002     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
5003                               Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
5004                               "vtbx1");
5005   }
5006   case NEON::BI__builtin_neon_vtbx3_v: {
5007     Value *TblRes =
5008         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
5009                            Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
5010 
5011     llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
5012     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
5013                                            TwentyFourV);
5014     CmpRes = Builder.CreateSExt(CmpRes, Ty);
5015 
5016     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5017     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5018     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5019   }
5020   case NEON::BI__builtin_neon_vtbx4_v: {
5021     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
5022                               Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
5023                               "vtbx2");
5024   }
5025   case NEON::BI__builtin_neon_vqtbl1_v:
5026   case NEON::BI__builtin_neon_vqtbl1q_v:
5027     Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
5028   case NEON::BI__builtin_neon_vqtbl2_v:
5029   case NEON::BI__builtin_neon_vqtbl2q_v: {
5030     Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
5031   case NEON::BI__builtin_neon_vqtbl3_v:
5032   case NEON::BI__builtin_neon_vqtbl3q_v:
5033     Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
5034   case NEON::BI__builtin_neon_vqtbl4_v:
5035   case NEON::BI__builtin_neon_vqtbl4q_v:
5036     Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
5037   case NEON::BI__builtin_neon_vqtbx1_v:
5038   case NEON::BI__builtin_neon_vqtbx1q_v:
5039     Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
5040   case NEON::BI__builtin_neon_vqtbx2_v:
5041   case NEON::BI__builtin_neon_vqtbx2q_v:
5042     Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
5043   case NEON::BI__builtin_neon_vqtbx3_v:
5044   case NEON::BI__builtin_neon_vqtbx3q_v:
5045     Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
5046   case NEON::BI__builtin_neon_vqtbx4_v:
5047   case NEON::BI__builtin_neon_vqtbx4q_v:
5048     Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
5049   }
5050   }
5051 
5052   if (!Int)
5053     return nullptr;
5054 
5055   Function *F = CGF.CGM.getIntrinsic(Int, Ty);
5056   return CGF.EmitNeonCall(F, Ops, s);
5057 }
5058 
5059 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
5060   llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
5061   Op = Builder.CreateBitCast(Op, Int16Ty);
5062   Value *V = UndefValue::get(VTy);
5063   llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
5064   Op = Builder.CreateInsertElement(V, Op, CI);
5065   return Op;
5066 }
5067 
5068 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
5069                                                const CallExpr *E) {
5070   unsigned HintID = static_cast<unsigned>(-1);
5071   switch (BuiltinID) {
5072   default: break;
5073   case AArch64::BI__builtin_arm_nop:
5074     HintID = 0;
5075     break;
5076   case AArch64::BI__builtin_arm_yield:
5077     HintID = 1;
5078     break;
5079   case AArch64::BI__builtin_arm_wfe:
5080     HintID = 2;
5081     break;
5082   case AArch64::BI__builtin_arm_wfi:
5083     HintID = 3;
5084     break;
5085   case AArch64::BI__builtin_arm_sev:
5086     HintID = 4;
5087     break;
5088   case AArch64::BI__builtin_arm_sevl:
5089     HintID = 5;
5090     break;
5091   }
5092 
5093   if (HintID != static_cast<unsigned>(-1)) {
5094     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
5095     return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
5096   }
5097 
5098   if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
5099     Value *Address         = EmitScalarExpr(E->getArg(0));
5100     Value *RW              = EmitScalarExpr(E->getArg(1));
5101     Value *CacheLevel      = EmitScalarExpr(E->getArg(2));
5102     Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
5103     Value *IsData          = EmitScalarExpr(E->getArg(4));
5104 
5105     Value *Locality = nullptr;
5106     if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
5107       // Temporal fetch, needs to convert cache level to locality.
5108       Locality = llvm::ConstantInt::get(Int32Ty,
5109         -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
5110     } else {
5111       // Streaming fetch.
5112       Locality = llvm::ConstantInt::get(Int32Ty, 0);
5113     }
5114 
5115     // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
5116     // PLDL3STRM or PLDL2STRM.
5117     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
5118     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
5119   }
5120 
5121   if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
5122     assert((getContext().getTypeSize(E->getType()) == 32) &&
5123            "rbit of unusual size!");
5124     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5125     return Builder.CreateCall(
5126         CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit");
5127   }
5128   if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
5129     assert((getContext().getTypeSize(E->getType()) == 64) &&
5130            "rbit of unusual size!");
5131     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5132     return Builder.CreateCall(
5133         CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit");
5134   }
5135 
5136   if (BuiltinID == AArch64::BI__clear_cache) {
5137     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
5138     const FunctionDecl *FD = E->getDirectCallee();
5139     Value *Ops[2];
5140     for (unsigned i = 0; i < 2; i++)
5141       Ops[i] = EmitScalarExpr(E->getArg(i));
5142     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
5143     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
5144     StringRef Name = FD->getName();
5145     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
5146   }
5147 
5148   if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5149       BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
5150       getContext().getTypeSize(E->getType()) == 128) {
5151     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5152                                        ? Intrinsic::aarch64_ldaxp
5153                                        : Intrinsic::aarch64_ldxp);
5154 
5155     Value *LdPtr = EmitScalarExpr(E->getArg(0));
5156     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
5157                                     "ldxp");
5158 
5159     Value *Val0 = Builder.CreateExtractValue(Val, 1);
5160     Value *Val1 = Builder.CreateExtractValue(Val, 0);
5161     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5162     Val0 = Builder.CreateZExt(Val0, Int128Ty);
5163     Val1 = Builder.CreateZExt(Val1, Int128Ty);
5164 
5165     Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
5166     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
5167     Val = Builder.CreateOr(Val, Val1);
5168     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
5169   } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5170              BuiltinID == AArch64::BI__builtin_arm_ldaex) {
5171     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
5172 
5173     QualType Ty = E->getType();
5174     llvm::Type *RealResTy = ConvertType(Ty);
5175     llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
5176                                                   getContext().getTypeSize(Ty));
5177     LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
5178 
5179     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5180                                        ? Intrinsic::aarch64_ldaxr
5181                                        : Intrinsic::aarch64_ldxr,
5182                                    LoadAddr->getType());
5183     Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
5184 
5185     if (RealResTy->isPointerTy())
5186       return Builder.CreateIntToPtr(Val, RealResTy);
5187 
5188     Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
5189     return Builder.CreateBitCast(Val, RealResTy);
5190   }
5191 
5192   if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
5193        BuiltinID == AArch64::BI__builtin_arm_stlex) &&
5194       getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
5195     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5196                                        ? Intrinsic::aarch64_stlxp
5197                                        : Intrinsic::aarch64_stxp);
5198     llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, nullptr);
5199 
5200     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
5201     EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
5202 
5203     Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
5204     llvm::Value *Val = Builder.CreateLoad(Tmp);
5205 
5206     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
5207     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
5208     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
5209                                          Int8PtrTy);
5210     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
5211   }
5212 
5213   if (BuiltinID == AArch64::BI__builtin_arm_strex ||
5214       BuiltinID == AArch64::BI__builtin_arm_stlex) {
5215     Value *StoreVal = EmitScalarExpr(E->getArg(0));
5216     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
5217 
5218     QualType Ty = E->getArg(0)->getType();
5219     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
5220                                                  getContext().getTypeSize(Ty));
5221     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
5222 
5223     if (StoreVal->getType()->isPointerTy())
5224       StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
5225     else {
5226       StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
5227       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
5228     }
5229 
5230     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5231                                        ? Intrinsic::aarch64_stlxr
5232                                        : Intrinsic::aarch64_stxr,
5233                                    StoreAddr->getType());
5234     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
5235   }
5236 
5237   if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
5238     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
5239     return Builder.CreateCall(F);
5240   }
5241 
5242   // CRC32
5243   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5244   switch (BuiltinID) {
5245   case AArch64::BI__builtin_arm_crc32b:
5246     CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
5247   case AArch64::BI__builtin_arm_crc32cb:
5248     CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
5249   case AArch64::BI__builtin_arm_crc32h:
5250     CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
5251   case AArch64::BI__builtin_arm_crc32ch:
5252     CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
5253   case AArch64::BI__builtin_arm_crc32w:
5254     CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
5255   case AArch64::BI__builtin_arm_crc32cw:
5256     CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
5257   case AArch64::BI__builtin_arm_crc32d:
5258     CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
5259   case AArch64::BI__builtin_arm_crc32cd:
5260     CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
5261   }
5262 
5263   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5264     Value *Arg0 = EmitScalarExpr(E->getArg(0));
5265     Value *Arg1 = EmitScalarExpr(E->getArg(1));
5266     Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5267 
5268     llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
5269     Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
5270 
5271     return Builder.CreateCall(F, {Arg0, Arg1});
5272   }
5273 
5274   if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
5275       BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5276       BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5277       BuiltinID == AArch64::BI__builtin_arm_wsr ||
5278       BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
5279       BuiltinID == AArch64::BI__builtin_arm_wsrp) {
5280 
5281     bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
5282                   BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5283                   BuiltinID == AArch64::BI__builtin_arm_rsrp;
5284 
5285     bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5286                             BuiltinID == AArch64::BI__builtin_arm_wsrp;
5287 
5288     bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
5289                    BuiltinID != AArch64::BI__builtin_arm_wsr;
5290 
5291     llvm::Type *ValueType;
5292     llvm::Type *RegisterType = Int64Ty;
5293     if (IsPointerBuiltin) {
5294       ValueType = VoidPtrTy;
5295     } else if (Is64Bit) {
5296       ValueType = Int64Ty;
5297     } else {
5298       ValueType = Int32Ty;
5299     }
5300 
5301     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
5302   }
5303 
5304   // Find out if any arguments are required to be integer constant
5305   // expressions.
5306   unsigned ICEArguments = 0;
5307   ASTContext::GetBuiltinTypeError Error;
5308   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5309   assert(Error == ASTContext::GE_None && "Should not codegen an error");
5310 
5311   llvm::SmallVector<Value*, 4> Ops;
5312   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
5313     if ((ICEArguments & (1 << i)) == 0) {
5314       Ops.push_back(EmitScalarExpr(E->getArg(i)));
5315     } else {
5316       // If this is required to be a constant, constant fold it so that we know
5317       // that the generated intrinsic gets a ConstantInt.
5318       llvm::APSInt Result;
5319       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
5320       assert(IsConst && "Constant arg isn't actually constant?");
5321       (void)IsConst;
5322       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
5323     }
5324   }
5325 
5326   auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
5327   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
5328       SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5329 
5330   if (Builtin) {
5331     Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
5332     Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
5333     assert(Result && "SISD intrinsic should have been handled");
5334     return Result;
5335   }
5336 
5337   llvm::APSInt Result;
5338   const Expr *Arg = E->getArg(E->getNumArgs()-1);
5339   NeonTypeFlags Type(0);
5340   if (Arg->isIntegerConstantExpr(Result, getContext()))
5341     // Determine the type of this overloaded NEON intrinsic.
5342     Type = NeonTypeFlags(Result.getZExtValue());
5343 
5344   bool usgn = Type.isUnsigned();
5345   bool quad = Type.isQuad();
5346 
5347   // Handle non-overloaded intrinsics first.
5348   switch (BuiltinID) {
5349   default: break;
5350   case NEON::BI__builtin_neon_vldrq_p128: {
5351     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
5352     Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
5353     return Builder.CreateDefaultAlignedLoad(Ptr);
5354   }
5355   case NEON::BI__builtin_neon_vstrq_p128: {
5356     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
5357     Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
5358     return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
5359   }
5360   case NEON::BI__builtin_neon_vcvts_u32_f32:
5361   case NEON::BI__builtin_neon_vcvtd_u64_f64:
5362     usgn = true;
5363     // FALL THROUGH
5364   case NEON::BI__builtin_neon_vcvts_s32_f32:
5365   case NEON::BI__builtin_neon_vcvtd_s64_f64: {
5366     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5367     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5368     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5369     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5370     Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
5371     if (usgn)
5372       return Builder.CreateFPToUI(Ops[0], InTy);
5373     return Builder.CreateFPToSI(Ops[0], InTy);
5374   }
5375   case NEON::BI__builtin_neon_vcvts_f32_u32:
5376   case NEON::BI__builtin_neon_vcvtd_f64_u64:
5377     usgn = true;
5378     // FALL THROUGH
5379   case NEON::BI__builtin_neon_vcvts_f32_s32:
5380   case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5381     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5382     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5383     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5384     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5385     Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5386     if (usgn)
5387       return Builder.CreateUIToFP(Ops[0], FTy);
5388     return Builder.CreateSIToFP(Ops[0], FTy);
5389   }
5390   case NEON::BI__builtin_neon_vpaddd_s64: {
5391     llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
5392     Value *Vec = EmitScalarExpr(E->getArg(0));
    // The vector is v2i64, so make sure it's bitcast to that.
5394     Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
5395     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5396     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5397     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5398     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
    // Pairwise addition of a v2i64 into a scalar i64.
5400     return Builder.CreateAdd(Op0, Op1, "vpaddd");
5401   }
5402   case NEON::BI__builtin_neon_vpaddd_f64: {
5403     llvm::Type *Ty =
5404       llvm::VectorType::get(DoubleTy, 2);
5405     Value *Vec = EmitScalarExpr(E->getArg(0));
5406     // The vector is v2f64, so make sure it's bitcast to that.
5407     Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
5408     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5409     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5410     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5411     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5412     // Pairwise addition of a v2f64 into a scalar f64.
5413     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5414   }
5415   case NEON::BI__builtin_neon_vpadds_f32: {
5416     llvm::Type *Ty =
5417       llvm::VectorType::get(FloatTy, 2);
5418     Value *Vec = EmitScalarExpr(E->getArg(0));
5419     // The vector is v2f32, so make sure it's bitcast to that.
5420     Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
5421     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5422     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5423     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5424     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5425     // Pairwise addition of a v2f32 into a scalar f32.
5426     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5427   }
5428   case NEON::BI__builtin_neon_vceqzd_s64:
5429   case NEON::BI__builtin_neon_vceqzd_f64:
5430   case NEON::BI__builtin_neon_vceqzs_f32:
5431     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5432     return EmitAArch64CompareBuiltinExpr(
5433         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5434         ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
5435   case NEON::BI__builtin_neon_vcgezd_s64:
5436   case NEON::BI__builtin_neon_vcgezd_f64:
5437   case NEON::BI__builtin_neon_vcgezs_f32:
5438     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5439     return EmitAArch64CompareBuiltinExpr(
5440         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5441         ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
5442   case NEON::BI__builtin_neon_vclezd_s64:
5443   case NEON::BI__builtin_neon_vclezd_f64:
5444   case NEON::BI__builtin_neon_vclezs_f32:
5445     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5446     return EmitAArch64CompareBuiltinExpr(
5447         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5448         ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
5449   case NEON::BI__builtin_neon_vcgtzd_s64:
5450   case NEON::BI__builtin_neon_vcgtzd_f64:
5451   case NEON::BI__builtin_neon_vcgtzs_f32:
5452     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5453     return EmitAArch64CompareBuiltinExpr(
5454         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5455         ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
5456   case NEON::BI__builtin_neon_vcltzd_s64:
5457   case NEON::BI__builtin_neon_vcltzd_f64:
5458   case NEON::BI__builtin_neon_vcltzs_f32:
5459     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5460     return EmitAArch64CompareBuiltinExpr(
5461         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5462         ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
5463 
5464   case NEON::BI__builtin_neon_vceqzd_u64: {
5465     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5466     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5467     Ops[0] =
5468         Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
5469     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
5470   }
5471   case NEON::BI__builtin_neon_vceqd_f64:
5472   case NEON::BI__builtin_neon_vcled_f64:
5473   case NEON::BI__builtin_neon_vcltd_f64:
5474   case NEON::BI__builtin_neon_vcged_f64:
5475   case NEON::BI__builtin_neon_vcgtd_f64: {
5476     llvm::CmpInst::Predicate P;
5477     switch (BuiltinID) {
5478     default: llvm_unreachable("missing builtin ID in switch!");
5479     case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
5480     case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
5481     case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
5482     case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
5483     case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
5484     }
5485     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5486     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5487     Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5488     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5489     return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
5490   }
5491   case NEON::BI__builtin_neon_vceqs_f32:
5492   case NEON::BI__builtin_neon_vcles_f32:
5493   case NEON::BI__builtin_neon_vclts_f32:
5494   case NEON::BI__builtin_neon_vcges_f32:
5495   case NEON::BI__builtin_neon_vcgts_f32: {
5496     llvm::CmpInst::Predicate P;
5497     switch (BuiltinID) {
5498     default: llvm_unreachable("missing builtin ID in switch!");
5499     case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
5500     case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
5501     case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
5502     case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
5503     case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
5504     }
5505     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5506     Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
5507     Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
5508     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5509     return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
5510   }
5511   case NEON::BI__builtin_neon_vceqd_s64:
5512   case NEON::BI__builtin_neon_vceqd_u64:
5513   case NEON::BI__builtin_neon_vcgtd_s64:
5514   case NEON::BI__builtin_neon_vcgtd_u64:
5515   case NEON::BI__builtin_neon_vcltd_s64:
5516   case NEON::BI__builtin_neon_vcltd_u64:
5517   case NEON::BI__builtin_neon_vcged_u64:
5518   case NEON::BI__builtin_neon_vcged_s64:
5519   case NEON::BI__builtin_neon_vcled_u64:
5520   case NEON::BI__builtin_neon_vcled_s64: {
5521     llvm::CmpInst::Predicate P;
5522     switch (BuiltinID) {
5523     default: llvm_unreachable("missing builtin ID in switch!");
5524     case NEON::BI__builtin_neon_vceqd_s64:
5525     case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
5526     case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
5527     case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
5528     case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
5529     case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
5530     case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
5531     case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
5532     case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
5533     case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
5534     }
5535     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5536     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5537     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5538     Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
5539     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
5540   }
5541   case NEON::BI__builtin_neon_vtstd_s64:
5542   case NEON::BI__builtin_neon_vtstd_u64: {
5543     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5544     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5545     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5546     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
5547     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
5548                                 llvm::Constant::getNullValue(Int64Ty));
5549     return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
5550   }
5551   case NEON::BI__builtin_neon_vset_lane_i8:
5552   case NEON::BI__builtin_neon_vset_lane_i16:
5553   case NEON::BI__builtin_neon_vset_lane_i32:
5554   case NEON::BI__builtin_neon_vset_lane_i64:
5555   case NEON::BI__builtin_neon_vset_lane_f32:
5556   case NEON::BI__builtin_neon_vsetq_lane_i8:
5557   case NEON::BI__builtin_neon_vsetq_lane_i16:
5558   case NEON::BI__builtin_neon_vsetq_lane_i32:
5559   case NEON::BI__builtin_neon_vsetq_lane_i64:
5560   case NEON::BI__builtin_neon_vsetq_lane_f32:
5561     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5562     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5563   case NEON::BI__builtin_neon_vset_lane_f64:
5564     // The vector type needs a cast for the v1f64 variant.
5565     Ops[1] = Builder.CreateBitCast(Ops[1],
5566                                    llvm::VectorType::get(DoubleTy, 1));
5567     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5568     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5569   case NEON::BI__builtin_neon_vsetq_lane_f64:
5570     // The vector type needs a cast for the v2f64 variant.
5571     Ops[1] = Builder.CreateBitCast(Ops[1],
5572         llvm::VectorType::get(DoubleTy, 2));
5573     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5574     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5575 
5576   case NEON::BI__builtin_neon_vget_lane_i8:
5577   case NEON::BI__builtin_neon_vdupb_lane_i8:
5578     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
5579     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5580                                         "vget_lane");
5581   case NEON::BI__builtin_neon_vgetq_lane_i8:
5582   case NEON::BI__builtin_neon_vdupb_laneq_i8:
5583     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
5584     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5585                                         "vgetq_lane");
5586   case NEON::BI__builtin_neon_vget_lane_i16:
5587   case NEON::BI__builtin_neon_vduph_lane_i16:
5588     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
5589     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5590                                         "vget_lane");
5591   case NEON::BI__builtin_neon_vgetq_lane_i16:
5592   case NEON::BI__builtin_neon_vduph_laneq_i16:
5593     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
5594     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5595                                         "vgetq_lane");
5596   case NEON::BI__builtin_neon_vget_lane_i32:
5597   case NEON::BI__builtin_neon_vdups_lane_i32:
5598     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
5599     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5600                                         "vget_lane");
5601   case NEON::BI__builtin_neon_vdups_lane_f32:
5602     Ops[0] = Builder.CreateBitCast(Ops[0],
5603         llvm::VectorType::get(FloatTy, 2));
5604     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5605                                         "vdups_lane");
5606   case NEON::BI__builtin_neon_vgetq_lane_i32:
5607   case NEON::BI__builtin_neon_vdups_laneq_i32:
5608     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
5609     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5610                                         "vgetq_lane");
5611   case NEON::BI__builtin_neon_vget_lane_i64:
5612   case NEON::BI__builtin_neon_vdupd_lane_i64:
5613     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
5614     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5615                                         "vget_lane");
5616   case NEON::BI__builtin_neon_vdupd_lane_f64:
5617     Ops[0] = Builder.CreateBitCast(Ops[0],
5618         llvm::VectorType::get(DoubleTy, 1));
5619     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5620                                         "vdupd_lane");
5621   case NEON::BI__builtin_neon_vgetq_lane_i64:
5622   case NEON::BI__builtin_neon_vdupd_laneq_i64:
5623     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
5624     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5625                                         "vgetq_lane");
5626   case NEON::BI__builtin_neon_vget_lane_f32:
5627     Ops[0] = Builder.CreateBitCast(Ops[0],
5628         llvm::VectorType::get(FloatTy, 2));
5629     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5630                                         "vget_lane");
5631   case NEON::BI__builtin_neon_vget_lane_f64:
5632     Ops[0] = Builder.CreateBitCast(Ops[0],
5633         llvm::VectorType::get(DoubleTy, 1));
5634     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5635                                         "vget_lane");
5636   case NEON::BI__builtin_neon_vgetq_lane_f32:
5637   case NEON::BI__builtin_neon_vdups_laneq_f32:
5638     Ops[0] = Builder.CreateBitCast(Ops[0],
5639         llvm::VectorType::get(FloatTy, 4));
5640     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5641                                         "vgetq_lane");
5642   case NEON::BI__builtin_neon_vgetq_lane_f64:
5643   case NEON::BI__builtin_neon_vdupd_laneq_f64:
5644     Ops[0] = Builder.CreateBitCast(Ops[0],
5645         llvm::VectorType::get(DoubleTy, 2));
5646     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5647                                         "vgetq_lane");
5648   case NEON::BI__builtin_neon_vaddd_s64:
5649   case NEON::BI__builtin_neon_vaddd_u64:
5650     return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
5651   case NEON::BI__builtin_neon_vsubd_s64:
5652   case NEON::BI__builtin_neon_vsubd_u64:
5653     return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
5654   case NEON::BI__builtin_neon_vqdmlalh_s16:
5655   case NEON::BI__builtin_neon_vqdmlslh_s16: {
5656     SmallVector<Value *, 2> ProductOps;
5657     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5658     ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
5659     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5660     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5661                           ProductOps, "vqdmlXl");
5662     Constant *CI = ConstantInt::get(SizeTy, 0);
5663     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5664 
5665     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
5666                                         ? Intrinsic::aarch64_neon_sqadd
5667                                         : Intrinsic::aarch64_neon_sqsub;
5668     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
5669   }
5670   case NEON::BI__builtin_neon_vqshlud_n_s64: {
5671     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5672     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5673     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
5674                         Ops, "vqshlu_n");
5675   }
5676   case NEON::BI__builtin_neon_vqshld_n_u64:
5677   case NEON::BI__builtin_neon_vqshld_n_s64: {
5678     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
5679                                    ? Intrinsic::aarch64_neon_uqshl
5680                                    : Intrinsic::aarch64_neon_sqshl;
5681     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5682     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5683     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
5684   }
5685   case NEON::BI__builtin_neon_vrshrd_n_u64:
5686   case NEON::BI__builtin_neon_vrshrd_n_s64: {
5687     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
5688                                    ? Intrinsic::aarch64_neon_urshl
5689                                    : Intrinsic::aarch64_neon_srshl;
5690     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5691     int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
5692     Ops[1] = ConstantInt::get(Int64Ty, -SV);
5693     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
5694   }
5695   case NEON::BI__builtin_neon_vrsrad_n_u64:
5696   case NEON::BI__builtin_neon_vrsrad_n_s64: {
5697     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
5698                                    ? Intrinsic::aarch64_neon_urshl
5699                                    : Intrinsic::aarch64_neon_srshl;
5700     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5701     Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
5702     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
5703                                 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
5704     return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
5705   }
5706   case NEON::BI__builtin_neon_vshld_n_s64:
5707   case NEON::BI__builtin_neon_vshld_n_u64: {
5708     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5709     return Builder.CreateShl(
5710         Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
5711   }
5712   case NEON::BI__builtin_neon_vshrd_n_s64: {
5713     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5714     return Builder.CreateAShr(
5715         Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5716                                                    Amt->getZExtValue())),
5717         "shrd_n");
5718   }
5719   case NEON::BI__builtin_neon_vshrd_n_u64: {
5720     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5721     uint64_t ShiftAmt = Amt->getZExtValue();
5722     // Right-shifting an unsigned value by its size yields 0.
5723     if (ShiftAmt == 64)
5724       return ConstantInt::get(Int64Ty, 0);
5725     return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
5726                               "shrd_n");
5727   }
5728   case NEON::BI__builtin_neon_vsrad_n_s64: {
5729     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
5730     Ops[1] = Builder.CreateAShr(
5731         Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5732                                                    Amt->getZExtValue())),
5733         "shrd_n");
5734     return Builder.CreateAdd(Ops[0], Ops[1]);
5735   }
5736   case NEON::BI__builtin_neon_vsrad_n_u64: {
5737     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
5738     uint64_t ShiftAmt = Amt->getZExtValue();
5739     // Right-shifting an unsigned value by its size yields 0.
5740     // As Op + 0 = Op, return Ops[0] directly.
5741     if (ShiftAmt == 64)
5742       return Ops[0];
5743     Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
5744                                 "shrd_n");
5745     return Builder.CreateAdd(Ops[0], Ops[1]);
5746   }
5747   case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
5748   case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
5749   case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
5750   case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
5751     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
5752                                           "lane");
5753     SmallVector<Value *, 2> ProductOps;
5754     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5755     ProductOps.push_back(vectorWrapScalar16(Ops[2]));
5756     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5757     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5758                           ProductOps, "vqdmlXl");
5759     Constant *CI = ConstantInt::get(SizeTy, 0);
5760     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5761     Ops.pop_back();
5762 
5763     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
5764                        BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
5765                           ? Intrinsic::aarch64_neon_sqadd
5766                           : Intrinsic::aarch64_neon_sqsub;
5767     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
5768   }
5769   case NEON::BI__builtin_neon_vqdmlals_s32:
5770   case NEON::BI__builtin_neon_vqdmlsls_s32: {
5771     SmallVector<Value *, 2> ProductOps;
5772     ProductOps.push_back(Ops[1]);
5773     ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
5774     Ops[1] =
5775         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5776                      ProductOps, "vqdmlXl");
5777 
5778     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
5779                                         ? Intrinsic::aarch64_neon_sqadd
5780                                         : Intrinsic::aarch64_neon_sqsub;
5781     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
5782   }
5783   case NEON::BI__builtin_neon_vqdmlals_lane_s32:
5784   case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
5785   case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
5786   case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
5787     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
5788                                           "lane");
5789     SmallVector<Value *, 2> ProductOps;
5790     ProductOps.push_back(Ops[1]);
5791     ProductOps.push_back(Ops[2]);
5792     Ops[1] =
5793         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5794                      ProductOps, "vqdmlXl");
5795     Ops.pop_back();
5796 
5797     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
5798                        BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
5799                           ? Intrinsic::aarch64_neon_sqadd
5800                           : Intrinsic::aarch64_neon_sqsub;
5801     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
5802   }
5803   }
5804 
5805   llvm::VectorType *VTy = GetNeonType(this, Type);
5806   llvm::Type *Ty = VTy;
5807   if (!Ty)
5808     return nullptr;
5809 
5810   // Not all intrinsics handled by the common case work for AArch64 yet, so only
5811   // defer to common code if it's been added to our special map.
5812   Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
5813                                    AArch64SIMDIntrinsicsProvenSorted);
5814 
5815   if (Builtin)
5816     return EmitCommonNeonBuiltinExpr(
5817         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
5818         Builtin->NameHint, Builtin->TypeModifier, E, Ops,
5819         /*never use addresses*/ Address::invalid(), Address::invalid());
5820 
5821   if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops))
5822     return V;
5823 
5824   unsigned Int;
5825   switch (BuiltinID) {
5826   default: return nullptr;
5827   case NEON::BI__builtin_neon_vbsl_v:
5828   case NEON::BI__builtin_neon_vbslq_v: {
5829     llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
5830     Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
5831     Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
5832     Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
5833 
5834     Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
5835     Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
5836     Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
5837     return Builder.CreateBitCast(Ops[0], Ty);
5838   }
5839   case NEON::BI__builtin_neon_vfma_lane_v:
5840   case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
5841     // The ARM builtins (and instructions) have the addend as the first
5842     // operand, but the 'fma' intrinsics have it last. Swap it around here.
5843     Value *Addend = Ops[0];
5844     Value *Multiplicand = Ops[1];
5845     Value *LaneSource = Ops[2];
5846     Ops[0] = Multiplicand;
5847     Ops[1] = LaneSource;
5848     Ops[2] = Addend;
5849 
5850     // Now adjust things to handle the lane access.
5851     llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
5852       llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
5853       VTy;
5854     llvm::Constant *cst = cast<Constant>(Ops[3]);
5855     Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
5856     Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
5857     Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
5858 
5859     Ops.pop_back();
5860     Int = Intrinsic::fma;
5861     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
5862   }
5863   case NEON::BI__builtin_neon_vfma_laneq_v: {
5864     llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
5865     // v1f64 fma should be mapped to Neon scalar f64 fma
5866     if (VTy && VTy->getElementType() == DoubleTy) {
5867       Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5868       Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5869       llvm::Type *VTy = GetNeonType(this,
5870         NeonTypeFlags(NeonTypeFlags::Float64, false, true));
5871       Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
5872       Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
5873       Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
5874       Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
5875       return Builder.CreateBitCast(Result, Ty);
5876     }
5877     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5878     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5879     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5880 
5881     llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
5882                                             VTy->getNumElements() * 2);
5883     Ops[2] = Builder.CreateBitCast(Ops[2], STy);
5884     Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
5885                                                cast<ConstantInt>(Ops[3]));
5886     Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
5887 
5888     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
5889   }
5890   case NEON::BI__builtin_neon_vfmaq_laneq_v: {
5891     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5892     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5893     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5894 
5895     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5896     Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
5897     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
5898   }
5899   case NEON::BI__builtin_neon_vfmas_lane_f32:
5900   case NEON::BI__builtin_neon_vfmas_laneq_f32:
5901   case NEON::BI__builtin_neon_vfmad_lane_f64:
5902   case NEON::BI__builtin_neon_vfmad_laneq_f64: {
5903     Ops.push_back(EmitScalarExpr(E->getArg(3)));
5904     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
5905     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5906     Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
5907     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
5908   }
5909   case NEON::BI__builtin_neon_vmull_v:
5910     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5911     Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
5912     if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
5913     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
5914   case NEON::BI__builtin_neon_vmax_v:
5915   case NEON::BI__builtin_neon_vmaxq_v:
5916     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5917     Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
5918     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
5919     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
5920   case NEON::BI__builtin_neon_vmin_v:
5921   case NEON::BI__builtin_neon_vminq_v:
5922     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5923     Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
5924     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
5925     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
5926   case NEON::BI__builtin_neon_vabd_v:
5927   case NEON::BI__builtin_neon_vabdq_v:
5928     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5929     Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
5930     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
5931     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
5932   case NEON::BI__builtin_neon_vpadal_v:
5933   case NEON::BI__builtin_neon_vpadalq_v: {
5934     unsigned ArgElts = VTy->getNumElements();
5935     llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
5936     unsigned BitWidth = EltTy->getBitWidth();
5937     llvm::Type *ArgTy = llvm::VectorType::get(
5938         llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
5939     llvm::Type* Tys[2] = { VTy, ArgTy };
5940     Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
5941     SmallVector<llvm::Value*, 1> TmpOps;
5942     TmpOps.push_back(Ops[1]);
5943     Function *F = CGM.getIntrinsic(Int, Tys);
5944     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
5945     llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
5946     return Builder.CreateAdd(tmp, addend);
5947   }
5948   case NEON::BI__builtin_neon_vpmin_v:
5949   case NEON::BI__builtin_neon_vpminq_v:
5950     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5951     Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
5952     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
5953     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
5954   case NEON::BI__builtin_neon_vpmax_v:
5955   case NEON::BI__builtin_neon_vpmaxq_v:
5956     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5957     Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
5958     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
5959     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
5960   case NEON::BI__builtin_neon_vminnm_v:
5961   case NEON::BI__builtin_neon_vminnmq_v:
5962     Int = Intrinsic::aarch64_neon_fminnm;
5963     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
5964   case NEON::BI__builtin_neon_vmaxnm_v:
5965   case NEON::BI__builtin_neon_vmaxnmq_v:
5966     Int = Intrinsic::aarch64_neon_fmaxnm;
5967     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
5968   case NEON::BI__builtin_neon_vrecpss_f32: {
5969     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5970     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
5971                         Ops, "vrecps");
5972   }
5973   case NEON::BI__builtin_neon_vrecpsd_f64: {
5974     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5975     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
5976                         Ops, "vrecps");
5977   }
5978   case NEON::BI__builtin_neon_vqshrun_n_v:
5979     Int = Intrinsic::aarch64_neon_sqshrun;
5980     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
5981   case NEON::BI__builtin_neon_vqrshrun_n_v:
5982     Int = Intrinsic::aarch64_neon_sqrshrun;
5983     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
5984   case NEON::BI__builtin_neon_vqshrn_n_v:
5985     Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
5986     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
5987   case NEON::BI__builtin_neon_vrshrn_n_v:
5988     Int = Intrinsic::aarch64_neon_rshrn;
5989     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
5990   case NEON::BI__builtin_neon_vqrshrn_n_v:
5991     Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
5992     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
5993   case NEON::BI__builtin_neon_vrnda_v:
5994   case NEON::BI__builtin_neon_vrndaq_v: {
5995     Int = Intrinsic::round;
5996     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
5997   }
5998   case NEON::BI__builtin_neon_vrndi_v:
5999   case NEON::BI__builtin_neon_vrndiq_v: {
6000     Int = Intrinsic::nearbyint;
6001     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
6002   }
6003   case NEON::BI__builtin_neon_vrndm_v:
6004   case NEON::BI__builtin_neon_vrndmq_v: {
6005     Int = Intrinsic::floor;
6006     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
6007   }
6008   case NEON::BI__builtin_neon_vrndn_v:
6009   case NEON::BI__builtin_neon_vrndnq_v: {
6010     Int = Intrinsic::aarch64_neon_frintn;
6011     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
6012   }
6013   case NEON::BI__builtin_neon_vrndp_v:
6014   case NEON::BI__builtin_neon_vrndpq_v: {
6015     Int = Intrinsic::ceil;
6016     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
6017   }
6018   case NEON::BI__builtin_neon_vrndx_v:
6019   case NEON::BI__builtin_neon_vrndxq_v: {
6020     Int = Intrinsic::rint;
6021     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
6022   }
6023   case NEON::BI__builtin_neon_vrnd_v:
6024   case NEON::BI__builtin_neon_vrndq_v: {
6025     Int = Intrinsic::trunc;
6026     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
6027   }
6028   case NEON::BI__builtin_neon_vceqz_v:
6029   case NEON::BI__builtin_neon_vceqzq_v:
6030     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
6031                                          ICmpInst::ICMP_EQ, "vceqz");
6032   case NEON::BI__builtin_neon_vcgez_v:
6033   case NEON::BI__builtin_neon_vcgezq_v:
6034     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
6035                                          ICmpInst::ICMP_SGE, "vcgez");
6036   case NEON::BI__builtin_neon_vclez_v:
6037   case NEON::BI__builtin_neon_vclezq_v:
6038     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
6039                                          ICmpInst::ICMP_SLE, "vclez");
6040   case NEON::BI__builtin_neon_vcgtz_v:
6041   case NEON::BI__builtin_neon_vcgtzq_v:
6042     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
6043                                          ICmpInst::ICMP_SGT, "vcgtz");
6044   case NEON::BI__builtin_neon_vcltz_v:
6045   case NEON::BI__builtin_neon_vcltzq_v:
6046     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
6047                                          ICmpInst::ICMP_SLT, "vcltz");
6048   case NEON::BI__builtin_neon_vcvt_f64_v:
6049   case NEON::BI__builtin_neon_vcvtq_f64_v:
6050     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6051     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
6052     return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
6053                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
6054   case NEON::BI__builtin_neon_vcvt_f64_f32: {
6055     assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
6056            "unexpected vcvt_f64_f32 builtin");
6057     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
6058     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6059 
6060     return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
6061   }
6062   case NEON::BI__builtin_neon_vcvt_f32_f64: {
6063     assert(Type.getEltType() == NeonTypeFlags::Float32 &&
6064            "unexpected vcvt_f32_f64 builtin");
6065     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
6066     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6067 
6068     return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
6069   }
6070   case NEON::BI__builtin_neon_vcvt_s32_v:
6071   case NEON::BI__builtin_neon_vcvt_u32_v:
6072   case NEON::BI__builtin_neon_vcvt_s64_v:
6073   case NEON::BI__builtin_neon_vcvt_u64_v:
6074   case NEON::BI__builtin_neon_vcvtq_s32_v:
6075   case NEON::BI__builtin_neon_vcvtq_u32_v:
6076   case NEON::BI__builtin_neon_vcvtq_s64_v:
6077   case NEON::BI__builtin_neon_vcvtq_u64_v: {
6078     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
6079     if (usgn)
6080       return Builder.CreateFPToUI(Ops[0], Ty);
6081     return Builder.CreateFPToSI(Ops[0], Ty);
6082   }
6083   case NEON::BI__builtin_neon_vcvta_s32_v:
6084   case NEON::BI__builtin_neon_vcvtaq_s32_v:
6085   case NEON::BI__builtin_neon_vcvta_u32_v:
6086   case NEON::BI__builtin_neon_vcvtaq_u32_v:
6087   case NEON::BI__builtin_neon_vcvta_s64_v:
6088   case NEON::BI__builtin_neon_vcvtaq_s64_v:
6089   case NEON::BI__builtin_neon_vcvta_u64_v:
6090   case NEON::BI__builtin_neon_vcvtaq_u64_v: {
6091     Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6092     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6093     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
6094   }
6095   case NEON::BI__builtin_neon_vcvtm_s32_v:
6096   case NEON::BI__builtin_neon_vcvtmq_s32_v:
6097   case NEON::BI__builtin_neon_vcvtm_u32_v:
6098   case NEON::BI__builtin_neon_vcvtmq_u32_v:
6099   case NEON::BI__builtin_neon_vcvtm_s64_v:
6100   case NEON::BI__builtin_neon_vcvtmq_s64_v:
6101   case NEON::BI__builtin_neon_vcvtm_u64_v:
6102   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6103     Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6104     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6105     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
6106   }
6107   case NEON::BI__builtin_neon_vcvtn_s32_v:
6108   case NEON::BI__builtin_neon_vcvtnq_s32_v:
6109   case NEON::BI__builtin_neon_vcvtn_u32_v:
6110   case NEON::BI__builtin_neon_vcvtnq_u32_v:
6111   case NEON::BI__builtin_neon_vcvtn_s64_v:
6112   case NEON::BI__builtin_neon_vcvtnq_s64_v:
6113   case NEON::BI__builtin_neon_vcvtn_u64_v:
6114   case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6115     Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6116     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6117     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
6118   }
6119   case NEON::BI__builtin_neon_vcvtp_s32_v:
6120   case NEON::BI__builtin_neon_vcvtpq_s32_v:
6121   case NEON::BI__builtin_neon_vcvtp_u32_v:
6122   case NEON::BI__builtin_neon_vcvtpq_u32_v:
6123   case NEON::BI__builtin_neon_vcvtp_s64_v:
6124   case NEON::BI__builtin_neon_vcvtpq_s64_v:
6125   case NEON::BI__builtin_neon_vcvtp_u64_v:
6126   case NEON::BI__builtin_neon_vcvtpq_u64_v: {
6127     Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
6128     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6129     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
6130   }
6131   case NEON::BI__builtin_neon_vmulx_v:
6132   case NEON::BI__builtin_neon_vmulxq_v: {
6133     Int = Intrinsic::aarch64_neon_fmulx;
6134     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
6135   }
6136   case NEON::BI__builtin_neon_vmul_lane_v:
6137   case NEON::BI__builtin_neon_vmul_laneq_v: {
6138     // v1f64 vmul_lane should be mapped to Neon scalar mul lane
6139     bool Quad = false;
6140     if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
6141       Quad = true;
6142     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6143     llvm::Type *VTy = GetNeonType(this,
6144       NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
6145     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6146     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
6147     Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
6148     return Builder.CreateBitCast(Result, Ty);
6149   }
6150   case NEON::BI__builtin_neon_vnegd_s64:
6151     return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
6152   case NEON::BI__builtin_neon_vpmaxnm_v:
6153   case NEON::BI__builtin_neon_vpmaxnmq_v: {
6154     Int = Intrinsic::aarch64_neon_fmaxnmp;
6155     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
6156   }
6157   case NEON::BI__builtin_neon_vpminnm_v:
6158   case NEON::BI__builtin_neon_vpminnmq_v: {
6159     Int = Intrinsic::aarch64_neon_fminnmp;
6160     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
6161   }
6162   case NEON::BI__builtin_neon_vsqrt_v:
6163   case NEON::BI__builtin_neon_vsqrtq_v: {
6164     Int = Intrinsic::sqrt;
6165     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6166     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
6167   }
6168   case NEON::BI__builtin_neon_vrbit_v:
6169   case NEON::BI__builtin_neon_vrbitq_v: {
6170     Int = Intrinsic::aarch64_neon_rbit;
6171     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
6172   }
6173   case NEON::BI__builtin_neon_vaddv_u8:
6174     // FIXME: These are handled by the AArch64 scalar code.
6175     usgn = true;
6176     // FALLTHROUGH
6177   case NEON::BI__builtin_neon_vaddv_s8: {
6178     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6179     Ty = Int32Ty;
6180     VTy = llvm::VectorType::get(Int8Ty, 8);
6181     llvm::Type *Tys[2] = { Ty, VTy };
6182     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6183     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6184     return Builder.CreateTrunc(Ops[0], Int8Ty);
6185   }
6186   case NEON::BI__builtin_neon_vaddv_u16:
6187     usgn = true;
6188     // FALLTHROUGH
6189   case NEON::BI__builtin_neon_vaddv_s16: {
6190     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6191     Ty = Int32Ty;
6192     VTy = llvm::VectorType::get(Int16Ty, 4);
6193     llvm::Type *Tys[2] = { Ty, VTy };
6194     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6195     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6196     return Builder.CreateTrunc(Ops[0], Int16Ty);
6197   }
6198   case NEON::BI__builtin_neon_vaddvq_u8:
6199     usgn = true;
6200     // FALLTHROUGH
6201   case NEON::BI__builtin_neon_vaddvq_s8: {
6202     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6203     Ty = Int32Ty;
6204     VTy = llvm::VectorType::get(Int8Ty, 16);
6205     llvm::Type *Tys[2] = { Ty, VTy };
6206     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6207     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6208     return Builder.CreateTrunc(Ops[0], Int8Ty);
6209   }
6210   case NEON::BI__builtin_neon_vaddvq_u16:
6211     usgn = true;
6212     // FALLTHROUGH
6213   case NEON::BI__builtin_neon_vaddvq_s16: {
6214     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6215     Ty = Int32Ty;
6216     VTy = llvm::VectorType::get(Int16Ty, 8);
6217     llvm::Type *Tys[2] = { Ty, VTy };
6218     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6219     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6220     return Builder.CreateTrunc(Ops[0], Int16Ty);
6221   }
6222   case NEON::BI__builtin_neon_vmaxv_u8: {
6223     Int = Intrinsic::aarch64_neon_umaxv;
6224     Ty = Int32Ty;
6225     VTy = llvm::VectorType::get(Int8Ty, 8);
6226     llvm::Type *Tys[2] = { Ty, VTy };
6227     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6228     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6229     return Builder.CreateTrunc(Ops[0], Int8Ty);
6230   }
6231   case NEON::BI__builtin_neon_vmaxv_u16: {
6232     Int = Intrinsic::aarch64_neon_umaxv;
6233     Ty = Int32Ty;
6234     VTy = llvm::VectorType::get(Int16Ty, 4);
6235     llvm::Type *Tys[2] = { Ty, VTy };
6236     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6237     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6238     return Builder.CreateTrunc(Ops[0], Int16Ty);
6239   }
6240   case NEON::BI__builtin_neon_vmaxvq_u8: {
6241     Int = Intrinsic::aarch64_neon_umaxv;
6242     Ty = Int32Ty;
6243     VTy = llvm::VectorType::get(Int8Ty, 16);
6244     llvm::Type *Tys[2] = { Ty, VTy };
6245     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6246     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6247     return Builder.CreateTrunc(Ops[0], Int8Ty);
6248   }
6249   case NEON::BI__builtin_neon_vmaxvq_u16: {
6250     Int = Intrinsic::aarch64_neon_umaxv;
6251     Ty = Int32Ty;
6252     VTy = llvm::VectorType::get(Int16Ty, 8);
6253     llvm::Type *Tys[2] = { Ty, VTy };
6254     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6255     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6256     return Builder.CreateTrunc(Ops[0], Int16Ty);
6257   }
6258   case NEON::BI__builtin_neon_vmaxv_s8: {
6259     Int = Intrinsic::aarch64_neon_smaxv;
6260     Ty = Int32Ty;
6261     VTy = llvm::VectorType::get(Int8Ty, 8);
6262     llvm::Type *Tys[2] = { Ty, VTy };
6263     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6264     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6265     return Builder.CreateTrunc(Ops[0], Int8Ty);
6266   }
6267   case NEON::BI__builtin_neon_vmaxv_s16: {
6268     Int = Intrinsic::aarch64_neon_smaxv;
6269     Ty = Int32Ty;
6270     VTy = llvm::VectorType::get(Int16Ty, 4);
6271     llvm::Type *Tys[2] = { Ty, VTy };
6272     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6273     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6274     return Builder.CreateTrunc(Ops[0], Int16Ty);
6275   }
6276   case NEON::BI__builtin_neon_vmaxvq_s8: {
6277     Int = Intrinsic::aarch64_neon_smaxv;
6278     Ty = Int32Ty;
6279     VTy = llvm::VectorType::get(Int8Ty, 16);
6280     llvm::Type *Tys[2] = { Ty, VTy };
6281     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6282     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6283     return Builder.CreateTrunc(Ops[0], Int8Ty);
6284   }
6285   case NEON::BI__builtin_neon_vmaxvq_s16: {
6286     Int = Intrinsic::aarch64_neon_smaxv;
6287     Ty = Int32Ty;
6288     VTy = llvm::VectorType::get(Int16Ty, 8);
6289     llvm::Type *Tys[2] = { Ty, VTy };
6290     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6291     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6292     return Builder.CreateTrunc(Ops[0], Int16Ty);
6293   }
6294   case NEON::BI__builtin_neon_vminv_u8: {
6295     Int = Intrinsic::aarch64_neon_uminv;
6296     Ty = Int32Ty;
6297     VTy = llvm::VectorType::get(Int8Ty, 8);
6298     llvm::Type *Tys[2] = { Ty, VTy };
6299     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6300     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6301     return Builder.CreateTrunc(Ops[0], Int8Ty);
6302   }
6303   case NEON::BI__builtin_neon_vminv_u16: {
6304     Int = Intrinsic::aarch64_neon_uminv;
6305     Ty = Int32Ty;
6306     VTy = llvm::VectorType::get(Int16Ty, 4);
6307     llvm::Type *Tys[2] = { Ty, VTy };
6308     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6309     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6310     return Builder.CreateTrunc(Ops[0], Int16Ty);
6311   }
6312   case NEON::BI__builtin_neon_vminvq_u8: {
6313     Int = Intrinsic::aarch64_neon_uminv;
6314     Ty = Int32Ty;
6315     VTy = llvm::VectorType::get(Int8Ty, 16);
6316     llvm::Type *Tys[2] = { Ty, VTy };
6317     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6318     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6319     return Builder.CreateTrunc(Ops[0], Int8Ty);
6320   }
6321   case NEON::BI__builtin_neon_vminvq_u16: {
6322     Int = Intrinsic::aarch64_neon_uminv;
6323     Ty = Int32Ty;
6324     VTy = llvm::VectorType::get(Int16Ty, 8);
6325     llvm::Type *Tys[2] = { Ty, VTy };
6326     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6327     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6328     return Builder.CreateTrunc(Ops[0], Int16Ty);
6329   }
6330   case NEON::BI__builtin_neon_vminv_s8: {
6331     Int = Intrinsic::aarch64_neon_sminv;
6332     Ty = Int32Ty;
6333     VTy = llvm::VectorType::get(Int8Ty, 8);
6334     llvm::Type *Tys[2] = { Ty, VTy };
6335     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6336     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6337     return Builder.CreateTrunc(Ops[0], Int8Ty);
6338   }
6339   case NEON::BI__builtin_neon_vminv_s16: {
6340     Int = Intrinsic::aarch64_neon_sminv;
6341     Ty = Int32Ty;
6342     VTy = llvm::VectorType::get(Int16Ty, 4);
6343     llvm::Type *Tys[2] = { Ty, VTy };
6344     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6345     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6346     return Builder.CreateTrunc(Ops[0], Int16Ty);
6347   }
6348   case NEON::BI__builtin_neon_vminvq_s8: {
6349     Int = Intrinsic::aarch64_neon_sminv;
6350     Ty = Int32Ty;
6351     VTy = llvm::VectorType::get(Int8Ty, 16);
6352     llvm::Type *Tys[2] = { Ty, VTy };
6353     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6354     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6355     return Builder.CreateTrunc(Ops[0], Int8Ty);
6356   }
6357   case NEON::BI__builtin_neon_vminvq_s16: {
6358     Int = Intrinsic::aarch64_neon_sminv;
6359     Ty = Int32Ty;
6360     VTy = llvm::VectorType::get(Int16Ty, 8);
6361     llvm::Type *Tys[2] = { Ty, VTy };
6362     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6363     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6364     return Builder.CreateTrunc(Ops[0], Int16Ty);
6365   }
6366   case NEON::BI__builtin_neon_vmul_n_f64: {
6367     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6368     Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
6369     return Builder.CreateFMul(Ops[0], RHS);
6370   }
6371   case NEON::BI__builtin_neon_vaddlv_u8: {
6372     Int = Intrinsic::aarch64_neon_uaddlv;
6373     Ty = Int32Ty;
6374     VTy = llvm::VectorType::get(Int8Ty, 8);
6375     llvm::Type *Tys[2] = { Ty, VTy };
6376     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6377     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6378     return Builder.CreateTrunc(Ops[0], Int16Ty);
6379   }
6380   case NEON::BI__builtin_neon_vaddlv_u16: {
6381     Int = Intrinsic::aarch64_neon_uaddlv;
6382     Ty = Int32Ty;
6383     VTy = llvm::VectorType::get(Int16Ty, 4);
6384     llvm::Type *Tys[2] = { Ty, VTy };
6385     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6386     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6387   }
6388   case NEON::BI__builtin_neon_vaddlvq_u8: {
6389     Int = Intrinsic::aarch64_neon_uaddlv;
6390     Ty = Int32Ty;
6391     VTy = llvm::VectorType::get(Int8Ty, 16);
6392     llvm::Type *Tys[2] = { Ty, VTy };
6393     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6394     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6395     return Builder.CreateTrunc(Ops[0], Int16Ty);
6396   }
6397   case NEON::BI__builtin_neon_vaddlvq_u16: {
6398     Int = Intrinsic::aarch64_neon_uaddlv;
6399     Ty = Int32Ty;
6400     VTy = llvm::VectorType::get(Int16Ty, 8);
6401     llvm::Type *Tys[2] = { Ty, VTy };
6402     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6403     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6404   }
6405   case NEON::BI__builtin_neon_vaddlv_s8: {
6406     Int = Intrinsic::aarch64_neon_saddlv;
6407     Ty = Int32Ty;
6408     VTy = llvm::VectorType::get(Int8Ty, 8);
6409     llvm::Type *Tys[2] = { Ty, VTy };
6410     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6411     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6412     return Builder.CreateTrunc(Ops[0], Int16Ty);
6413   }
6414   case NEON::BI__builtin_neon_vaddlv_s16: {
6415     Int = Intrinsic::aarch64_neon_saddlv;
6416     Ty = Int32Ty;
6417     VTy = llvm::VectorType::get(Int16Ty, 4);
6418     llvm::Type *Tys[2] = { Ty, VTy };
6419     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6420     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6421   }
6422   case NEON::BI__builtin_neon_vaddlvq_s8: {
6423     Int = Intrinsic::aarch64_neon_saddlv;
6424     Ty = Int32Ty;
6425     VTy = llvm::VectorType::get(Int8Ty, 16);
6426     llvm::Type *Tys[2] = { Ty, VTy };
6427     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6428     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6429     return Builder.CreateTrunc(Ops[0], Int16Ty);
6430   }
6431   case NEON::BI__builtin_neon_vaddlvq_s16: {
6432     Int = Intrinsic::aarch64_neon_saddlv;
6433     Ty = Int32Ty;
6434     VTy = llvm::VectorType::get(Int16Ty, 8);
6435     llvm::Type *Tys[2] = { Ty, VTy };
6436     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6437     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6438   }
6439   case NEON::BI__builtin_neon_vsri_n_v:
6440   case NEON::BI__builtin_neon_vsriq_n_v: {
6441     Int = Intrinsic::aarch64_neon_vsri;
6442     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6443     return EmitNeonCall(Intrin, Ops, "vsri_n");
6444   }
6445   case NEON::BI__builtin_neon_vsli_n_v:
6446   case NEON::BI__builtin_neon_vsliq_n_v: {
6447     Int = Intrinsic::aarch64_neon_vsli;
6448     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6449     return EmitNeonCall(Intrin, Ops, "vsli_n");
6450   }
6451   case NEON::BI__builtin_neon_vsra_n_v:
6452   case NEON::BI__builtin_neon_vsraq_n_v:
6453     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6454     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
6455     return Builder.CreateAdd(Ops[0], Ops[1]);
6456   case NEON::BI__builtin_neon_vrsra_n_v:
6457   case NEON::BI__builtin_neon_vrsraq_n_v: {
6458     Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
6459     SmallVector<llvm::Value*,2> TmpOps;
6460     TmpOps.push_back(Ops[1]);
6461     TmpOps.push_back(Ops[2]);
6462     Function* F = CGM.getIntrinsic(Int, Ty);
6463     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
6464     Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
6465     return Builder.CreateAdd(Ops[0], tmp);
6466   }
6467     // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
6468     // of an Align parameter here.
6469   case NEON::BI__builtin_neon_vld1_x2_v:
6470   case NEON::BI__builtin_neon_vld1q_x2_v:
6471   case NEON::BI__builtin_neon_vld1_x3_v:
6472   case NEON::BI__builtin_neon_vld1q_x3_v:
6473   case NEON::BI__builtin_neon_vld1_x4_v:
6474   case NEON::BI__builtin_neon_vld1q_x4_v: {
6475     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6476     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6477     llvm::Type *Tys[2] = { VTy, PTy };
6478     unsigned Int;
6479     switch (BuiltinID) {
6480     case NEON::BI__builtin_neon_vld1_x2_v:
6481     case NEON::BI__builtin_neon_vld1q_x2_v:
6482       Int = Intrinsic::aarch64_neon_ld1x2;
6483       break;
6484     case NEON::BI__builtin_neon_vld1_x3_v:
6485     case NEON::BI__builtin_neon_vld1q_x3_v:
6486       Int = Intrinsic::aarch64_neon_ld1x3;
6487       break;
6488     case NEON::BI__builtin_neon_vld1_x4_v:
6489     case NEON::BI__builtin_neon_vld1q_x4_v:
6490       Int = Intrinsic::aarch64_neon_ld1x4;
6491       break;
6492     }
6493     Function *F = CGM.getIntrinsic(Int, Tys);
6494     Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
6495     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6496     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6497     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6498   }
6499   case NEON::BI__builtin_neon_vst1_x2_v:
6500   case NEON::BI__builtin_neon_vst1q_x2_v:
6501   case NEON::BI__builtin_neon_vst1_x3_v:
6502   case NEON::BI__builtin_neon_vst1q_x3_v:
6503   case NEON::BI__builtin_neon_vst1_x4_v:
6504   case NEON::BI__builtin_neon_vst1q_x4_v: {
6505     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6506     llvm::Type *Tys[2] = { VTy, PTy };
6507     unsigned Int;
6508     switch (BuiltinID) {
6509     case NEON::BI__builtin_neon_vst1_x2_v:
6510     case NEON::BI__builtin_neon_vst1q_x2_v:
6511       Int = Intrinsic::aarch64_neon_st1x2;
6512       break;
6513     case NEON::BI__builtin_neon_vst1_x3_v:
6514     case NEON::BI__builtin_neon_vst1q_x3_v:
6515       Int = Intrinsic::aarch64_neon_st1x3;
6516       break;
6517     case NEON::BI__builtin_neon_vst1_x4_v:
6518     case NEON::BI__builtin_neon_vst1q_x4_v:
6519       Int = Intrinsic::aarch64_neon_st1x4;
6520       break;
6521     }
6522     std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6523     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
6524   }
6525   case NEON::BI__builtin_neon_vld1_v:
6526   case NEON::BI__builtin_neon_vld1q_v:
6527     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6528     return Builder.CreateDefaultAlignedLoad(Ops[0]);
6529   case NEON::BI__builtin_neon_vst1_v:
6530   case NEON::BI__builtin_neon_vst1q_v:
6531     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6532     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6533     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6534   case NEON::BI__builtin_neon_vld1_lane_v:
6535   case NEON::BI__builtin_neon_vld1q_lane_v:
6536     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6537     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6538     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6539     Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]);
6540     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
6541   case NEON::BI__builtin_neon_vld1_dup_v:
6542   case NEON::BI__builtin_neon_vld1q_dup_v: {
6543     Value *V = UndefValue::get(Ty);
6544     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6545     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6546     Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]);
6547     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
6548     Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
6549     return EmitNeonSplat(Ops[0], CI);
6550   }
6551   case NEON::BI__builtin_neon_vst1_lane_v:
6552   case NEON::BI__builtin_neon_vst1q_lane_v:
6553     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6554     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6555     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6556     return Builder.CreateDefaultAlignedStore(Ops[1],
6557                                              Builder.CreateBitCast(Ops[0], Ty));
6558   case NEON::BI__builtin_neon_vld2_v:
6559   case NEON::BI__builtin_neon_vld2q_v: {
6560     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6561     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6562     llvm::Type *Tys[2] = { VTy, PTy };
6563     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
6564     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6565     Ops[0] = Builder.CreateBitCast(Ops[0],
6566                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6567     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6568   }
6569   case NEON::BI__builtin_neon_vld3_v:
6570   case NEON::BI__builtin_neon_vld3q_v: {
6571     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6572     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6573     llvm::Type *Tys[2] = { VTy, PTy };
6574     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
6575     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6576     Ops[0] = Builder.CreateBitCast(Ops[0],
6577                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6578     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6579   }
6580   case NEON::BI__builtin_neon_vld4_v:
6581   case NEON::BI__builtin_neon_vld4q_v: {
6582     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6583     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6584     llvm::Type *Tys[2] = { VTy, PTy };
6585     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
6586     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6587     Ops[0] = Builder.CreateBitCast(Ops[0],
6588                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6589     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6590   }
6591   case NEON::BI__builtin_neon_vld2_dup_v:
6592   case NEON::BI__builtin_neon_vld2q_dup_v: {
6593     llvm::Type *PTy =
6594       llvm::PointerType::getUnqual(VTy->getElementType());
6595     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6596     llvm::Type *Tys[2] = { VTy, PTy };
6597     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
6598     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6599     Ops[0] = Builder.CreateBitCast(Ops[0],
6600                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6601     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6602   }
6603   case NEON::BI__builtin_neon_vld3_dup_v:
6604   case NEON::BI__builtin_neon_vld3q_dup_v: {
6605     llvm::Type *PTy =
6606       llvm::PointerType::getUnqual(VTy->getElementType());
6607     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6608     llvm::Type *Tys[2] = { VTy, PTy };
6609     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
6610     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6611     Ops[0] = Builder.CreateBitCast(Ops[0],
6612                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6613     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6614   }
6615   case NEON::BI__builtin_neon_vld4_dup_v:
6616   case NEON::BI__builtin_neon_vld4q_dup_v: {
6617     llvm::Type *PTy =
6618       llvm::PointerType::getUnqual(VTy->getElementType());
6619     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6620     llvm::Type *Tys[2] = { VTy, PTy };
6621     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
6622     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6623     Ops[0] = Builder.CreateBitCast(Ops[0],
6624                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6625     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6626   }
6627   case NEON::BI__builtin_neon_vld2_lane_v:
6628   case NEON::BI__builtin_neon_vld2q_lane_v: {
6629     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6630     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
6631     Ops.push_back(Ops[1]);
6632     Ops.erase(Ops.begin()+1);
6633     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6634     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6635     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6636     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
6637     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6638     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6639     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6640   }
6641   case NEON::BI__builtin_neon_vld3_lane_v:
6642   case NEON::BI__builtin_neon_vld3q_lane_v: {
6643     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6644     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
6645     Ops.push_back(Ops[1]);
6646     Ops.erase(Ops.begin()+1);
6647     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6648     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6649     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6650     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6651     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
6652     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6653     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6654     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6655   }
6656   case NEON::BI__builtin_neon_vld4_lane_v:
6657   case NEON::BI__builtin_neon_vld4q_lane_v: {
6658     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6659     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
6660     Ops.push_back(Ops[1]);
6661     Ops.erase(Ops.begin()+1);
6662     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6663     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6664     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6665     Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
6666     Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
6667     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
6668     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6669     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6670     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6671   }
6672   case NEON::BI__builtin_neon_vst2_v:
6673   case NEON::BI__builtin_neon_vst2q_v: {
6674     Ops.push_back(Ops[0]);
6675     Ops.erase(Ops.begin());
6676     llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
6677     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
6678                         Ops, "");
6679   }
6680   case NEON::BI__builtin_neon_vst2_lane_v:
6681   case NEON::BI__builtin_neon_vst2q_lane_v: {
6682     Ops.push_back(Ops[0]);
6683     Ops.erase(Ops.begin());
6684     Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
6685     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6686     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
6687                         Ops, "");
6688   }
6689   case NEON::BI__builtin_neon_vst3_v:
6690   case NEON::BI__builtin_neon_vst3q_v: {
6691     Ops.push_back(Ops[0]);
6692     Ops.erase(Ops.begin());
6693     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6694     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
6695                         Ops, "");
6696   }
6697   case NEON::BI__builtin_neon_vst3_lane_v:
6698   case NEON::BI__builtin_neon_vst3q_lane_v: {
6699     Ops.push_back(Ops[0]);
6700     Ops.erase(Ops.begin());
6701     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6702     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6703     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
6704                         Ops, "");
6705   }
6706   case NEON::BI__builtin_neon_vst4_v:
6707   case NEON::BI__builtin_neon_vst4q_v: {
6708     Ops.push_back(Ops[0]);
6709     Ops.erase(Ops.begin());
6710     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6711     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
6712                         Ops, "");
6713   }
6714   case NEON::BI__builtin_neon_vst4_lane_v:
6715   case NEON::BI__builtin_neon_vst4q_lane_v: {
6716     Ops.push_back(Ops[0]);
6717     Ops.erase(Ops.begin());
6718     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6719     llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
6720     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
6721                         Ops, "");
6722   }
6723   case NEON::BI__builtin_neon_vtrn_v:
6724   case NEON::BI__builtin_neon_vtrnq_v: {
6725     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6726     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6727     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6728     Value *SV = nullptr;
6729 
6730     for (unsigned vi = 0; vi != 2; ++vi) {
6731       SmallVector<uint32_t, 16> Indices;
6732       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6733         Indices.push_back(i+vi);
6734         Indices.push_back(i+e+vi);
6735       }
6736       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6737       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
6738       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6739     }
6740     return SV;
6741   }
6742   case NEON::BI__builtin_neon_vuzp_v:
6743   case NEON::BI__builtin_neon_vuzpq_v: {
6744     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6745     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6746     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6747     Value *SV = nullptr;
6748 
6749     for (unsigned vi = 0; vi != 2; ++vi) {
6750       SmallVector<uint32_t, 16> Indices;
6751       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
6752         Indices.push_back(2*i+vi);
6753 
6754       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6755       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
6756       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6757     }
6758     return SV;
6759   }
6760   case NEON::BI__builtin_neon_vzip_v:
6761   case NEON::BI__builtin_neon_vzipq_v: {
6762     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6763     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6764     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6765     Value *SV = nullptr;
6766 
6767     for (unsigned vi = 0; vi != 2; ++vi) {
6768       SmallVector<uint32_t, 16> Indices;
6769       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6770         Indices.push_back((i + vi*e) >> 1);
6771         Indices.push_back(((i + vi*e) >> 1)+e);
6772       }
6773       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6774       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
6775       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6776     }
6777     return SV;
6778   }
6779   case NEON::BI__builtin_neon_vqtbl1q_v: {
6780     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
6781                         Ops, "vtbl1");
6782   }
6783   case NEON::BI__builtin_neon_vqtbl2q_v: {
6784     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
6785                         Ops, "vtbl2");
6786   }
6787   case NEON::BI__builtin_neon_vqtbl3q_v: {
6788     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
6789                         Ops, "vtbl3");
6790   }
6791   case NEON::BI__builtin_neon_vqtbl4q_v: {
6792     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
6793                         Ops, "vtbl4");
6794   }
6795   case NEON::BI__builtin_neon_vqtbx1q_v: {
6796     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
6797                         Ops, "vtbx1");
6798   }
6799   case NEON::BI__builtin_neon_vqtbx2q_v: {
6800     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
6801                         Ops, "vtbx2");
6802   }
6803   case NEON::BI__builtin_neon_vqtbx3q_v: {
6804     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
6805                         Ops, "vtbx3");
6806   }
6807   case NEON::BI__builtin_neon_vqtbx4q_v: {
6808     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
6809                         Ops, "vtbx4");
6810   }
6811   case NEON::BI__builtin_neon_vsqadd_v:
6812   case NEON::BI__builtin_neon_vsqaddq_v: {
6813     Int = Intrinsic::aarch64_neon_usqadd;
6814     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
6815   }
6816   case NEON::BI__builtin_neon_vuqadd_v:
6817   case NEON::BI__builtin_neon_vuqaddq_v: {
6818     Int = Intrinsic::aarch64_neon_suqadd;
6819     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
6820   }
6821   }
6822 }
6823 
6824 llvm::Value *CodeGenFunction::
6825 BuildVector(ArrayRef<llvm::Value*> Ops) {
6826   assert((Ops.size() & (Ops.size() - 1)) == 0 &&
6827          "Not a power-of-two sized vector!");
6828   bool AllConstants = true;
6829   for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
6830     AllConstants &= isa<Constant>(Ops[i]);
6831 
6832   // If this is a constant vector, create a ConstantVector.
6833   if (AllConstants) {
6834     SmallVector<llvm::Constant*, 16> CstOps;
6835     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
6836       CstOps.push_back(cast<Constant>(Ops[i]));
6837     return llvm::ConstantVector::get(CstOps);
6838   }
6839 
6840   // Otherwise, insertelement the values to build the vector.
6841   Value *Result =
6842     llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
6843 
6844   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
6845     Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
6846 
6847   return Result;
6848 }
6849 
6850 // Convert the mask from an integer type to a vector of i1.
6851 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
6852                               unsigned NumElts) {
6853 
6854   llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
6855                          cast<IntegerType>(Mask->getType())->getBitWidth());
6856   Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
6857 
6858   // If we have less than 8 elements, then the starting mask was an i8 and
6859   // we need to extract down to the right number of elements.
6860   if (NumElts < 8) {
6861     uint32_t Indices[4];
6862     for (unsigned i = 0; i != NumElts; ++i)
6863       Indices[i] = i;
6864     MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
6865                                              makeArrayRef(Indices, NumElts),
6866                                              "extract");
6867   }
6868   return MaskVec;
6869 }
6870 
6871 static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
6872                                  SmallVectorImpl<Value *> &Ops,
6873                                  unsigned Align) {
6874   // Cast the pointer to right type.
6875   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
6876                                llvm::PointerType::getUnqual(Ops[1]->getType()));
6877 
6878   // If the mask is all ones just emit a regular store.
6879   if (const auto *C = dyn_cast<Constant>(Ops[2]))
6880     if (C->isAllOnesValue())
6881       return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align);
6882 
6883   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
6884                                    Ops[1]->getType()->getVectorNumElements());
6885 
6886   return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec);
6887 }
6888 
6889 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
6890                                 SmallVectorImpl<Value *> &Ops, unsigned Align) {
6891   // Cast the pointer to right type.
6892   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
6893                                llvm::PointerType::getUnqual(Ops[1]->getType()));
6894 
6895   // If the mask is all ones just emit a regular store.
6896   if (const auto *C = dyn_cast<Constant>(Ops[2]))
6897     if (C->isAllOnesValue())
6898       return CGF.Builder.CreateAlignedLoad(Ops[0], Align);
6899 
6900   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
6901                                    Ops[1]->getType()->getVectorNumElements());
6902 
6903   return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
6904 }
6905 
6906 static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF,
6907                                         SmallVectorImpl<Value *> &Ops,
6908                                         llvm::Type *DstTy,
6909                                         unsigned SrcSizeInBits,
6910                                         unsigned Align) {
6911   // Load the subvector.
6912   Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align);
6913 
6914   // Create broadcast mask.
6915   unsigned NumDstElts = DstTy->getVectorNumElements();
6916   unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits();
6917 
6918   SmallVector<uint32_t, 8> Mask;
6919   for (unsigned i = 0; i != NumDstElts; i += NumSrcElts)
6920     for (unsigned j = 0; j != NumSrcElts; ++j)
6921       Mask.push_back(j);
6922 
6923   return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst");
6924 }
6925 
6926 static Value *EmitX86Select(CodeGenFunction &CGF,
6927                             Value *Mask, Value *Op0, Value *Op1) {
6928 
6929   // If the mask is all ones just return first argument.
6930   if (const auto *C = dyn_cast<Constant>(Mask))
6931     if (C->isAllOnesValue())
6932       return Op0;
6933 
6934   Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());
6935 
6936   return CGF.Builder.CreateSelect(Mask, Op0, Op1);
6937 }
6938 
6939 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
6940                                    bool Signed, SmallVectorImpl<Value *> &Ops) {
6941   unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
6942   Value *Cmp;
6943 
6944   if (CC == 3) {
6945     Cmp = Constant::getNullValue(
6946                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
6947   } else if (CC == 7) {
6948     Cmp = Constant::getAllOnesValue(
6949                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
6950   } else {
6951     ICmpInst::Predicate Pred;
6952     switch (CC) {
6953     default: llvm_unreachable("Unknown condition code");
6954     case 0: Pred = ICmpInst::ICMP_EQ;  break;
6955     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
6956     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
6957     case 4: Pred = ICmpInst::ICMP_NE;  break;
6958     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
6959     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
6960     }
6961     Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
6962   }
6963 
6964   const auto *C = dyn_cast<Constant>(Ops.back());
6965   if (!C || !C->isAllOnesValue())
6966     Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts));
6967 
6968   if (NumElts < 8) {
6969     uint32_t Indices[8];
6970     for (unsigned i = 0; i != NumElts; ++i)
6971       Indices[i] = i;
6972     for (unsigned i = NumElts; i != 8; ++i)
6973       Indices[i] = i % NumElts + NumElts;
6974     Cmp = CGF.Builder.CreateShuffleVector(
6975         Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
6976   }
6977   return CGF.Builder.CreateBitCast(Cmp,
6978                                    IntegerType::get(CGF.getLLVMContext(),
6979                                                     std::max(NumElts, 8U)));
6980 }
6981 
6982 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
6983                                            const CallExpr *E) {
6984   if (BuiltinID == X86::BI__builtin_ms_va_start ||
6985       BuiltinID == X86::BI__builtin_ms_va_end)
6986     return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
6987                           BuiltinID == X86::BI__builtin_ms_va_start);
6988   if (BuiltinID == X86::BI__builtin_ms_va_copy) {
6989     // Lower this manually. We can't reliably determine whether or not any
6990     // given va_copy() is for a Win64 va_list from the calling convention
6991     // alone, because it's legal to do this from a System V ABI function.
6992     // With opaque pointer types, we won't have enough information in LLVM
6993     // IR to determine this from the argument types, either. Best to do it
6994     // now, while we have enough information.
6995     Address DestAddr = EmitMSVAListRef(E->getArg(0));
6996     Address SrcAddr = EmitMSVAListRef(E->getArg(1));
6997 
6998     llvm::Type *BPP = Int8PtrPtrTy;
6999 
7000     DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
7001                        DestAddr.getAlignment());
7002     SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
7003                       SrcAddr.getAlignment());
7004 
7005     Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
7006     return Builder.CreateStore(ArgPtr, DestAddr);
7007   }
7008 
7009   SmallVector<Value*, 4> Ops;
7010 
7011   // Find out if any arguments are required to be integer constant expressions.
7012   unsigned ICEArguments = 0;
7013   ASTContext::GetBuiltinTypeError Error;
7014   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
7015   assert(Error == ASTContext::GE_None && "Should not codegen an error");
7016 
7017   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
7018     // If this is a normal argument, just emit it as a scalar.
7019     if ((ICEArguments & (1 << i)) == 0) {
7020       Ops.push_back(EmitScalarExpr(E->getArg(i)));
7021       continue;
7022     }
7023 
7024     // If this is required to be a constant, constant fold it so that we know
7025     // that the generated intrinsic gets a ConstantInt.
7026     llvm::APSInt Result;
7027     bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
7028     assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
7029     Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
7030   }
7031 
7032   // These exist so that the builtin that takes an immediate can be bounds
7033   // checked by clang to avoid passing bad immediates to the backend. Since
7034   // AVX has a larger immediate than SSE we would need separate builtins to
7035   // do the different bounds checking. Rather than create a clang specific
7036   // SSE only builtin, this implements eight separate builtins to match gcc
7037   // implementation.
7038   auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
7039     Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
7040     llvm::Function *F = CGM.getIntrinsic(ID);
7041     return Builder.CreateCall(F, Ops);
7042   };
7043 
7044   // For the vector forms of FP comparisons, translate the builtins directly to
7045   // IR.
7046   // TODO: The builtins could be removed if the SSE header files used vector
7047   // extension comparisons directly (vector ordered/unordered may need
7048   // additional support via __builtin_isnan()).
7049   auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
7050     Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
7051     llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
7052     llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
7053     Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
7054     return Builder.CreateBitCast(Sext, FPVecTy);
7055   };
7056 
7057   switch (BuiltinID) {
7058   default: return nullptr;
7059   case X86::BI__builtin_cpu_supports: {
7060     const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
7061     StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
7062 
7063     // TODO: When/if this becomes more than x86 specific then use a TargetInfo
7064     // based mapping.
7065     // Processor features and mapping to processor feature value.
7066     enum X86Features {
7067       CMOV = 0,
7068       MMX,
7069       POPCNT,
7070       SSE,
7071       SSE2,
7072       SSE3,
7073       SSSE3,
7074       SSE4_1,
7075       SSE4_2,
7076       AVX,
7077       AVX2,
7078       SSE4_A,
7079       FMA4,
7080       XOP,
7081       FMA,
7082       AVX512F,
7083       BMI,
7084       BMI2,
7085       AES,
7086       PCLMUL,
7087       AVX512VL,
7088       AVX512BW,
7089       AVX512DQ,
7090       AVX512CD,
7091       AVX512ER,
7092       AVX512PF,
7093       AVX512VBMI,
7094       AVX512IFMA,
7095       MAX
7096     };
7097 
7098     X86Features Feature = StringSwitch<X86Features>(FeatureStr)
7099                               .Case("cmov", X86Features::CMOV)
7100                               .Case("mmx", X86Features::MMX)
7101                               .Case("popcnt", X86Features::POPCNT)
7102                               .Case("sse", X86Features::SSE)
7103                               .Case("sse2", X86Features::SSE2)
7104                               .Case("sse3", X86Features::SSE3)
7105                               .Case("ssse3", X86Features::SSSE3)
7106                               .Case("sse4.1", X86Features::SSE4_1)
7107                               .Case("sse4.2", X86Features::SSE4_2)
7108                               .Case("avx", X86Features::AVX)
7109                               .Case("avx2", X86Features::AVX2)
7110                               .Case("sse4a", X86Features::SSE4_A)
7111                               .Case("fma4", X86Features::FMA4)
7112                               .Case("xop", X86Features::XOP)
7113                               .Case("fma", X86Features::FMA)
7114                               .Case("avx512f", X86Features::AVX512F)
7115                               .Case("bmi", X86Features::BMI)
7116                               .Case("bmi2", X86Features::BMI2)
7117                               .Case("aes", X86Features::AES)
7118                               .Case("pclmul", X86Features::PCLMUL)
7119                               .Case("avx512vl", X86Features::AVX512VL)
7120                               .Case("avx512bw", X86Features::AVX512BW)
7121                               .Case("avx512dq", X86Features::AVX512DQ)
7122                               .Case("avx512cd", X86Features::AVX512CD)
7123                               .Case("avx512er", X86Features::AVX512ER)
7124                               .Case("avx512pf", X86Features::AVX512PF)
7125                               .Case("avx512vbmi", X86Features::AVX512VBMI)
7126                               .Case("avx512ifma", X86Features::AVX512IFMA)
7127                               .Default(X86Features::MAX);
7128     assert(Feature != X86Features::MAX && "Invalid feature!");
7129 
7130     // Matching the struct layout from the compiler-rt/libgcc structure that is
7131     // filled in:
7132     // unsigned int __cpu_vendor;
7133     // unsigned int __cpu_type;
7134     // unsigned int __cpu_subtype;
7135     // unsigned int __cpu_features[1];
7136     llvm::Type *STy = llvm::StructType::get(
7137         Int32Ty, Int32Ty, Int32Ty, llvm::ArrayType::get(Int32Ty, 1), nullptr);
7138 
7139     // Grab the global __cpu_model.
7140     llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
7141 
7142     // Grab the first (0th) element from the field __cpu_features off of the
7143     // global in the struct STy.
7144     Value *Idxs[] = {
7145       ConstantInt::get(Int32Ty, 0),
7146       ConstantInt::get(Int32Ty, 3),
7147       ConstantInt::get(Int32Ty, 0)
7148     };
7149     Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
7150     Value *Features = Builder.CreateAlignedLoad(CpuFeatures,
7151                                                 CharUnits::fromQuantity(4));
7152 
7153     // Check the value of the bit corresponding to the feature requested.
7154     Value *Bitset = Builder.CreateAnd(
7155         Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature));
7156     return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
7157   }
7158   case X86::BI_mm_prefetch: {
7159     Value *Address = Ops[0];
7160     Value *RW = ConstantInt::get(Int32Ty, 0);
7161     Value *Locality = Ops[1];
7162     Value *Data = ConstantInt::get(Int32Ty, 1);
7163     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
7164     return Builder.CreateCall(F, {Address, RW, Locality, Data});
7165   }
7166   case X86::BI_mm_clflush: {
7167     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
7168                               Ops[0]);
7169   }
7170   case X86::BI_mm_lfence: {
7171     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
7172   }
7173   case X86::BI_mm_mfence: {
7174     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
7175   }
7176   case X86::BI_mm_sfence: {
7177     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
7178   }
7179   case X86::BI_mm_pause: {
7180     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
7181   }
7182   case X86::BI__rdtsc: {
7183     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
7184   }
7185   case X86::BI__builtin_ia32_undef128:
7186   case X86::BI__builtin_ia32_undef256:
7187   case X86::BI__builtin_ia32_undef512:
7188     return UndefValue::get(ConvertType(E->getType()));
7189   case X86::BI__builtin_ia32_vec_init_v8qi:
7190   case X86::BI__builtin_ia32_vec_init_v4hi:
7191   case X86::BI__builtin_ia32_vec_init_v2si:
7192     return Builder.CreateBitCast(BuildVector(Ops),
7193                                  llvm::Type::getX86_MMXTy(getLLVMContext()));
7194   case X86::BI__builtin_ia32_vec_ext_v2si:
7195     return Builder.CreateExtractElement(Ops[0],
7196                                   llvm::ConstantInt::get(Ops[1]->getType(), 0));
7197   case X86::BI_mm_setcsr:
7198   case X86::BI__builtin_ia32_ldmxcsr: {
7199     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
7200     Builder.CreateStore(Ops[0], Tmp);
7201     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
7202                           Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
7203   }
7204   case X86::BI_mm_getcsr:
7205   case X86::BI__builtin_ia32_stmxcsr: {
7206     Address Tmp = CreateMemTemp(E->getType());
7207     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
7208                        Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
7209     return Builder.CreateLoad(Tmp, "stmxcsr");
7210   }
7211   case X86::BI__builtin_ia32_xsave:
7212   case X86::BI__builtin_ia32_xsave64:
7213   case X86::BI__builtin_ia32_xrstor:
7214   case X86::BI__builtin_ia32_xrstor64:
7215   case X86::BI__builtin_ia32_xsaveopt:
7216   case X86::BI__builtin_ia32_xsaveopt64:
7217   case X86::BI__builtin_ia32_xrstors:
7218   case X86::BI__builtin_ia32_xrstors64:
7219   case X86::BI__builtin_ia32_xsavec:
7220   case X86::BI__builtin_ia32_xsavec64:
7221   case X86::BI__builtin_ia32_xsaves:
7222   case X86::BI__builtin_ia32_xsaves64: {
7223     Intrinsic::ID ID;
7224 #define INTRINSIC_X86_XSAVE_ID(NAME) \
7225     case X86::BI__builtin_ia32_##NAME: \
7226       ID = Intrinsic::x86_##NAME; \
7227       break
7228     switch (BuiltinID) {
7229     default: llvm_unreachable("Unsupported intrinsic!");
7230     INTRINSIC_X86_XSAVE_ID(xsave);
7231     INTRINSIC_X86_XSAVE_ID(xsave64);
7232     INTRINSIC_X86_XSAVE_ID(xrstor);
7233     INTRINSIC_X86_XSAVE_ID(xrstor64);
7234     INTRINSIC_X86_XSAVE_ID(xsaveopt);
7235     INTRINSIC_X86_XSAVE_ID(xsaveopt64);
7236     INTRINSIC_X86_XSAVE_ID(xrstors);
7237     INTRINSIC_X86_XSAVE_ID(xrstors64);
7238     INTRINSIC_X86_XSAVE_ID(xsavec);
7239     INTRINSIC_X86_XSAVE_ID(xsavec64);
7240     INTRINSIC_X86_XSAVE_ID(xsaves);
7241     INTRINSIC_X86_XSAVE_ID(xsaves64);
7242     }
7243 #undef INTRINSIC_X86_XSAVE_ID
7244     Value *Mhi = Builder.CreateTrunc(
7245       Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
7246     Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
7247     Ops[1] = Mhi;
7248     Ops.push_back(Mlo);
7249     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
7250   }
7251   case X86::BI__builtin_ia32_storedqudi128_mask:
7252   case X86::BI__builtin_ia32_storedqusi128_mask:
7253   case X86::BI__builtin_ia32_storedquhi128_mask:
7254   case X86::BI__builtin_ia32_storedquqi128_mask:
7255   case X86::BI__builtin_ia32_storeupd128_mask:
7256   case X86::BI__builtin_ia32_storeups128_mask:
7257   case X86::BI__builtin_ia32_storedqudi256_mask:
7258   case X86::BI__builtin_ia32_storedqusi256_mask:
7259   case X86::BI__builtin_ia32_storedquhi256_mask:
7260   case X86::BI__builtin_ia32_storedquqi256_mask:
7261   case X86::BI__builtin_ia32_storeupd256_mask:
7262   case X86::BI__builtin_ia32_storeups256_mask:
7263   case X86::BI__builtin_ia32_storedqudi512_mask:
7264   case X86::BI__builtin_ia32_storedqusi512_mask:
7265   case X86::BI__builtin_ia32_storedquhi512_mask:
7266   case X86::BI__builtin_ia32_storedquqi512_mask:
7267   case X86::BI__builtin_ia32_storeupd512_mask:
7268   case X86::BI__builtin_ia32_storeups512_mask:
7269     return EmitX86MaskedStore(*this, Ops, 1);
7270 
7271   case X86::BI__builtin_ia32_movdqa32store128_mask:
7272   case X86::BI__builtin_ia32_movdqa64store128_mask:
7273   case X86::BI__builtin_ia32_storeaps128_mask:
7274   case X86::BI__builtin_ia32_storeapd128_mask:
7275   case X86::BI__builtin_ia32_movdqa32store256_mask:
7276   case X86::BI__builtin_ia32_movdqa64store256_mask:
7277   case X86::BI__builtin_ia32_storeaps256_mask:
7278   case X86::BI__builtin_ia32_storeapd256_mask:
7279   case X86::BI__builtin_ia32_movdqa32store512_mask:
7280   case X86::BI__builtin_ia32_movdqa64store512_mask:
7281   case X86::BI__builtin_ia32_storeaps512_mask:
7282   case X86::BI__builtin_ia32_storeapd512_mask: {
7283     unsigned Align =
7284       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
7285     return EmitX86MaskedStore(*this, Ops, Align);
7286   }
7287   case X86::BI__builtin_ia32_loadups128_mask:
7288   case X86::BI__builtin_ia32_loadups256_mask:
7289   case X86::BI__builtin_ia32_loadups512_mask:
7290   case X86::BI__builtin_ia32_loadupd128_mask:
7291   case X86::BI__builtin_ia32_loadupd256_mask:
7292   case X86::BI__builtin_ia32_loadupd512_mask:
7293   case X86::BI__builtin_ia32_loaddquqi128_mask:
7294   case X86::BI__builtin_ia32_loaddquqi256_mask:
7295   case X86::BI__builtin_ia32_loaddquqi512_mask:
7296   case X86::BI__builtin_ia32_loaddquhi128_mask:
7297   case X86::BI__builtin_ia32_loaddquhi256_mask:
7298   case X86::BI__builtin_ia32_loaddquhi512_mask:
7299   case X86::BI__builtin_ia32_loaddqusi128_mask:
7300   case X86::BI__builtin_ia32_loaddqusi256_mask:
7301   case X86::BI__builtin_ia32_loaddqusi512_mask:
7302   case X86::BI__builtin_ia32_loaddqudi128_mask:
7303   case X86::BI__builtin_ia32_loaddqudi256_mask:
7304   case X86::BI__builtin_ia32_loaddqudi512_mask:
7305     return EmitX86MaskedLoad(*this, Ops, 1);
7306 
7307   case X86::BI__builtin_ia32_loadaps128_mask:
7308   case X86::BI__builtin_ia32_loadaps256_mask:
7309   case X86::BI__builtin_ia32_loadaps512_mask:
7310   case X86::BI__builtin_ia32_loadapd128_mask:
7311   case X86::BI__builtin_ia32_loadapd256_mask:
7312   case X86::BI__builtin_ia32_loadapd512_mask:
7313   case X86::BI__builtin_ia32_movdqa32load128_mask:
7314   case X86::BI__builtin_ia32_movdqa32load256_mask:
7315   case X86::BI__builtin_ia32_movdqa32load512_mask:
7316   case X86::BI__builtin_ia32_movdqa64load128_mask:
7317   case X86::BI__builtin_ia32_movdqa64load256_mask:
7318   case X86::BI__builtin_ia32_movdqa64load512_mask: {
7319     unsigned Align =
7320       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
7321     return EmitX86MaskedLoad(*this, Ops, Align);
7322   }
7323 
7324   case X86::BI__builtin_ia32_vbroadcastf128_pd256:
7325   case X86::BI__builtin_ia32_vbroadcastf128_ps256: {
7326     llvm::Type *DstTy = ConvertType(E->getType());
7327     return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1);
7328   }
7329 
7330   case X86::BI__builtin_ia32_storehps:
7331   case X86::BI__builtin_ia32_storelps: {
7332     llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
7333     llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
7334 
7335     // cast val v2i64
7336     Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
7337 
7338     // extract (0, 1)
7339     unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
7340     llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
7341     Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
7342 
7343     // cast pointer to i64 & store
7344     Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
7345     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7346   }
7347   case X86::BI__builtin_ia32_palignr128:
7348   case X86::BI__builtin_ia32_palignr256:
7349   case X86::BI__builtin_ia32_palignr128_mask:
7350   case X86::BI__builtin_ia32_palignr256_mask:
7351   case X86::BI__builtin_ia32_palignr512_mask: {
7352     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7353 
7354     unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7355     assert(NumElts % 16 == 0);
7356 
7357     // If palignr is shifting the pair of vectors more than the size of two
7358     // lanes, emit zero.
7359     if (ShiftVal >= 32)
7360       return llvm::Constant::getNullValue(ConvertType(E->getType()));
7361 
7362     // If palignr is shifting the pair of input vectors more than one lane,
7363     // but less than two lanes, convert to shifting in zeroes.
7364     if (ShiftVal > 16) {
7365       ShiftVal -= 16;
7366       Ops[1] = Ops[0];
7367       Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
7368     }
7369 
7370     uint32_t Indices[64];
7371     // 256-bit palignr operates on 128-bit lanes so we need to handle that
7372     for (unsigned l = 0; l != NumElts; l += 16) {
7373       for (unsigned i = 0; i != 16; ++i) {
7374         unsigned Idx = ShiftVal + i;
7375         if (Idx >= 16)
7376           Idx += NumElts - 16; // End of lane, switch operand.
7377         Indices[l + i] = Idx + l;
7378       }
7379     }
7380 
7381     Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
7382                                                makeArrayRef(Indices, NumElts),
7383                                                "palignr");
7384 
7385     // If this isn't a masked builtin, just return the align operation.
7386     if (Ops.size() == 3)
7387       return Align;
7388 
7389     return EmitX86Select(*this, Ops[4], Align, Ops[3]);
7390   }
7391 
7392   case X86::BI__builtin_ia32_movnti:
7393   case X86::BI__builtin_ia32_movnti64: {
7394     llvm::MDNode *Node = llvm::MDNode::get(
7395         getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
7396 
7397     // Convert the type of the pointer to a pointer to the stored type.
7398     Value *BC = Builder.CreateBitCast(Ops[0],
7399                                 llvm::PointerType::getUnqual(Ops[1]->getType()),
7400                                       "cast");
7401     StoreInst *SI = Builder.CreateDefaultAlignedStore(Ops[1], BC);
7402     SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
7403 
7404     // No alignment for scalar intrinsic store.
7405     SI->setAlignment(1);
7406     return SI;
7407   }
7408   case X86::BI__builtin_ia32_movntsd:
7409   case X86::BI__builtin_ia32_movntss: {
7410     llvm::MDNode *Node = llvm::MDNode::get(
7411         getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
7412 
7413     // Extract the 0'th element of the source vector.
7414     Value *Scl = Builder.CreateExtractElement(Ops[1], (uint64_t)0, "extract");
7415 
7416     // Convert the type of the pointer to a pointer to the stored type.
7417     Value *BC = Builder.CreateBitCast(Ops[0],
7418                                 llvm::PointerType::getUnqual(Scl->getType()),
7419                                       "cast");
7420 
7421     // Unaligned nontemporal store of the scalar value.
7422     StoreInst *SI = Builder.CreateDefaultAlignedStore(Scl, BC);
7423     SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
7424     SI->setAlignment(1);
7425     return SI;
7426   }
7427 
7428   case X86::BI__builtin_ia32_selectb_128:
7429   case X86::BI__builtin_ia32_selectb_256:
7430   case X86::BI__builtin_ia32_selectb_512:
7431   case X86::BI__builtin_ia32_selectw_128:
7432   case X86::BI__builtin_ia32_selectw_256:
7433   case X86::BI__builtin_ia32_selectw_512:
7434   case X86::BI__builtin_ia32_selectd_128:
7435   case X86::BI__builtin_ia32_selectd_256:
7436   case X86::BI__builtin_ia32_selectd_512:
7437   case X86::BI__builtin_ia32_selectq_128:
7438   case X86::BI__builtin_ia32_selectq_256:
7439   case X86::BI__builtin_ia32_selectq_512:
7440   case X86::BI__builtin_ia32_selectps_128:
7441   case X86::BI__builtin_ia32_selectps_256:
7442   case X86::BI__builtin_ia32_selectps_512:
7443   case X86::BI__builtin_ia32_selectpd_128:
7444   case X86::BI__builtin_ia32_selectpd_256:
7445   case X86::BI__builtin_ia32_selectpd_512:
7446     return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
7447   case X86::BI__builtin_ia32_pcmpeqb128_mask:
7448   case X86::BI__builtin_ia32_pcmpeqb256_mask:
7449   case X86::BI__builtin_ia32_pcmpeqb512_mask:
7450   case X86::BI__builtin_ia32_pcmpeqw128_mask:
7451   case X86::BI__builtin_ia32_pcmpeqw256_mask:
7452   case X86::BI__builtin_ia32_pcmpeqw512_mask:
7453   case X86::BI__builtin_ia32_pcmpeqd128_mask:
7454   case X86::BI__builtin_ia32_pcmpeqd256_mask:
7455   case X86::BI__builtin_ia32_pcmpeqd512_mask:
7456   case X86::BI__builtin_ia32_pcmpeqq128_mask:
7457   case X86::BI__builtin_ia32_pcmpeqq256_mask:
7458   case X86::BI__builtin_ia32_pcmpeqq512_mask:
7459     return EmitX86MaskedCompare(*this, 0, false, Ops);
7460   case X86::BI__builtin_ia32_pcmpgtb128_mask:
7461   case X86::BI__builtin_ia32_pcmpgtb256_mask:
7462   case X86::BI__builtin_ia32_pcmpgtb512_mask:
7463   case X86::BI__builtin_ia32_pcmpgtw128_mask:
7464   case X86::BI__builtin_ia32_pcmpgtw256_mask:
7465   case X86::BI__builtin_ia32_pcmpgtw512_mask:
7466   case X86::BI__builtin_ia32_pcmpgtd128_mask:
7467   case X86::BI__builtin_ia32_pcmpgtd256_mask:
7468   case X86::BI__builtin_ia32_pcmpgtd512_mask:
7469   case X86::BI__builtin_ia32_pcmpgtq128_mask:
7470   case X86::BI__builtin_ia32_pcmpgtq256_mask:
7471   case X86::BI__builtin_ia32_pcmpgtq512_mask:
7472     return EmitX86MaskedCompare(*this, 6, true, Ops);
7473   case X86::BI__builtin_ia32_cmpb128_mask:
7474   case X86::BI__builtin_ia32_cmpb256_mask:
7475   case X86::BI__builtin_ia32_cmpb512_mask:
7476   case X86::BI__builtin_ia32_cmpw128_mask:
7477   case X86::BI__builtin_ia32_cmpw256_mask:
7478   case X86::BI__builtin_ia32_cmpw512_mask:
7479   case X86::BI__builtin_ia32_cmpd128_mask:
7480   case X86::BI__builtin_ia32_cmpd256_mask:
7481   case X86::BI__builtin_ia32_cmpd512_mask:
7482   case X86::BI__builtin_ia32_cmpq128_mask:
7483   case X86::BI__builtin_ia32_cmpq256_mask:
7484   case X86::BI__builtin_ia32_cmpq512_mask: {
7485     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7486     return EmitX86MaskedCompare(*this, CC, true, Ops);
7487   }
7488   case X86::BI__builtin_ia32_ucmpb128_mask:
7489   case X86::BI__builtin_ia32_ucmpb256_mask:
7490   case X86::BI__builtin_ia32_ucmpb512_mask:
7491   case X86::BI__builtin_ia32_ucmpw128_mask:
7492   case X86::BI__builtin_ia32_ucmpw256_mask:
7493   case X86::BI__builtin_ia32_ucmpw512_mask:
7494   case X86::BI__builtin_ia32_ucmpd128_mask:
7495   case X86::BI__builtin_ia32_ucmpd256_mask:
7496   case X86::BI__builtin_ia32_ucmpd512_mask:
7497   case X86::BI__builtin_ia32_ucmpq128_mask:
7498   case X86::BI__builtin_ia32_ucmpq256_mask:
7499   case X86::BI__builtin_ia32_ucmpq512_mask: {
7500     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7501     return EmitX86MaskedCompare(*this, CC, false, Ops);
7502   }
7503 
7504   case X86::BI__builtin_ia32_vplzcntd_128_mask:
7505   case X86::BI__builtin_ia32_vplzcntd_256_mask:
7506   case X86::BI__builtin_ia32_vplzcntd_512_mask:
7507   case X86::BI__builtin_ia32_vplzcntq_128_mask:
7508   case X86::BI__builtin_ia32_vplzcntq_256_mask:
7509   case X86::BI__builtin_ia32_vplzcntq_512_mask: {
7510     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
7511     return EmitX86Select(*this, Ops[2],
7512                          Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}),
7513                          Ops[1]);
7514   }
7515 
7516   // TODO: Handle 64/512-bit vector widths of min/max.
7517   case X86::BI__builtin_ia32_pmaxsb128:
7518   case X86::BI__builtin_ia32_pmaxsw128:
7519   case X86::BI__builtin_ia32_pmaxsd128:
7520   case X86::BI__builtin_ia32_pmaxsb256:
7521   case X86::BI__builtin_ia32_pmaxsw256:
7522   case X86::BI__builtin_ia32_pmaxsd256: {
7523     Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Ops[1]);
7524     return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7525   }
7526   case X86::BI__builtin_ia32_pmaxub128:
7527   case X86::BI__builtin_ia32_pmaxuw128:
7528   case X86::BI__builtin_ia32_pmaxud128:
7529   case X86::BI__builtin_ia32_pmaxub256:
7530   case X86::BI__builtin_ia32_pmaxuw256:
7531   case X86::BI__builtin_ia32_pmaxud256: {
7532     Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, Ops[0], Ops[1]);
7533     return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7534   }
7535   case X86::BI__builtin_ia32_pminsb128:
7536   case X86::BI__builtin_ia32_pminsw128:
7537   case X86::BI__builtin_ia32_pminsd128:
7538   case X86::BI__builtin_ia32_pminsb256:
7539   case X86::BI__builtin_ia32_pminsw256:
7540   case X86::BI__builtin_ia32_pminsd256: {
7541     Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SLT, Ops[0], Ops[1]);
7542     return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7543   }
7544   case X86::BI__builtin_ia32_pminub128:
7545   case X86::BI__builtin_ia32_pminuw128:
7546   case X86::BI__builtin_ia32_pminud128:
7547   case X86::BI__builtin_ia32_pminub256:
7548   case X86::BI__builtin_ia32_pminuw256:
7549   case X86::BI__builtin_ia32_pminud256: {
7550     Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, Ops[0], Ops[1]);
7551     return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7552   }
7553 
7554   // 3DNow!
7555   case X86::BI__builtin_ia32_pswapdsf:
7556   case X86::BI__builtin_ia32_pswapdsi: {
7557     llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
7558     Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
7559     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
7560     return Builder.CreateCall(F, Ops, "pswapd");
7561   }
7562   case X86::BI__builtin_ia32_rdrand16_step:
7563   case X86::BI__builtin_ia32_rdrand32_step:
7564   case X86::BI__builtin_ia32_rdrand64_step:
7565   case X86::BI__builtin_ia32_rdseed16_step:
7566   case X86::BI__builtin_ia32_rdseed32_step:
7567   case X86::BI__builtin_ia32_rdseed64_step: {
7568     Intrinsic::ID ID;
7569     switch (BuiltinID) {
7570     default: llvm_unreachable("Unsupported intrinsic!");
7571     case X86::BI__builtin_ia32_rdrand16_step:
7572       ID = Intrinsic::x86_rdrand_16;
7573       break;
7574     case X86::BI__builtin_ia32_rdrand32_step:
7575       ID = Intrinsic::x86_rdrand_32;
7576       break;
7577     case X86::BI__builtin_ia32_rdrand64_step:
7578       ID = Intrinsic::x86_rdrand_64;
7579       break;
7580     case X86::BI__builtin_ia32_rdseed16_step:
7581       ID = Intrinsic::x86_rdseed_16;
7582       break;
7583     case X86::BI__builtin_ia32_rdseed32_step:
7584       ID = Intrinsic::x86_rdseed_32;
7585       break;
7586     case X86::BI__builtin_ia32_rdseed64_step:
7587       ID = Intrinsic::x86_rdseed_64;
7588       break;
7589     }
7590 
7591     Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
7592     Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
7593                                       Ops[0]);
7594     return Builder.CreateExtractValue(Call, 1);
7595   }
7596 
7597   // SSE packed comparison intrinsics
7598   case X86::BI__builtin_ia32_cmpeqps:
7599   case X86::BI__builtin_ia32_cmpeqpd:
7600     return getVectorFCmpIR(CmpInst::FCMP_OEQ);
7601   case X86::BI__builtin_ia32_cmpltps:
7602   case X86::BI__builtin_ia32_cmpltpd:
7603     return getVectorFCmpIR(CmpInst::FCMP_OLT);
7604   case X86::BI__builtin_ia32_cmpleps:
7605   case X86::BI__builtin_ia32_cmplepd:
7606     return getVectorFCmpIR(CmpInst::FCMP_OLE);
7607   case X86::BI__builtin_ia32_cmpunordps:
7608   case X86::BI__builtin_ia32_cmpunordpd:
7609     return getVectorFCmpIR(CmpInst::FCMP_UNO);
7610   case X86::BI__builtin_ia32_cmpneqps:
7611   case X86::BI__builtin_ia32_cmpneqpd:
7612     return getVectorFCmpIR(CmpInst::FCMP_UNE);
7613   case X86::BI__builtin_ia32_cmpnltps:
7614   case X86::BI__builtin_ia32_cmpnltpd:
7615     return getVectorFCmpIR(CmpInst::FCMP_UGE);
7616   case X86::BI__builtin_ia32_cmpnleps:
7617   case X86::BI__builtin_ia32_cmpnlepd:
7618     return getVectorFCmpIR(CmpInst::FCMP_UGT);
7619   case X86::BI__builtin_ia32_cmpordps:
7620   case X86::BI__builtin_ia32_cmpordpd:
7621     return getVectorFCmpIR(CmpInst::FCMP_ORD);
7622   case X86::BI__builtin_ia32_cmpps:
7623   case X86::BI__builtin_ia32_cmpps256:
7624   case X86::BI__builtin_ia32_cmppd:
7625   case X86::BI__builtin_ia32_cmppd256: {
7626     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
    // If this is one of the SSE immediates, we can use native IR.
7628     if (CC < 8) {
7629       FCmpInst::Predicate Pred;
7630       switch (CC) {
7631       case 0: Pred = FCmpInst::FCMP_OEQ; break;
7632       case 1: Pred = FCmpInst::FCMP_OLT; break;
7633       case 2: Pred = FCmpInst::FCMP_OLE; break;
7634       case 3: Pred = FCmpInst::FCMP_UNO; break;
7635       case 4: Pred = FCmpInst::FCMP_UNE; break;
7636       case 5: Pred = FCmpInst::FCMP_UGE; break;
7637       case 6: Pred = FCmpInst::FCMP_UGT; break;
7638       case 7: Pred = FCmpInst::FCMP_ORD; break;
7639       }
7640       return getVectorFCmpIR(Pred);
7641     }
7642 
7643     // We can't handle 8-31 immediates with native IR, use the intrinsic.
7644     Intrinsic::ID ID;
7645     switch (BuiltinID) {
7646     default: llvm_unreachable("Unsupported intrinsic!");
7647     case X86::BI__builtin_ia32_cmpps:
7648       ID = Intrinsic::x86_sse_cmp_ps;
7649       break;
7650     case X86::BI__builtin_ia32_cmpps256:
7651       ID = Intrinsic::x86_avx_cmp_ps_256;
7652       break;
7653     case X86::BI__builtin_ia32_cmppd:
7654       ID = Intrinsic::x86_sse2_cmp_pd;
7655       break;
7656     case X86::BI__builtin_ia32_cmppd256:
7657       ID = Intrinsic::x86_avx_cmp_pd_256;
7658       break;
7659     }
7660 
7661     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
7662   }
7663 
7664   // SSE scalar comparison intrinsics
7665   case X86::BI__builtin_ia32_cmpeqss:
7666     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
7667   case X86::BI__builtin_ia32_cmpltss:
7668     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
7669   case X86::BI__builtin_ia32_cmpless:
7670     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
7671   case X86::BI__builtin_ia32_cmpunordss:
7672     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
7673   case X86::BI__builtin_ia32_cmpneqss:
7674     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
7675   case X86::BI__builtin_ia32_cmpnltss:
7676     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
7677   case X86::BI__builtin_ia32_cmpnless:
7678     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
7679   case X86::BI__builtin_ia32_cmpordss:
7680     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
7681   case X86::BI__builtin_ia32_cmpeqsd:
7682     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
7683   case X86::BI__builtin_ia32_cmpltsd:
7684     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
7685   case X86::BI__builtin_ia32_cmplesd:
7686     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
7687   case X86::BI__builtin_ia32_cmpunordsd:
7688     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
7689   case X86::BI__builtin_ia32_cmpneqsd:
7690     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
7691   case X86::BI__builtin_ia32_cmpnltsd:
7692     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
7693   case X86::BI__builtin_ia32_cmpnlesd:
7694     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
7695   case X86::BI__builtin_ia32_cmpordsd:
7696     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
7697 
7698   case X86::BI__emul:
7699   case X86::BI__emulu: {
7700     llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
7701     bool isSigned = (BuiltinID == X86::BI__emul);
7702     Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
7703     Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
7704     return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
7705   }
7706   case X86::BI__mulh:
7707   case X86::BI__umulh:
7708   case X86::BI_mul128:
7709   case X86::BI_umul128: {
7710     llvm::Type *ResType = ConvertType(E->getType());
7711     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
7712 
7713     bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
7714     Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
7715     Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
7716 
7717     Value *MulResult, *HigherBits;
7718     if (IsSigned) {
7719       MulResult = Builder.CreateNSWMul(LHS, RHS);
7720       HigherBits = Builder.CreateAShr(MulResult, 64);
7721     } else {
7722       MulResult = Builder.CreateNUWMul(LHS, RHS);
7723       HigherBits = Builder.CreateLShr(MulResult, 64);
7724     }
7725     HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
7726 
7727     if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
7728       return HigherBits;
7729 
7730     Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
7731     Builder.CreateStore(HigherBits, HighBitsAddress);
7732     return Builder.CreateIntCast(MulResult, ResType, IsSigned);
7733   }
7734 
7735   case X86::BI__faststorefence: {
7736     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
7737                                llvm::CrossThread);
7738   }
7739   case X86::BI_ReadWriteBarrier:
7740   case X86::BI_ReadBarrier:
7741   case X86::BI_WriteBarrier: {
7742     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
7743                                llvm::SingleThread);
7744   }
7745   case X86::BI_BitScanForward:
7746   case X86::BI_BitScanForward64:
7747     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
7748   case X86::BI_BitScanReverse:
7749   case X86::BI_BitScanReverse64:
7750     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
7751 
7752   case X86::BI_InterlockedAnd64:
7753     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
7754   case X86::BI_InterlockedExchange64:
7755     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
7756   case X86::BI_InterlockedExchangeAdd64:
7757     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
7758   case X86::BI_InterlockedExchangeSub64:
7759     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
7760   case X86::BI_InterlockedOr64:
7761     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
7762   case X86::BI_InterlockedXor64:
7763     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
7764   case X86::BI_InterlockedDecrement64:
7765     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
7766   case X86::BI_InterlockedIncrement64:
7767     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
7768 
7769   case X86::BI_AddressOfReturnAddress: {
7770     Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
7771     return Builder.CreateCall(F);
7772   }
7773   case X86::BI__stosb: {
7774     // We treat __stosb as a volatile memset - it may not generate "rep stosb"
7775     // instruction, but it will create a memset that won't be optimized away.
7776     return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true);
7777   }
7778   }
7779 }
7780 
7781 
7782 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
7783                                            const CallExpr *E) {
7784   SmallVector<Value*, 4> Ops;
7785 
7786   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
7787     Ops.push_back(EmitScalarExpr(E->getArg(i)));
7788 
7789   Intrinsic::ID ID = Intrinsic::not_intrinsic;
7790 
7791   switch (BuiltinID) {
7792   default: return nullptr;
7793 
7794   // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
7795   // call __builtin_readcyclecounter.
7796   case PPC::BI__builtin_ppc_get_timebase:
7797     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
7798 
7799   // vec_ld, vec_lvsl, vec_lvsr
7800   case PPC::BI__builtin_altivec_lvx:
7801   case PPC::BI__builtin_altivec_lvxl:
7802   case PPC::BI__builtin_altivec_lvebx:
7803   case PPC::BI__builtin_altivec_lvehx:
7804   case PPC::BI__builtin_altivec_lvewx:
7805   case PPC::BI__builtin_altivec_lvsl:
7806   case PPC::BI__builtin_altivec_lvsr:
7807   case PPC::BI__builtin_vsx_lxvd2x:
7808   case PPC::BI__builtin_vsx_lxvw4x:
7809   {
7810     Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
7811 
7812     Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
7813     Ops.pop_back();
7814 
7815     switch (BuiltinID) {
7816     default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
7817     case PPC::BI__builtin_altivec_lvx:
7818       ID = Intrinsic::ppc_altivec_lvx;
7819       break;
7820     case PPC::BI__builtin_altivec_lvxl:
7821       ID = Intrinsic::ppc_altivec_lvxl;
7822       break;
7823     case PPC::BI__builtin_altivec_lvebx:
7824       ID = Intrinsic::ppc_altivec_lvebx;
7825       break;
7826     case PPC::BI__builtin_altivec_lvehx:
7827       ID = Intrinsic::ppc_altivec_lvehx;
7828       break;
7829     case PPC::BI__builtin_altivec_lvewx:
7830       ID = Intrinsic::ppc_altivec_lvewx;
7831       break;
7832     case PPC::BI__builtin_altivec_lvsl:
7833       ID = Intrinsic::ppc_altivec_lvsl;
7834       break;
7835     case PPC::BI__builtin_altivec_lvsr:
7836       ID = Intrinsic::ppc_altivec_lvsr;
7837       break;
7838     case PPC::BI__builtin_vsx_lxvd2x:
7839       ID = Intrinsic::ppc_vsx_lxvd2x;
7840       break;
7841     case PPC::BI__builtin_vsx_lxvw4x:
7842       ID = Intrinsic::ppc_vsx_lxvw4x;
7843       break;
7844     }
7845     llvm::Function *F = CGM.getIntrinsic(ID);
7846     return Builder.CreateCall(F, Ops, "");
7847   }
7848 
7849   // vec_st
7850   case PPC::BI__builtin_altivec_stvx:
7851   case PPC::BI__builtin_altivec_stvxl:
7852   case PPC::BI__builtin_altivec_stvebx:
7853   case PPC::BI__builtin_altivec_stvehx:
7854   case PPC::BI__builtin_altivec_stvewx:
7855   case PPC::BI__builtin_vsx_stxvd2x:
7856   case PPC::BI__builtin_vsx_stxvw4x:
7857   {
7858     Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
7859     Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
7860     Ops.pop_back();
7861 
7862     switch (BuiltinID) {
7863     default: llvm_unreachable("Unsupported st intrinsic!");
7864     case PPC::BI__builtin_altivec_stvx:
7865       ID = Intrinsic::ppc_altivec_stvx;
7866       break;
7867     case PPC::BI__builtin_altivec_stvxl:
7868       ID = Intrinsic::ppc_altivec_stvxl;
7869       break;
7870     case PPC::BI__builtin_altivec_stvebx:
7871       ID = Intrinsic::ppc_altivec_stvebx;
7872       break;
7873     case PPC::BI__builtin_altivec_stvehx:
7874       ID = Intrinsic::ppc_altivec_stvehx;
7875       break;
7876     case PPC::BI__builtin_altivec_stvewx:
7877       ID = Intrinsic::ppc_altivec_stvewx;
7878       break;
7879     case PPC::BI__builtin_vsx_stxvd2x:
7880       ID = Intrinsic::ppc_vsx_stxvd2x;
7881       break;
7882     case PPC::BI__builtin_vsx_stxvw4x:
7883       ID = Intrinsic::ppc_vsx_stxvw4x;
7884       break;
7885     }
7886     llvm::Function *F = CGM.getIntrinsic(ID);
7887     return Builder.CreateCall(F, Ops, "");
7888   }
7889   // Square root
7890   case PPC::BI__builtin_vsx_xvsqrtsp:
7891   case PPC::BI__builtin_vsx_xvsqrtdp: {
7892     llvm::Type *ResultType = ConvertType(E->getType());
7893     Value *X = EmitScalarExpr(E->getArg(0));
7894     ID = Intrinsic::sqrt;
7895     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
7896     return Builder.CreateCall(F, X);
7897   }
7898   // Count leading zeros
7899   case PPC::BI__builtin_altivec_vclzb:
7900   case PPC::BI__builtin_altivec_vclzh:
7901   case PPC::BI__builtin_altivec_vclzw:
7902   case PPC::BI__builtin_altivec_vclzd: {
7903     llvm::Type *ResultType = ConvertType(E->getType());
7904     Value *X = EmitScalarExpr(E->getArg(0));
7905     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
7906     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
7907     return Builder.CreateCall(F, {X, Undef});
7908   }
7909   case PPC::BI__builtin_altivec_vctzb:
7910   case PPC::BI__builtin_altivec_vctzh:
7911   case PPC::BI__builtin_altivec_vctzw:
7912   case PPC::BI__builtin_altivec_vctzd: {
7913     llvm::Type *ResultType = ConvertType(E->getType());
7914     Value *X = EmitScalarExpr(E->getArg(0));
7915     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
7916     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
7917     return Builder.CreateCall(F, {X, Undef});
7918   }
7919   case PPC::BI__builtin_altivec_vpopcntb:
7920   case PPC::BI__builtin_altivec_vpopcnth:
7921   case PPC::BI__builtin_altivec_vpopcntw:
7922   case PPC::BI__builtin_altivec_vpopcntd: {
7923     llvm::Type *ResultType = ConvertType(E->getType());
7924     Value *X = EmitScalarExpr(E->getArg(0));
7925     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
7926     return Builder.CreateCall(F, X);
7927   }
7928   // Copy sign
7929   case PPC::BI__builtin_vsx_xvcpsgnsp:
7930   case PPC::BI__builtin_vsx_xvcpsgndp: {
7931     llvm::Type *ResultType = ConvertType(E->getType());
7932     Value *X = EmitScalarExpr(E->getArg(0));
7933     Value *Y = EmitScalarExpr(E->getArg(1));
7934     ID = Intrinsic::copysign;
7935     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
7936     return Builder.CreateCall(F, {X, Y});
7937   }
7938   // Rounding/truncation
7939   case PPC::BI__builtin_vsx_xvrspip:
7940   case PPC::BI__builtin_vsx_xvrdpip:
7941   case PPC::BI__builtin_vsx_xvrdpim:
7942   case PPC::BI__builtin_vsx_xvrspim:
7943   case PPC::BI__builtin_vsx_xvrdpi:
7944   case PPC::BI__builtin_vsx_xvrspi:
7945   case PPC::BI__builtin_vsx_xvrdpic:
7946   case PPC::BI__builtin_vsx_xvrspic:
7947   case PPC::BI__builtin_vsx_xvrdpiz:
7948   case PPC::BI__builtin_vsx_xvrspiz: {
7949     llvm::Type *ResultType = ConvertType(E->getType());
7950     Value *X = EmitScalarExpr(E->getArg(0));
7951     if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
7952         BuiltinID == PPC::BI__builtin_vsx_xvrspim)
7953       ID = Intrinsic::floor;
7954     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
7955              BuiltinID == PPC::BI__builtin_vsx_xvrspi)
7956       ID = Intrinsic::round;
7957     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
7958              BuiltinID == PPC::BI__builtin_vsx_xvrspic)
7959       ID = Intrinsic::nearbyint;
7960     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
7961              BuiltinID == PPC::BI__builtin_vsx_xvrspip)
7962       ID = Intrinsic::ceil;
7963     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
7964              BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
7965       ID = Intrinsic::trunc;
7966     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
7967     return Builder.CreateCall(F, X);
7968   }
7969 
7970   // Absolute value
7971   case PPC::BI__builtin_vsx_xvabsdp:
7972   case PPC::BI__builtin_vsx_xvabssp: {
7973     llvm::Type *ResultType = ConvertType(E->getType());
7974     Value *X = EmitScalarExpr(E->getArg(0));
7975     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
7976     return Builder.CreateCall(F, X);
7977   }
7978 
7979   // FMA variations
7980   case PPC::BI__builtin_vsx_xvmaddadp:
7981   case PPC::BI__builtin_vsx_xvmaddasp:
7982   case PPC::BI__builtin_vsx_xvnmaddadp:
7983   case PPC::BI__builtin_vsx_xvnmaddasp:
7984   case PPC::BI__builtin_vsx_xvmsubadp:
7985   case PPC::BI__builtin_vsx_xvmsubasp:
7986   case PPC::BI__builtin_vsx_xvnmsubadp:
7987   case PPC::BI__builtin_vsx_xvnmsubasp: {
7988     llvm::Type *ResultType = ConvertType(E->getType());
7989     Value *X = EmitScalarExpr(E->getArg(0));
7990     Value *Y = EmitScalarExpr(E->getArg(1));
7991     Value *Z = EmitScalarExpr(E->getArg(2));
7992     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
7993     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
7994     switch (BuiltinID) {
7995       case PPC::BI__builtin_vsx_xvmaddadp:
7996       case PPC::BI__builtin_vsx_xvmaddasp:
7997         return Builder.CreateCall(F, {X, Y, Z});
7998       case PPC::BI__builtin_vsx_xvnmaddadp:
7999       case PPC::BI__builtin_vsx_xvnmaddasp:
8000         return Builder.CreateFSub(Zero,
8001                                   Builder.CreateCall(F, {X, Y, Z}), "sub");
8002       case PPC::BI__builtin_vsx_xvmsubadp:
8003       case PPC::BI__builtin_vsx_xvmsubasp:
8004         return Builder.CreateCall(F,
8005                                   {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8006       case PPC::BI__builtin_vsx_xvnmsubadp:
8007       case PPC::BI__builtin_vsx_xvnmsubasp:
8008         Value *FsubRes =
8009           Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8010         return Builder.CreateFSub(Zero, FsubRes, "sub");
8011     }
8012     llvm_unreachable("Unknown FMA operation");
8013     return nullptr; // Suppress no-return warning
8014   }
8015   }
8016 }
8017 
8018 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
8019                                               const CallExpr *E) {
8020   switch (BuiltinID) {
8021   case AMDGPU::BI__builtin_amdgcn_div_scale:
8022   case AMDGPU::BI__builtin_amdgcn_div_scalef: {
    // Translate from the intrinsic's struct return to the builtin's out
8024     // argument.
8025 
8026     Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
8027 
8028     llvm::Value *X = EmitScalarExpr(E->getArg(0));
8029     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
8030     llvm::Value *Z = EmitScalarExpr(E->getArg(2));
8031 
8032     llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
8033                                            X->getType());
8034 
8035     llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
8036 
8037     llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
8038     llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
8039 
8040     llvm::Type *RealFlagType
8041       = FlagOutPtr.getPointer()->getType()->getPointerElementType();
8042 
8043     llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
8044     Builder.CreateStore(FlagExt, FlagOutPtr);
8045     return Result;
8046   }
8047   case AMDGPU::BI__builtin_amdgcn_div_fmas:
8048   case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
8049     llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
8050     llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
8051     llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
8052     llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
8053 
8054     llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
8055                                       Src0->getType());
8056     llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
8057     return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
8058   }
8059 
8060   case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
8061     return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
8062   case AMDGPU::BI__builtin_amdgcn_div_fixup:
8063   case AMDGPU::BI__builtin_amdgcn_div_fixupf:
8064     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
8065   case AMDGPU::BI__builtin_amdgcn_trig_preop:
8066   case AMDGPU::BI__builtin_amdgcn_trig_preopf:
8067     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
8068   case AMDGPU::BI__builtin_amdgcn_rcp:
8069   case AMDGPU::BI__builtin_amdgcn_rcpf:
8070     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
8071   case AMDGPU::BI__builtin_amdgcn_rsq:
8072   case AMDGPU::BI__builtin_amdgcn_rsqf:
8073     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
8074   case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
8075   case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
8076     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
8077   case AMDGPU::BI__builtin_amdgcn_sinf:
8078     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
8079   case AMDGPU::BI__builtin_amdgcn_cosf:
8080     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
8081   case AMDGPU::BI__builtin_amdgcn_log_clampf:
8082     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
8083   case AMDGPU::BI__builtin_amdgcn_ldexp:
8084   case AMDGPU::BI__builtin_amdgcn_ldexpf:
8085     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
8086   case AMDGPU::BI__builtin_amdgcn_frexp_mant:
8087   case AMDGPU::BI__builtin_amdgcn_frexp_mantf: {
8088     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
8089   }
8090   case AMDGPU::BI__builtin_amdgcn_frexp_exp:
8091   case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
8092     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_exp);
8093   }
8094   case AMDGPU::BI__builtin_amdgcn_fract:
8095   case AMDGPU::BI__builtin_amdgcn_fractf:
8096     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
8097   case AMDGPU::BI__builtin_amdgcn_lerp:
8098     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
8099   case AMDGPU::BI__builtin_amdgcn_uicmp:
8100   case AMDGPU::BI__builtin_amdgcn_uicmpl:
8101   case AMDGPU::BI__builtin_amdgcn_sicmp:
8102   case AMDGPU::BI__builtin_amdgcn_sicmpl:
8103     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp);
8104   case AMDGPU::BI__builtin_amdgcn_fcmp:
8105   case AMDGPU::BI__builtin_amdgcn_fcmpf:
8106     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp);
8107   case AMDGPU::BI__builtin_amdgcn_class:
8108   case AMDGPU::BI__builtin_amdgcn_classf:
8109     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
8110 
8111   case AMDGPU::BI__builtin_amdgcn_read_exec: {
8112     CallInst *CI = cast<CallInst>(
8113       EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
8114     CI->setConvergent();
8115     return CI;
8116   }
8117 
8118   // amdgcn workitem
8119   case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
8120     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
8121   case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
8122     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
8123   case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
8124     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
8125 
8126   // r600 intrinsics
8127   case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
8128   case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
8129     return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
8130   case AMDGPU::BI__builtin_r600_read_tidig_x:
8131     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
8132   case AMDGPU::BI__builtin_r600_read_tidig_y:
8133     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
8134   case AMDGPU::BI__builtin_r600_read_tidig_z:
8135     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
8136   default:
8137     return nullptr;
8138   }
8139 }
8140 
8141 /// Handle a SystemZ function in which the final argument is a pointer
8142 /// to an int that receives the post-instruction CC value.  At the LLVM level
8143 /// this is represented as a function that returns a {result, cc} pair.
8144 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
8145                                          unsigned IntrinsicID,
8146                                          const CallExpr *E) {
8147   unsigned NumArgs = E->getNumArgs() - 1;
8148   SmallVector<Value *, 8> Args(NumArgs);
8149   for (unsigned I = 0; I < NumArgs; ++I)
8150     Args[I] = CGF.EmitScalarExpr(E->getArg(I));
8151   Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
8152   Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
8153   Value *Call = CGF.Builder.CreateCall(F, Args);
8154   Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
8155   CGF.Builder.CreateStore(CC, CCPtr);
8156   return CGF.Builder.CreateExtractValue(Call, 0);
8157 }
8158 
8159 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
8160                                                const CallExpr *E) {
8161   switch (BuiltinID) {
8162   case SystemZ::BI__builtin_tbegin: {
8163     Value *TDB = EmitScalarExpr(E->getArg(0));
8164     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
8165     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
8166     return Builder.CreateCall(F, {TDB, Control});
8167   }
8168   case SystemZ::BI__builtin_tbegin_nofloat: {
8169     Value *TDB = EmitScalarExpr(E->getArg(0));
8170     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
8171     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
8172     return Builder.CreateCall(F, {TDB, Control});
8173   }
8174   case SystemZ::BI__builtin_tbeginc: {
8175     Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
8176     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
8177     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
8178     return Builder.CreateCall(F, {TDB, Control});
8179   }
8180   case SystemZ::BI__builtin_tabort: {
8181     Value *Data = EmitScalarExpr(E->getArg(0));
8182     Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
8183     return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
8184   }
8185   case SystemZ::BI__builtin_non_tx_store: {
8186     Value *Address = EmitScalarExpr(E->getArg(0));
8187     Value *Data = EmitScalarExpr(E->getArg(1));
8188     Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
8189     return Builder.CreateCall(F, {Data, Address});
8190   }
8191 
8192   // Vector builtins.  Note that most vector builtins are mapped automatically
8193   // to target-specific LLVM intrinsics.  The ones handled specially here can
8194   // be represented via standard LLVM IR, which is preferable to enable common
8195   // LLVM optimizations.
8196 
8197   case SystemZ::BI__builtin_s390_vpopctb:
8198   case SystemZ::BI__builtin_s390_vpopcth:
8199   case SystemZ::BI__builtin_s390_vpopctf:
8200   case SystemZ::BI__builtin_s390_vpopctg: {
8201     llvm::Type *ResultType = ConvertType(E->getType());
8202     Value *X = EmitScalarExpr(E->getArg(0));
8203     Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
8204     return Builder.CreateCall(F, X);
8205   }
8206 
8207   case SystemZ::BI__builtin_s390_vclzb:
8208   case SystemZ::BI__builtin_s390_vclzh:
8209   case SystemZ::BI__builtin_s390_vclzf:
8210   case SystemZ::BI__builtin_s390_vclzg: {
8211     llvm::Type *ResultType = ConvertType(E->getType());
8212     Value *X = EmitScalarExpr(E->getArg(0));
8213     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8214     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
8215     return Builder.CreateCall(F, {X, Undef});
8216   }
8217 
8218   case SystemZ::BI__builtin_s390_vctzb:
8219   case SystemZ::BI__builtin_s390_vctzh:
8220   case SystemZ::BI__builtin_s390_vctzf:
8221   case SystemZ::BI__builtin_s390_vctzg: {
8222     llvm::Type *ResultType = ConvertType(E->getType());
8223     Value *X = EmitScalarExpr(E->getArg(0));
8224     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8225     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
8226     return Builder.CreateCall(F, {X, Undef});
8227   }
8228 
8229   case SystemZ::BI__builtin_s390_vfsqdb: {
8230     llvm::Type *ResultType = ConvertType(E->getType());
8231     Value *X = EmitScalarExpr(E->getArg(0));
8232     Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
8233     return Builder.CreateCall(F, X);
8234   }
8235   case SystemZ::BI__builtin_s390_vfmadb: {
8236     llvm::Type *ResultType = ConvertType(E->getType());
8237     Value *X = EmitScalarExpr(E->getArg(0));
8238     Value *Y = EmitScalarExpr(E->getArg(1));
8239     Value *Z = EmitScalarExpr(E->getArg(2));
8240     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8241     return Builder.CreateCall(F, {X, Y, Z});
8242   }
8243   case SystemZ::BI__builtin_s390_vfmsdb: {
8244     llvm::Type *ResultType = ConvertType(E->getType());
8245     Value *X = EmitScalarExpr(E->getArg(0));
8246     Value *Y = EmitScalarExpr(E->getArg(1));
8247     Value *Z = EmitScalarExpr(E->getArg(2));
8248     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8249     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8250     return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8251   }
8252   case SystemZ::BI__builtin_s390_vflpdb: {
8253     llvm::Type *ResultType = ConvertType(E->getType());
8254     Value *X = EmitScalarExpr(E->getArg(0));
8255     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8256     return Builder.CreateCall(F, X);
8257   }
8258   case SystemZ::BI__builtin_s390_vflndb: {
8259     llvm::Type *ResultType = ConvertType(E->getType());
8260     Value *X = EmitScalarExpr(E->getArg(0));
8261     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8262     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8263     return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
8264   }
8265   case SystemZ::BI__builtin_s390_vfidb: {
8266     llvm::Type *ResultType = ConvertType(E->getType());
8267     Value *X = EmitScalarExpr(E->getArg(0));
8268     // Constant-fold the M4 and M5 mask arguments.
8269     llvm::APSInt M4, M5;
8270     bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
8271     bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
8272     assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
8273     (void)IsConstM4; (void)IsConstM5;
8274     // Check whether this instance of vfidb can be represented via a LLVM
8275     // standard intrinsic.  We only support some combinations of M4 and M5.
8276     Intrinsic::ID ID = Intrinsic::not_intrinsic;
8277     switch (M4.getZExtValue()) {
8278     default: break;
8279     case 0:  // IEEE-inexact exception allowed
8280       switch (M5.getZExtValue()) {
8281       default: break;
8282       case 0: ID = Intrinsic::rint; break;
8283       }
8284       break;
8285     case 4:  // IEEE-inexact exception suppressed
8286       switch (M5.getZExtValue()) {
8287       default: break;
8288       case 0: ID = Intrinsic::nearbyint; break;
8289       case 1: ID = Intrinsic::round; break;
8290       case 5: ID = Intrinsic::trunc; break;
8291       case 6: ID = Intrinsic::ceil; break;
8292       case 7: ID = Intrinsic::floor; break;
8293       }
8294       break;
8295     }
8296     if (ID != Intrinsic::not_intrinsic) {
8297       Function *F = CGM.getIntrinsic(ID, ResultType);
8298       return Builder.CreateCall(F, X);
8299     }
8300     Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb);
8301     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
8302     Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
8303     return Builder.CreateCall(F, {X, M4Value, M5Value});
8304   }
8305 
8306   // Vector intrisincs that output the post-instruction CC value.
8307 
8308 #define INTRINSIC_WITH_CC(NAME) \
8309     case SystemZ::BI__builtin_##NAME: \
8310       return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
8311 
8312   INTRINSIC_WITH_CC(s390_vpkshs);
8313   INTRINSIC_WITH_CC(s390_vpksfs);
8314   INTRINSIC_WITH_CC(s390_vpksgs);
8315 
8316   INTRINSIC_WITH_CC(s390_vpklshs);
8317   INTRINSIC_WITH_CC(s390_vpklsfs);
8318   INTRINSIC_WITH_CC(s390_vpklsgs);
8319 
8320   INTRINSIC_WITH_CC(s390_vceqbs);
8321   INTRINSIC_WITH_CC(s390_vceqhs);
8322   INTRINSIC_WITH_CC(s390_vceqfs);
8323   INTRINSIC_WITH_CC(s390_vceqgs);
8324 
8325   INTRINSIC_WITH_CC(s390_vchbs);
8326   INTRINSIC_WITH_CC(s390_vchhs);
8327   INTRINSIC_WITH_CC(s390_vchfs);
8328   INTRINSIC_WITH_CC(s390_vchgs);
8329 
8330   INTRINSIC_WITH_CC(s390_vchlbs);
8331   INTRINSIC_WITH_CC(s390_vchlhs);
8332   INTRINSIC_WITH_CC(s390_vchlfs);
8333   INTRINSIC_WITH_CC(s390_vchlgs);
8334 
8335   INTRINSIC_WITH_CC(s390_vfaebs);
8336   INTRINSIC_WITH_CC(s390_vfaehs);
8337   INTRINSIC_WITH_CC(s390_vfaefs);
8338 
8339   INTRINSIC_WITH_CC(s390_vfaezbs);
8340   INTRINSIC_WITH_CC(s390_vfaezhs);
8341   INTRINSIC_WITH_CC(s390_vfaezfs);
8342 
8343   INTRINSIC_WITH_CC(s390_vfeebs);
8344   INTRINSIC_WITH_CC(s390_vfeehs);
8345   INTRINSIC_WITH_CC(s390_vfeefs);
8346 
8347   INTRINSIC_WITH_CC(s390_vfeezbs);
8348   INTRINSIC_WITH_CC(s390_vfeezhs);
8349   INTRINSIC_WITH_CC(s390_vfeezfs);
8350 
8351   INTRINSIC_WITH_CC(s390_vfenebs);
8352   INTRINSIC_WITH_CC(s390_vfenehs);
8353   INTRINSIC_WITH_CC(s390_vfenefs);
8354 
8355   INTRINSIC_WITH_CC(s390_vfenezbs);
8356   INTRINSIC_WITH_CC(s390_vfenezhs);
8357   INTRINSIC_WITH_CC(s390_vfenezfs);
8358 
8359   INTRINSIC_WITH_CC(s390_vistrbs);
8360   INTRINSIC_WITH_CC(s390_vistrhs);
8361   INTRINSIC_WITH_CC(s390_vistrfs);
8362 
8363   INTRINSIC_WITH_CC(s390_vstrcbs);
8364   INTRINSIC_WITH_CC(s390_vstrchs);
8365   INTRINSIC_WITH_CC(s390_vstrcfs);
8366 
8367   INTRINSIC_WITH_CC(s390_vstrczbs);
8368   INTRINSIC_WITH_CC(s390_vstrczhs);
8369   INTRINSIC_WITH_CC(s390_vstrczfs);
8370 
8371   INTRINSIC_WITH_CC(s390_vfcedbs);
8372   INTRINSIC_WITH_CC(s390_vfchdbs);
8373   INTRINSIC_WITH_CC(s390_vfchedbs);
8374 
8375   INTRINSIC_WITH_CC(s390_vftcidb);
8376 
8377 #undef INTRINSIC_WITH_CC
8378 
8379   default:
8380     return nullptr;
8381   }
8382 }
8383 
8384 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
8385                                              const CallExpr *E) {
8386   auto MakeLdg = [&](unsigned IntrinsicID) {
8387     Value *Ptr = EmitScalarExpr(E->getArg(0));
8388     AlignmentSource AlignSource;
8389     clang::CharUnits Align =
8390         getNaturalPointeeTypeAlignment(E->getArg(0)->getType(), &AlignSource);
8391     return Builder.CreateCall(
8392         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
8393                                        Ptr->getType()}),
8394         {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
8395   };
8396   auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
8397     Value *Ptr = EmitScalarExpr(E->getArg(0));
8398     return Builder.CreateCall(
8399         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
8400                                        Ptr->getType()}),
8401         {Ptr, EmitScalarExpr(E->getArg(1))});
8402   };
8403   switch (BuiltinID) {
8404   case NVPTX::BI__nvvm_atom_add_gen_i:
8405   case NVPTX::BI__nvvm_atom_add_gen_l:
8406   case NVPTX::BI__nvvm_atom_add_gen_ll:
8407     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
8408 
8409   case NVPTX::BI__nvvm_atom_sub_gen_i:
8410   case NVPTX::BI__nvvm_atom_sub_gen_l:
8411   case NVPTX::BI__nvvm_atom_sub_gen_ll:
8412     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
8413 
8414   case NVPTX::BI__nvvm_atom_and_gen_i:
8415   case NVPTX::BI__nvvm_atom_and_gen_l:
8416   case NVPTX::BI__nvvm_atom_and_gen_ll:
8417     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
8418 
8419   case NVPTX::BI__nvvm_atom_or_gen_i:
8420   case NVPTX::BI__nvvm_atom_or_gen_l:
8421   case NVPTX::BI__nvvm_atom_or_gen_ll:
8422     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
8423 
8424   case NVPTX::BI__nvvm_atom_xor_gen_i:
8425   case NVPTX::BI__nvvm_atom_xor_gen_l:
8426   case NVPTX::BI__nvvm_atom_xor_gen_ll:
8427     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
8428 
8429   case NVPTX::BI__nvvm_atom_xchg_gen_i:
8430   case NVPTX::BI__nvvm_atom_xchg_gen_l:
8431   case NVPTX::BI__nvvm_atom_xchg_gen_ll:
8432     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
8433 
8434   case NVPTX::BI__nvvm_atom_max_gen_i:
8435   case NVPTX::BI__nvvm_atom_max_gen_l:
8436   case NVPTX::BI__nvvm_atom_max_gen_ll:
8437     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
8438 
8439   case NVPTX::BI__nvvm_atom_max_gen_ui:
8440   case NVPTX::BI__nvvm_atom_max_gen_ul:
8441   case NVPTX::BI__nvvm_atom_max_gen_ull:
8442     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
8443 
8444   case NVPTX::BI__nvvm_atom_min_gen_i:
8445   case NVPTX::BI__nvvm_atom_min_gen_l:
8446   case NVPTX::BI__nvvm_atom_min_gen_ll:
8447     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
8448 
8449   case NVPTX::BI__nvvm_atom_min_gen_ui:
8450   case NVPTX::BI__nvvm_atom_min_gen_ul:
8451   case NVPTX::BI__nvvm_atom_min_gen_ull:
8452     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
8453 
8454   case NVPTX::BI__nvvm_atom_cas_gen_i:
8455   case NVPTX::BI__nvvm_atom_cas_gen_l:
8456   case NVPTX::BI__nvvm_atom_cas_gen_ll:
8457     // __nvvm_atom_cas_gen_* should return the old value rather than the
8458     // success flag.
8459     return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
8460 
8461   case NVPTX::BI__nvvm_atom_add_gen_f: {
8462     Value *Ptr = EmitScalarExpr(E->getArg(0));
8463     Value *Val = EmitScalarExpr(E->getArg(1));
8464     // atomicrmw only deals with integer arguments so we need to use
8465     // LLVM's nvvm_atomic_load_add_f32 intrinsic for that.
8466     Value *FnALAF32 =
8467         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
8468     return Builder.CreateCall(FnALAF32, {Ptr, Val});
8469   }
8470 
8471   case NVPTX::BI__nvvm_atom_inc_gen_ui: {
8472     Value *Ptr = EmitScalarExpr(E->getArg(0));
8473     Value *Val = EmitScalarExpr(E->getArg(1));
8474     Value *FnALI32 =
8475         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
8476     return Builder.CreateCall(FnALI32, {Ptr, Val});
8477   }
8478 
8479   case NVPTX::BI__nvvm_atom_dec_gen_ui: {
8480     Value *Ptr = EmitScalarExpr(E->getArg(0));
8481     Value *Val = EmitScalarExpr(E->getArg(1));
8482     Value *FnALD32 =
8483         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
8484     return Builder.CreateCall(FnALD32, {Ptr, Val});
8485   }
8486 
8487   case NVPTX::BI__nvvm_ldg_c:
8488   case NVPTX::BI__nvvm_ldg_c2:
8489   case NVPTX::BI__nvvm_ldg_c4:
8490   case NVPTX::BI__nvvm_ldg_s:
8491   case NVPTX::BI__nvvm_ldg_s2:
8492   case NVPTX::BI__nvvm_ldg_s4:
8493   case NVPTX::BI__nvvm_ldg_i:
8494   case NVPTX::BI__nvvm_ldg_i2:
8495   case NVPTX::BI__nvvm_ldg_i4:
8496   case NVPTX::BI__nvvm_ldg_l:
8497   case NVPTX::BI__nvvm_ldg_ll:
8498   case NVPTX::BI__nvvm_ldg_ll2:
8499   case NVPTX::BI__nvvm_ldg_uc:
8500   case NVPTX::BI__nvvm_ldg_uc2:
8501   case NVPTX::BI__nvvm_ldg_uc4:
8502   case NVPTX::BI__nvvm_ldg_us:
8503   case NVPTX::BI__nvvm_ldg_us2:
8504   case NVPTX::BI__nvvm_ldg_us4:
8505   case NVPTX::BI__nvvm_ldg_ui:
8506   case NVPTX::BI__nvvm_ldg_ui2:
8507   case NVPTX::BI__nvvm_ldg_ui4:
8508   case NVPTX::BI__nvvm_ldg_ul:
8509   case NVPTX::BI__nvvm_ldg_ull:
8510   case NVPTX::BI__nvvm_ldg_ull2:
8511     // PTX Interoperability section 2.2: "For a vector with an even number of
8512     // elements, its alignment is set to number of elements times the alignment
8513     // of its member: n*alignof(t)."
8514     return MakeLdg(Intrinsic::nvvm_ldg_global_i);
8515   case NVPTX::BI__nvvm_ldg_f:
8516   case NVPTX::BI__nvvm_ldg_f2:
8517   case NVPTX::BI__nvvm_ldg_f4:
8518   case NVPTX::BI__nvvm_ldg_d:
8519   case NVPTX::BI__nvvm_ldg_d2:
8520     return MakeLdg(Intrinsic::nvvm_ldg_global_f);
8521 
8522   case NVPTX::BI__nvvm_atom_cta_add_gen_i:
8523   case NVPTX::BI__nvvm_atom_cta_add_gen_l:
8524   case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
8525     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta);
8526   case NVPTX::BI__nvvm_atom_sys_add_gen_i:
8527   case NVPTX::BI__nvvm_atom_sys_add_gen_l:
8528   case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
8529     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys);
8530   case NVPTX::BI__nvvm_atom_cta_add_gen_f:
8531   case NVPTX::BI__nvvm_atom_cta_add_gen_d:
8532     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta);
8533   case NVPTX::BI__nvvm_atom_sys_add_gen_f:
8534   case NVPTX::BI__nvvm_atom_sys_add_gen_d:
8535     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys);
8536   case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
8537   case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
8538   case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
8539     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta);
8540   case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
8541   case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
8542   case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
8543     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys);
8544   case NVPTX::BI__nvvm_atom_cta_max_gen_i:
8545   case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
8546   case NVPTX::BI__nvvm_atom_cta_max_gen_l:
8547   case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
8548   case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
8549   case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
8550     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta);
8551   case NVPTX::BI__nvvm_atom_sys_max_gen_i:
8552   case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
8553   case NVPTX::BI__nvvm_atom_sys_max_gen_l:
8554   case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
8555   case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
8556   case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
8557     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys);
8558   case NVPTX::BI__nvvm_atom_cta_min_gen_i:
8559   case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
8560   case NVPTX::BI__nvvm_atom_cta_min_gen_l:
8561   case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
8562   case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
8563   case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
8564     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta);
8565   case NVPTX::BI__nvvm_atom_sys_min_gen_i:
8566   case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
8567   case NVPTX::BI__nvvm_atom_sys_min_gen_l:
8568   case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
8569   case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
8570   case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
8571     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys);
8572   case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
8573     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta);
8574   case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
8575     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta);
8576   case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
8577     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys);
8578   case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
8579     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys);
8580   case NVPTX::BI__nvvm_atom_cta_and_gen_i:
8581   case NVPTX::BI__nvvm_atom_cta_and_gen_l:
8582   case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
8583     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta);
8584   case NVPTX::BI__nvvm_atom_sys_and_gen_i:
8585   case NVPTX::BI__nvvm_atom_sys_and_gen_l:
8586   case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
8587     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys);
8588   case NVPTX::BI__nvvm_atom_cta_or_gen_i:
8589   case NVPTX::BI__nvvm_atom_cta_or_gen_l:
8590   case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
8591     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta);
8592   case NVPTX::BI__nvvm_atom_sys_or_gen_i:
8593   case NVPTX::BI__nvvm_atom_sys_or_gen_l:
8594   case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
8595     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys);
8596   case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
8597   case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
8598   case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
8599     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta);
8600   case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
8601   case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
8602   case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
8603     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys);
8604   case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
8605   case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
8606   case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
8607     Value *Ptr = EmitScalarExpr(E->getArg(0));
8608     return Builder.CreateCall(
8609         CGM.getIntrinsic(
8610             Intrinsic::nvvm_atomic_cas_gen_i_cta,
8611             {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
8612         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
8613   }
8614   case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
8615   case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
8616   case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
8617     Value *Ptr = EmitScalarExpr(E->getArg(0));
8618     return Builder.CreateCall(
8619         CGM.getIntrinsic(
8620             Intrinsic::nvvm_atomic_cas_gen_i_sys,
8621             {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
8622         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
8623   }
8624   default:
8625     return nullptr;
8626   }
8627 }
8628 
8629 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
8630                                                    const CallExpr *E) {
8631   switch (BuiltinID) {
8632   case WebAssembly::BI__builtin_wasm_current_memory: {
8633     llvm::Type *ResultType = ConvertType(E->getType());
8634     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
8635     return Builder.CreateCall(Callee);
8636   }
8637   case WebAssembly::BI__builtin_wasm_grow_memory: {
8638     Value *X = EmitScalarExpr(E->getArg(0));
8639     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
8640     return Builder.CreateCall(Callee, X);
8641   }
8642 
8643   default:
8644     return nullptr;
8645   }
8646 }
8647