1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGObjCRuntime.h"
16 #include "CGOpenCLRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "CodeGenModule.h"
19 #include "ConstantEmitter.h"
20 #include "TargetInfo.h"
21 #include "clang/AST/ASTContext.h"
22 #include "clang/AST/Decl.h"
23 #include "clang/Analysis/Analyses/OSLog.h"
24 #include "clang/Basic/TargetBuiltins.h"
25 #include "clang/Basic/TargetInfo.h"
26 #include "clang/CodeGen/CGFunctionInfo.h"
27 #include "llvm/ADT/StringExtras.h"
28 #include "llvm/IR/CallSite.h"
29 #include "llvm/IR/DataLayout.h"
30 #include "llvm/IR/InlineAsm.h"
31 #include "llvm/IR/Intrinsics.h"
32 #include "llvm/IR/MDBuilder.h"
33 #include "llvm/Support/ScopedPrinter.h"
34 #include "llvm/Support/ConvertUTF.h"
35 #include <sstream>
36 
37 using namespace clang;
38 using namespace CodeGen;
39 using namespace llvm;
40 
/// Restrict \p Value to the range bounded below by \p Low and above by
/// \p High. The lower bound is applied first and the upper bound last.
static int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
  const int64_t Floored = Value < Low ? Low : Value;
  return High < Floored ? High : Floored;
}
45 
46 /// getBuiltinLibFunction - Given a builtin id for a function like
47 /// "__builtin_fabsf", return a Function* for "fabsf".
48 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
49                                                      unsigned BuiltinID) {
50   assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
51 
52   // Get the name, skip over the __builtin_ prefix (if necessary).
53   StringRef Name;
54   GlobalDecl D(FD);
55 
56   // If the builtin has been declared explicitly with an assembler label,
57   // use the mangled name. This differs from the plain label on platforms
58   // that prefix labels.
59   if (FD->hasAttr<AsmLabelAttr>())
60     Name = getMangledName(D);
61   else
62     Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
63 
64   llvm::FunctionType *Ty =
65     cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
66 
67   return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
68 }
69 
70 /// Emit the conversions required to turn the given value into an
71 /// integer of the given size.
72 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
73                         QualType T, llvm::IntegerType *IntType) {
74   V = CGF.EmitToMemory(V, T);
75 
76   if (V->getType()->isPointerTy())
77     return CGF.Builder.CreatePtrToInt(V, IntType);
78 
79   assert(V->getType() == IntType);
80   return V;
81 }
82 
83 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
84                           QualType T, llvm::Type *ResultType) {
85   V = CGF.EmitFromMemory(V, T);
86 
87   if (ResultType->isPointerTy())
88     return CGF.Builder.CreateIntToPtr(V, ResultType);
89 
90   assert(V->getType() == ResultType);
91   return V;
92 }
93 
94 /// Utility to insert an atomic instruction based on Instrinsic::ID
95 /// and the expression node.
96 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
97                                     llvm::AtomicRMWInst::BinOp Kind,
98                                     const CallExpr *E) {
99   QualType T = E->getType();
100   assert(E->getArg(0)->getType()->isPointerType());
101   assert(CGF.getContext().hasSameUnqualifiedType(T,
102                                   E->getArg(0)->getType()->getPointeeType()));
103   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
104 
105   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
106   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
107 
108   llvm::IntegerType *IntType =
109     llvm::IntegerType::get(CGF.getLLVMContext(),
110                            CGF.getContext().getTypeSize(T));
111   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
112 
113   llvm::Value *Args[2];
114   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
115   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
116   llvm::Type *ValueType = Args[1]->getType();
117   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
118 
119   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
120       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
121   return EmitFromInt(CGF, Result, T, ValueType);
122 }
123 
124 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
125   Value *Val = CGF.EmitScalarExpr(E->getArg(0));
126   Value *Address = CGF.EmitScalarExpr(E->getArg(1));
127 
128   // Convert the type of the pointer to a pointer to the stored type.
129   Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
130   Value *BC = CGF.Builder.CreateBitCast(
131       Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
132   LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
133   LV.setNontemporal(true);
134   CGF.EmitStoreOfScalar(Val, LV, false);
135   return nullptr;
136 }
137 
138 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
139   Value *Address = CGF.EmitScalarExpr(E->getArg(0));
140 
141   LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
142   LV.setNontemporal(true);
143   return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
144 }
145 
146 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
147                                llvm::AtomicRMWInst::BinOp Kind,
148                                const CallExpr *E) {
149   return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
150 }
151 
152 /// Utility to insert an atomic instruction based Instrinsic::ID and
153 /// the expression node, where the return value is the result of the
154 /// operation.
155 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
156                                    llvm::AtomicRMWInst::BinOp Kind,
157                                    const CallExpr *E,
158                                    Instruction::BinaryOps Op,
159                                    bool Invert = false) {
160   QualType T = E->getType();
161   assert(E->getArg(0)->getType()->isPointerType());
162   assert(CGF.getContext().hasSameUnqualifiedType(T,
163                                   E->getArg(0)->getType()->getPointeeType()));
164   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
165 
166   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
167   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
168 
169   llvm::IntegerType *IntType =
170     llvm::IntegerType::get(CGF.getLLVMContext(),
171                            CGF.getContext().getTypeSize(T));
172   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
173 
174   llvm::Value *Args[2];
175   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
176   llvm::Type *ValueType = Args[1]->getType();
177   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
178   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
179 
180   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
181       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
182   Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
183   if (Invert)
184     Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
185                                      llvm::ConstantInt::get(IntType, -1));
186   Result = EmitFromInt(CGF, Result, T, ValueType);
187   return RValue::get(Result);
188 }
189 
190 /// @brief Utility to insert an atomic cmpxchg instruction.
191 ///
192 /// @param CGF The current codegen function.
193 /// @param E   Builtin call expression to convert to cmpxchg.
194 ///            arg0 - address to operate on
195 ///            arg1 - value to compare with
196 ///            arg2 - new value
197 /// @param ReturnBool Specifies whether to return success flag of
198 ///                   cmpxchg result or the old value.
199 ///
200 /// @returns result of cmpxchg, according to ReturnBool
201 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
202                                      bool ReturnBool) {
203   QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
204   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
205   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
206 
207   llvm::IntegerType *IntType = llvm::IntegerType::get(
208       CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
209   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
210 
211   Value *Args[3];
212   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
213   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
214   llvm::Type *ValueType = Args[1]->getType();
215   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
216   Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
217 
218   Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
219       Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
220       llvm::AtomicOrdering::SequentiallyConsistent);
221   if (ReturnBool)
222     // Extract boolean success flag and zext it to int.
223     return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
224                                   CGF.ConvertType(E->getType()));
225   else
226     // Extract old value and emit it using the same type as compare value.
227     return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
228                        ValueType);
229 }
230 
231 // Emit a simple mangled intrinsic that has 1 argument and a return type
232 // matching the argument type.
233 static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
234                                const CallExpr *E,
235                                unsigned IntrinsicID) {
236   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
237 
238   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
239   return CGF.Builder.CreateCall(F, Src0);
240 }
241 
242 // Emit an intrinsic that has 2 operands of the same type as its result.
243 static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
244                                 const CallExpr *E,
245                                 unsigned IntrinsicID) {
246   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
247   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
248 
249   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
250   return CGF.Builder.CreateCall(F, { Src0, Src1 });
251 }
252 
253 // Emit an intrinsic that has 3 operands of the same type as its result.
254 static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
255                                  const CallExpr *E,
256                                  unsigned IntrinsicID) {
257   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
258   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
259   llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
260 
261   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
262   return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
263 }
264 
265 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
266 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
267                                const CallExpr *E,
268                                unsigned IntrinsicID) {
269   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
270   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
271 
272   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
273   return CGF.Builder.CreateCall(F, {Src0, Src1});
274 }
275 
276 /// EmitFAbs - Emit a call to @llvm.fabs().
277 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
278   Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
279   llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
280   Call->setDoesNotAccessMemory();
281   return Call;
282 }
283 
284 /// Emit the computation of the sign bit for a floating point value. Returns
285 /// the i1 sign bit value.
286 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
287   LLVMContext &C = CGF.CGM.getLLVMContext();
288 
289   llvm::Type *Ty = V->getType();
290   int Width = Ty->getPrimitiveSizeInBits();
291   llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
292   V = CGF.Builder.CreateBitCast(V, IntTy);
293   if (Ty->isPPC_FP128Ty()) {
294     // We want the sign bit of the higher-order double. The bitcast we just
295     // did works as if the double-double was stored to memory and then
296     // read as an i128. The "store" will put the higher-order double in the
297     // lower address in both little- and big-Endian modes, but the "load"
298     // will treat those bits as a different part of the i128: the low bits in
299     // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
300     // we need to shift the high bits down to the low before truncating.
301     Width >>= 1;
302     if (CGF.getTarget().isBigEndian()) {
303       Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
304       V = CGF.Builder.CreateLShr(V, ShiftCst);
305     }
306     // We are truncating value in order to extract the higher-order
307     // double, which we will be using to extract the sign from.
308     IntTy = llvm::IntegerType::get(C, Width);
309     V = CGF.Builder.CreateTrunc(V, IntTy);
310   }
311   Value *Zero = llvm::Constant::getNullValue(IntTy);
312   return CGF.Builder.CreateICmpSLT(V, Zero);
313 }
314 
315 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
316                               const CallExpr *E, llvm::Constant *calleeValue) {
317   CGCallee callee = CGCallee::forDirect(calleeValue, FD);
318   return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
319 }
320 
321 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
322 /// depending on IntrinsicID.
323 ///
324 /// \arg CGF The current codegen function.
325 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
326 /// \arg X The first argument to the llvm.*.with.overflow.*.
327 /// \arg Y The second argument to the llvm.*.with.overflow.*.
328 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
329 /// \returns The result (i.e. sum/product) returned by the intrinsic.
330 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
331                                           const llvm::Intrinsic::ID IntrinsicID,
332                                           llvm::Value *X, llvm::Value *Y,
333                                           llvm::Value *&Carry) {
334   // Make sure we have integers of the same width.
335   assert(X->getType() == Y->getType() &&
336          "Arguments must be the same type. (Did you forget to make sure both "
337          "arguments have the same integer width?)");
338 
339   llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
340   llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
341   Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
342   return CGF.Builder.CreateExtractValue(Tmp, 0);
343 }
344 
345 static Value *emitRangedBuiltin(CodeGenFunction &CGF,
346                                 unsigned IntrinsicID,
347                                 int low, int high) {
348     llvm::MDBuilder MDHelper(CGF.getLLVMContext());
349     llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
350     Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
351     llvm::Instruction *Call = CGF.Builder.CreateCall(F);
352     Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
353     return Call;
354 }
355 
namespace {
/// The width in bits and the signedness of an integer type.
struct WidthAndSignedness {
  unsigned Width; // Width of the type in bits.
  bool Signed;    // True if the type is signed.
};
} // end anonymous namespace
362 
363 static WidthAndSignedness
364 getIntegerWidthAndSignedness(const clang::ASTContext &context,
365                              const clang::QualType Type) {
366   assert(Type->isIntegerType() && "Given type is not an integer.");
367   unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
368   bool Signed = Type->isSignedIntegerType();
369   return {Width, Signed};
370 }
371 
372 // Given one or more integer types, this function produces an integer type that
373 // encompasses them: any value in one of the given types could be expressed in
374 // the encompassing type.
375 static struct WidthAndSignedness
376 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
377   assert(Types.size() > 0 && "Empty list of types.");
378 
379   // If any of the given types is signed, we must return a signed type.
380   bool Signed = false;
381   for (const auto &Type : Types) {
382     Signed |= Type.Signed;
383   }
384 
385   // The encompassing type must have a width greater than or equal to the width
386   // of the specified types.  Aditionally, if the encompassing type is signed,
387   // its width must be strictly greater than the width of any unsigned types
388   // given.
389   unsigned Width = 0;
390   for (const auto &Type : Types) {
391     unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
392     if (Width < MinWidth) {
393       Width = MinWidth;
394     }
395   }
396 
397   return {Width, Signed};
398 }
399 
400 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
401   llvm::Type *DestType = Int8PtrTy;
402   if (ArgValue->getType() != DestType)
403     ArgValue =
404         Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
405 
406   Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
407   return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
408 }
409 
/// Checks if using the result of __builtin_object_size(p, @p From) in place of
/// __builtin_object_size(p, @p To) is correct
static bool areBOSTypesCompatible(int From, int To) {
  // Note: Our __builtin_object_size implementation currently treats Type=0 and
  // Type=2 identically. Encoding this implementation detail here may make
  // improving __builtin_object_size difficult in the future, so it's omitted.
  if (From == To)
    return true;

  // Beyond identical types, only 0 -> 1 and 3 -> 2 are accepted.
  switch (From) {
  case 0:
    return To == 1;
  case 3:
    return To == 2;
  default:
    return false;
  }
}
418 
419 static llvm::Value *
420 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
421   return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
422 }
423 
424 llvm::Value *
425 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
426                                                  llvm::IntegerType *ResType,
427                                                  llvm::Value *EmittedE) {
428   uint64_t ObjectSize;
429   if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
430     return emitBuiltinObjectSize(E, Type, ResType, EmittedE);
431   return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
432 }
433 
434 /// Returns a Value corresponding to the size of the given expression.
435 /// This Value may be either of the following:
436 ///   - A llvm::Argument (if E is a param with the pass_object_size attribute on
437 ///     it)
438 ///   - A call to the @llvm.objectsize intrinsic
439 ///
440 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
441 /// and we wouldn't otherwise try to reference a pass_object_size parameter,
442 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
443 llvm::Value *
444 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
445                                        llvm::IntegerType *ResType,
446                                        llvm::Value *EmittedE) {
447   // We need to reference an argument if the pointer is a parameter with the
448   // pass_object_size attribute.
449   if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
450     auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
451     auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
452     if (Param != nullptr && PS != nullptr &&
453         areBOSTypesCompatible(PS->getType(), Type)) {
454       auto Iter = SizeArguments.find(Param);
455       assert(Iter != SizeArguments.end());
456 
457       const ImplicitParamDecl *D = Iter->second;
458       auto DIter = LocalDeclMap.find(D);
459       assert(DIter != LocalDeclMap.end());
460 
461       return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
462                               getContext().getSizeType(), E->getLocStart());
463     }
464   }
465 
466   // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
467   // evaluate E for side-effects. In either case, we shouldn't lower to
468   // @llvm.objectsize.
469   if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
470     return getDefaultBuiltinObjectSizeResult(Type, ResType);
471 
472   Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
473   assert(Ptr->getType()->isPointerTy() &&
474          "Non-pointer passed to __builtin_object_size?");
475 
476   Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
477 
478   // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
479   Value *Min = Builder.getInt1((Type & 2) != 0);
480   // For GCC compatability, __builtin_object_size treat NULL as unknown size.
481   Value *NullIsUnknown = Builder.getTrue();
482   return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
483 }
484 
// Many of MSVC builtins are on both x64 and ARM; to avoid repeating code, we
// handle them here.
//
// Each enumerator identifies one MSVC intrinsic that
// CodeGenFunction::EmitMSVCBuiltinExpr knows how to lower.
enum class CodeGenFunction::MSVCIntrin {
  // Bit scans: lowered via the cttz / ctlz intrinsics.
  _BitScanForward,
  _BitScanReverse,
  // Interlocked operations: lowered to atomicrmw instructions.
  _InterlockedAnd,
  _InterlockedDecrement,
  _InterlockedExchange,
  _InterlockedExchangeAdd,
  _InterlockedExchangeSub,
  _InterlockedIncrement,
  _InterlockedOr,
  _InterlockedXor,
  _interlockedbittestandset,
  // Immediate process termination: lowered to target-specific inline asm.
  __fastfail,
};
501 
502 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
503                                             const CallExpr *E) {
504   switch (BuiltinID) {
505   case MSVCIntrin::_BitScanForward:
506   case MSVCIntrin::_BitScanReverse: {
507     Value *ArgValue = EmitScalarExpr(E->getArg(1));
508 
509     llvm::Type *ArgType = ArgValue->getType();
510     llvm::Type *IndexType =
511       EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType();
512     llvm::Type *ResultType = ConvertType(E->getType());
513 
514     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
515     Value *ResZero = llvm::Constant::getNullValue(ResultType);
516     Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
517 
518     BasicBlock *Begin = Builder.GetInsertBlock();
519     BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
520     Builder.SetInsertPoint(End);
521     PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
522 
523     Builder.SetInsertPoint(Begin);
524     Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
525     BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
526     Builder.CreateCondBr(IsZero, End, NotZero);
527     Result->addIncoming(ResZero, Begin);
528 
529     Builder.SetInsertPoint(NotZero);
530     Address IndexAddress = EmitPointerWithAlignment(E->getArg(0));
531 
532     if (BuiltinID == MSVCIntrin::_BitScanForward) {
533       Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
534       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
535       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
536       Builder.CreateStore(ZeroCount, IndexAddress, false);
537     } else {
538       unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
539       Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
540 
541       Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
542       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
543       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
544       Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
545       Builder.CreateStore(Index, IndexAddress, false);
546     }
547     Builder.CreateBr(End);
548     Result->addIncoming(ResOne, NotZero);
549 
550     Builder.SetInsertPoint(End);
551     return Result;
552   }
553   case MSVCIntrin::_InterlockedAnd:
554     return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
555   case MSVCIntrin::_InterlockedExchange:
556     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
557   case MSVCIntrin::_InterlockedExchangeAdd:
558     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
559   case MSVCIntrin::_InterlockedExchangeSub:
560     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
561   case MSVCIntrin::_InterlockedOr:
562     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
563   case MSVCIntrin::_InterlockedXor:
564     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
565 
566   case MSVCIntrin::_interlockedbittestandset: {
567     llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
568     llvm::Value *Bit = EmitScalarExpr(E->getArg(1));
569     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
570         AtomicRMWInst::Or, Addr,
571         Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit),
572         llvm::AtomicOrdering::SequentiallyConsistent);
573     // Shift the relevant bit to the least significant position, truncate to
574     // the result type, and test the low bit.
575     llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit);
576     llvm::Value *Truncated =
577         Builder.CreateTrunc(Shifted, ConvertType(E->getType()));
578     return Builder.CreateAnd(Truncated,
579                              ConstantInt::get(Truncated->getType(), 1));
580   }
581 
582   case MSVCIntrin::_InterlockedDecrement: {
583     llvm::Type *IntTy = ConvertType(E->getType());
584     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
585       AtomicRMWInst::Sub,
586       EmitScalarExpr(E->getArg(0)),
587       ConstantInt::get(IntTy, 1),
588       llvm::AtomicOrdering::SequentiallyConsistent);
589     return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1));
590   }
591   case MSVCIntrin::_InterlockedIncrement: {
592     llvm::Type *IntTy = ConvertType(E->getType());
593     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
594       AtomicRMWInst::Add,
595       EmitScalarExpr(E->getArg(0)),
596       ConstantInt::get(IntTy, 1),
597       llvm::AtomicOrdering::SequentiallyConsistent);
598     return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1));
599   }
600 
601   case MSVCIntrin::__fastfail: {
602     // Request immediate process termination from the kernel. The instruction
603     // sequences to do this are documented on MSDN:
604     // https://msdn.microsoft.com/en-us/library/dn774154.aspx
605     llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
606     StringRef Asm, Constraints;
607     switch (ISA) {
608     default:
609       ErrorUnsupported(E, "__fastfail call for this architecture");
610       break;
611     case llvm::Triple::x86:
612     case llvm::Triple::x86_64:
613       Asm = "int $$0x29";
614       Constraints = "{cx}";
615       break;
616     case llvm::Triple::thumb:
617       Asm = "udf #251";
618       Constraints = "{r0}";
619       break;
620     }
621     llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
622     llvm::InlineAsm *IA =
623         llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
624     llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
625         getLLVMContext(), llvm::AttributeList::FunctionIndex,
626         llvm::Attribute::NoReturn);
627     CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
628     CS.setAttributes(NoReturnAttr);
629     return CS.getInstruction();
630   }
631   }
632   llvm_unreachable("Incorrect MSVC intrinsic!");
633 }
634 
635 namespace {
636 // ARC cleanup for __builtin_os_log_format
637 struct CallObjCArcUse final : EHScopeStack::Cleanup {
638   CallObjCArcUse(llvm::Value *object) : object(object) {}
639   llvm::Value *object;
640 
641   void Emit(CodeGenFunction &CGF, Flags flags) override {
642     CGF.EmitARCIntrinsicUse(object);
643   }
644 };
645 }
646 
647 Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
648                                                  BuiltinCheckKind Kind) {
649   assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
650           && "Unsupported builtin check kind");
651 
652   Value *ArgValue = EmitScalarExpr(E);
653   if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef())
654     return ArgValue;
655 
656   SanitizerScope SanScope(this);
657   Value *Cond = Builder.CreateICmpNE(
658       ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
659   EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
660             SanitizerHandler::InvalidBuiltin,
661             {EmitCheckSourceLocation(E->getExprLoc()),
662              llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
663             None);
664   return ArgValue;
665 }
666 
667 /// Get the argument type for arguments to os_log_helper.
668 static CanQualType getOSLogArgType(ASTContext &C, int Size) {
669   QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
670   return C.getCanonicalType(UnsignedTy);
671 }
672 
/// Get or create the helper function that writes an os_log buffer with the
/// given layout.
///
/// The helper's name encodes the buffer alignment, the layout's summary and
/// argument-count bytes, and the size and descriptor byte of every item. If
/// the module already contains a function with that name it is returned
/// as-is; otherwise a new function is generated and returned.
///
/// \param Layout The buffer layout the helper should write.
/// \param BufferAlignment The alignment assumed for the buffer pointer.
/// \returns The helper function.
llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
    const analyze_os_log::OSLogBufferLayout &Layout,
    CharUnits BufferAlignment) {
  ASTContext &Ctx = getContext();

  // Build the helper's name from everything that determines its body.
  llvm::SmallString<64> Name;
  {
    raw_svector_ostream OS(Name);
    OS << "__os_log_helper";
    OS << "_" << BufferAlignment.getQuantity();
    OS << "_" << int(Layout.getSummaryByte());
    OS << "_" << int(Layout.getNumArgsByte());
    for (const auto &Item : Layout.Items)
      OS << "_" << int(Item.getSizeByte()) << "_"
         << int(Item.getDescriptorByte());
  }

  // Reuse a helper of the same name if this module already has one.
  if (llvm::Function *F = CGM.getModule().getFunction(Name))
    return F;

  // The first parameter is the destination buffer (void *).
  llvm::SmallVector<ImplicitParamDecl, 4> Params;
  Params.emplace_back(Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"),
                      Ctx.VoidPtrTy, ImplicitParamDecl::Other);

  // Then one parameter per item with a non-zero size byte, named "arg<I>"
  // after the item's index and typed as an unsigned integer of that size.
  for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
    char Size = Layout.Items[I].getSizeByte();
    if (!Size)
      continue;

    Params.emplace_back(
        Ctx, nullptr, SourceLocation(),
        &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)),
        getOSLogArgType(Ctx, Size), ImplicitParamDecl::Other);
  }

  // Take the parameter addresses only now that Params is no longer growing.
  FunctionArgList Args;
  for (auto &P : Params)
    Args.push_back(&P);

  // The helper function has linkonce_odr linkage to enable the linker to merge
  // identical functions. To ensure the merging always happens, 'noinline' is
  // attached to the function when compiling with -Oz.
  const CGFunctionInfo &FI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
  llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
  llvm::Function *Fn = llvm::Function::Create(
      FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
  Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
  CGM.SetLLVMFunctionAttributes(nullptr, FI, Fn);
  CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);

  // Attach 'noinline' at -Oz.
  if (CGM.getCodeGenOpts().OptimizeSize == 2)
    Fn->addFnAttr(llvm::Attribute::NoInline);

  // Create a declaration for the helper (with an empty debug location in
  // effect) to pass to StartFunction.
  auto NL = ApplyDebugLocation::CreateEmpty(*this);
  IdentifierInfo *II = &Ctx.Idents.get(Name);
  FunctionDecl *FD = FunctionDecl::Create(
      Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
      Ctx.VoidTy, nullptr, SC_PrivateExtern, false, false);

  StartFunction(FD, Ctx.VoidTy, Fn, FI, Args);

  // Create a scope with an artificial location for the body of this function.
  auto AL = ApplyDebugLocation::CreateArtificial(*this);

  // The buffer starts with two header bytes: the summary byte, then the
  // argument-count byte. Offset tracks the current write position.
  CharUnits Offset;
  Address BufAddr(Builder.CreateLoad(GetAddrOfLocalVar(&Params[0]), "buf"),
                  BufferAlignment);
  Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
                      Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
  Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
                      Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));

  // I indexes Params; it starts at 1 because Params[0] is the buffer.
  unsigned I = 1;
  for (const auto &Item : Layout.Items) {
    // Every item contributes a descriptor byte followed by a size byte.
    Builder.CreateStore(
        Builder.getInt8(Item.getDescriptorByte()),
        Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
    Builder.CreateStore(
        Builder.getInt8(Item.getSizeByte()),
        Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));

    // Items of size zero carry no payload and consume no parameter.
    CharUnits Size = Item.size();
    if (!Size.getQuantity())
      continue;

    // Copy the parameter's value into the buffer at the current offset, then
    // advance past it and move on to the next parameter.
    Address Arg = GetAddrOfLocalVar(&Params[I]);
    Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
    Addr = Builder.CreateBitCast(Addr, Arg.getPointer()->getType(),
                                 "argDataCast");
    Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
    Offset += Size;
    ++I;
  }

  FinishFunction();

  return Fn;
}
773 
774 RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
775   assert(E.getNumArgs() >= 2 &&
776          "__builtin_os_log_format takes at least 2 arguments");
777   ASTContext &Ctx = getContext();
778   analyze_os_log::OSLogBufferLayout Layout;
779   analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
780   Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
781   llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
782 
783   // Ignore argument 1, the format string. It is not currently used.
784   CallArgList Args;
785   Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy);
786 
787   for (const auto &Item : Layout.Items) {
788     int Size = Item.getSizeByte();
789     if (!Size)
790       continue;
791 
792     llvm::Value *ArgVal;
793 
794     if (const Expr *TheExpr = Item.getExpr()) {
795       ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
796 
797       // Check if this is a retainable type.
798       if (TheExpr->getType()->isObjCRetainableType()) {
799         assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
800                "Only scalar can be a ObjC retainable type");
801         // Check if the object is constant, if not, save it in
802         // RetainableOperands.
803         if (!isa<Constant>(ArgVal))
804           RetainableOperands.push_back(ArgVal);
805       }
806     } else {
807       ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
808     }
809 
810     unsigned ArgValSize =
811         CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
812     llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
813                                                      ArgValSize);
814     ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
815     CanQualType ArgTy = getOSLogArgType(Ctx, Size);
816     // If ArgVal has type x86_fp80, zero-extend ArgVal.
817     ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
818     Args.add(RValue::get(ArgVal), ArgTy);
819   }
820 
821   const CGFunctionInfo &FI =
822       CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
823   llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
824       Layout, BufAddr.getAlignment());
825   EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
826 
827   // Push a clang.arc.use cleanup for each object in RetainableOperands. The
828   // cleanup will cause the use to appear after the final log call, keeping
829   // the object valid while it’s held in the log buffer.  Note that if there’s
830   // a release cleanup on the object, it will already be active; since
831   // cleanups are emitted in reverse order, the use will occur before the
832   // object is released.
833   if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
834       CGM.getCodeGenOpts().OptimizationLevel != 0)
835     for (llvm::Value *Object : RetainableOperands)
836       pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), Object);
837 
838   return RValue::get(BufAddr.getPointer());
839 }
840 
841 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
842                                         unsigned BuiltinID, const CallExpr *E,
843                                         ReturnValueSlot ReturnValue) {
844   // See if we can constant fold this builtin.  If so, don't emit it at all.
845   Expr::EvalResult Result;
846   if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
847       !Result.hasSideEffects()) {
848     if (Result.Val.isInt())
849       return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
850                                                 Result.Val.getInt()));
851     if (Result.Val.isFloat())
852       return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
853                                                Result.Val.getFloat()));
854   }
855 
856   switch (BuiltinID) {
857   default: break;  // Handle intrinsics and libm functions below.
858   case Builtin::BI__builtin___CFStringMakeConstantString:
859   case Builtin::BI__builtin___NSStringMakeConstantString:
860     return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
861   case Builtin::BI__builtin_stdarg_start:
862   case Builtin::BI__builtin_va_start:
863   case Builtin::BI__va_start:
864   case Builtin::BI__builtin_va_end:
865     return RValue::get(
866         EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
867                            ? EmitScalarExpr(E->getArg(0))
868                            : EmitVAListRef(E->getArg(0)).getPointer(),
869                        BuiltinID != Builtin::BI__builtin_va_end));
870   case Builtin::BI__builtin_va_copy: {
871     Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
872     Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
873 
874     llvm::Type *Type = Int8PtrTy;
875 
876     DstPtr = Builder.CreateBitCast(DstPtr, Type);
877     SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
878     return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
879                                           {DstPtr, SrcPtr}));
880   }
881   case Builtin::BI__builtin_abs:
882   case Builtin::BI__builtin_labs:
883   case Builtin::BI__builtin_llabs: {
884     Value *ArgValue = EmitScalarExpr(E->getArg(0));
885 
886     Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
887     Value *CmpResult =
888     Builder.CreateICmpSGE(ArgValue,
889                           llvm::Constant::getNullValue(ArgValue->getType()),
890                                                             "abscond");
891     Value *Result =
892       Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
893 
894     return RValue::get(Result);
895   }
896   case Builtin::BI__builtin_fabs:
897   case Builtin::BI__builtin_fabsf:
898   case Builtin::BI__builtin_fabsl: {
899     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
900   }
901   case Builtin::BI__builtin_fmod:
902   case Builtin::BI__builtin_fmodf:
903   case Builtin::BI__builtin_fmodl: {
904     Value *Arg1 = EmitScalarExpr(E->getArg(0));
905     Value *Arg2 = EmitScalarExpr(E->getArg(1));
906     Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
907     return RValue::get(Result);
908   }
909   case Builtin::BI__builtin_copysign:
910   case Builtin::BI__builtin_copysignf:
911   case Builtin::BI__builtin_copysignl: {
912     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
913   }
914   case Builtin::BI__builtin_ceil:
915   case Builtin::BI__builtin_ceilf:
916   case Builtin::BI__builtin_ceill: {
917     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
918   }
919   case Builtin::BI__builtin_floor:
920   case Builtin::BI__builtin_floorf:
921   case Builtin::BI__builtin_floorl: {
922     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
923   }
924   case Builtin::BI__builtin_trunc:
925   case Builtin::BI__builtin_truncf:
926   case Builtin::BI__builtin_truncl: {
927     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
928   }
929   case Builtin::BI__builtin_rint:
930   case Builtin::BI__builtin_rintf:
931   case Builtin::BI__builtin_rintl: {
932     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
933   }
934   case Builtin::BI__builtin_nearbyint:
935   case Builtin::BI__builtin_nearbyintf:
936   case Builtin::BI__builtin_nearbyintl: {
937     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
938   }
939   case Builtin::BI__builtin_round:
940   case Builtin::BI__builtin_roundf:
941   case Builtin::BI__builtin_roundl: {
942     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
943   }
944   case Builtin::BI__builtin_fmin:
945   case Builtin::BI__builtin_fminf:
946   case Builtin::BI__builtin_fminl: {
947     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
948   }
949   case Builtin::BI__builtin_fmax:
950   case Builtin::BI__builtin_fmaxf:
951   case Builtin::BI__builtin_fmaxl: {
952     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
953   }
954   case Builtin::BI__builtin_conj:
955   case Builtin::BI__builtin_conjf:
956   case Builtin::BI__builtin_conjl: {
957     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
958     Value *Real = ComplexVal.first;
959     Value *Imag = ComplexVal.second;
960     Value *Zero =
961       Imag->getType()->isFPOrFPVectorTy()
962         ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
963         : llvm::Constant::getNullValue(Imag->getType());
964 
965     Imag = Builder.CreateFSub(Zero, Imag, "sub");
966     return RValue::getComplex(std::make_pair(Real, Imag));
967   }
968   case Builtin::BI__builtin_creal:
969   case Builtin::BI__builtin_crealf:
970   case Builtin::BI__builtin_creall:
971   case Builtin::BIcreal:
972   case Builtin::BIcrealf:
973   case Builtin::BIcreall: {
974     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
975     return RValue::get(ComplexVal.first);
976   }
977 
978   case Builtin::BI__builtin_cimag:
979   case Builtin::BI__builtin_cimagf:
980   case Builtin::BI__builtin_cimagl:
981   case Builtin::BIcimag:
982   case Builtin::BIcimagf:
983   case Builtin::BIcimagl: {
984     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
985     return RValue::get(ComplexVal.second);
986   }
987 
988   case Builtin::BI__builtin_ctzs:
989   case Builtin::BI__builtin_ctz:
990   case Builtin::BI__builtin_ctzl:
991   case Builtin::BI__builtin_ctzll: {
992     Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
993 
994     llvm::Type *ArgType = ArgValue->getType();
995     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
996 
997     llvm::Type *ResultType = ConvertType(E->getType());
998     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
999     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
1000     if (Result->getType() != ResultType)
1001       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1002                                      "cast");
1003     return RValue::get(Result);
1004   }
1005   case Builtin::BI__builtin_clzs:
1006   case Builtin::BI__builtin_clz:
1007   case Builtin::BI__builtin_clzl:
1008   case Builtin::BI__builtin_clzll: {
1009     Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
1010 
1011     llvm::Type *ArgType = ArgValue->getType();
1012     Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1013 
1014     llvm::Type *ResultType = ConvertType(E->getType());
1015     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
1016     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
1017     if (Result->getType() != ResultType)
1018       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1019                                      "cast");
1020     return RValue::get(Result);
1021   }
1022   case Builtin::BI__builtin_ffs:
1023   case Builtin::BI__builtin_ffsl:
1024   case Builtin::BI__builtin_ffsll: {
1025     // ffs(x) -> x ? cttz(x) + 1 : 0
1026     Value *ArgValue = EmitScalarExpr(E->getArg(0));
1027 
1028     llvm::Type *ArgType = ArgValue->getType();
1029     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1030 
1031     llvm::Type *ResultType = ConvertType(E->getType());
1032     Value *Tmp =
1033         Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
1034                           llvm::ConstantInt::get(ArgType, 1));
1035     Value *Zero = llvm::Constant::getNullValue(ArgType);
1036     Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
1037     Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
1038     if (Result->getType() != ResultType)
1039       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1040                                      "cast");
1041     return RValue::get(Result);
1042   }
1043   case Builtin::BI__builtin_parity:
1044   case Builtin::BI__builtin_parityl:
1045   case Builtin::BI__builtin_parityll: {
1046     // parity(x) -> ctpop(x) & 1
1047     Value *ArgValue = EmitScalarExpr(E->getArg(0));
1048 
1049     llvm::Type *ArgType = ArgValue->getType();
1050     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
1051 
1052     llvm::Type *ResultType = ConvertType(E->getType());
1053     Value *Tmp = Builder.CreateCall(F, ArgValue);
1054     Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
1055     if (Result->getType() != ResultType)
1056       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1057                                      "cast");
1058     return RValue::get(Result);
1059   }
1060   case Builtin::BI__popcnt16:
1061   case Builtin::BI__popcnt:
1062   case Builtin::BI__popcnt64:
1063   case Builtin::BI__builtin_popcount:
1064   case Builtin::BI__builtin_popcountl:
1065   case Builtin::BI__builtin_popcountll: {
1066     Value *ArgValue = EmitScalarExpr(E->getArg(0));
1067 
1068     llvm::Type *ArgType = ArgValue->getType();
1069     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
1070 
1071     llvm::Type *ResultType = ConvertType(E->getType());
1072     Value *Result = Builder.CreateCall(F, ArgValue);
1073     if (Result->getType() != ResultType)
1074       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1075                                      "cast");
1076     return RValue::get(Result);
1077   }
1078   case Builtin::BI_rotr8:
1079   case Builtin::BI_rotr16:
1080   case Builtin::BI_rotr:
1081   case Builtin::BI_lrotr:
1082   case Builtin::BI_rotr64: {
1083     Value *Val = EmitScalarExpr(E->getArg(0));
1084     Value *Shift = EmitScalarExpr(E->getArg(1));
1085 
1086     llvm::Type *ArgType = Val->getType();
1087     Shift = Builder.CreateIntCast(Shift, ArgType, false);
1088     unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
1089     Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
1090     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
1091 
1092     Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
1093     Shift = Builder.CreateAnd(Shift, Mask);
1094     Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift);
1095 
1096     Value *RightShifted = Builder.CreateLShr(Val, Shift);
1097     Value *LeftShifted = Builder.CreateShl(Val, LeftShift);
1098     Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
1099 
1100     Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
1101     Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
1102     return RValue::get(Result);
1103   }
1104   case Builtin::BI_rotl8:
1105   case Builtin::BI_rotl16:
1106   case Builtin::BI_rotl:
1107   case Builtin::BI_lrotl:
1108   case Builtin::BI_rotl64: {
1109     Value *Val = EmitScalarExpr(E->getArg(0));
1110     Value *Shift = EmitScalarExpr(E->getArg(1));
1111 
1112     llvm::Type *ArgType = Val->getType();
1113     Shift = Builder.CreateIntCast(Shift, ArgType, false);
1114     unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
1115     Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
1116     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
1117 
1118     Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
1119     Shift = Builder.CreateAnd(Shift, Mask);
1120     Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift);
1121 
1122     Value *LeftShifted = Builder.CreateShl(Val, Shift);
1123     Value *RightShifted = Builder.CreateLShr(Val, RightShift);
1124     Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
1125 
1126     Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
1127     Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
1128     return RValue::get(Result);
1129   }
1130   case Builtin::BI__builtin_unpredictable: {
1131     // Always return the argument of __builtin_unpredictable. LLVM does not
1132     // handle this builtin. Metadata for this builtin should be added directly
1133     // to instructions such as branches or switches that use it.
1134     return RValue::get(EmitScalarExpr(E->getArg(0)));
1135   }
1136   case Builtin::BI__builtin_expect: {
1137     Value *ArgValue = EmitScalarExpr(E->getArg(0));
1138     llvm::Type *ArgType = ArgValue->getType();
1139 
1140     Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
1141     // Don't generate llvm.expect on -O0 as the backend won't use it for
1142     // anything.
1143     // Note, we still IRGen ExpectedValue because it could have side-effects.
1144     if (CGM.getCodeGenOpts().OptimizationLevel == 0)
1145       return RValue::get(ArgValue);
1146 
1147     Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
1148     Value *Result =
1149         Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
1150     return RValue::get(Result);
1151   }
1152   case Builtin::BI__builtin_assume_aligned: {
1153     Value *PtrValue = EmitScalarExpr(E->getArg(0));
1154     Value *OffsetValue =
1155       (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
1156 
1157     Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
1158     ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
1159     unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
1160 
1161     EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
1162     return RValue::get(PtrValue);
1163   }
1164   case Builtin::BI__assume:
1165   case Builtin::BI__builtin_assume: {
1166     if (E->getArg(0)->HasSideEffects(getContext()))
1167       return RValue::get(nullptr);
1168 
1169     Value *ArgValue = EmitScalarExpr(E->getArg(0));
1170     Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
1171     return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
1172   }
1173   case Builtin::BI__builtin_bswap16:
1174   case Builtin::BI__builtin_bswap32:
1175   case Builtin::BI__builtin_bswap64: {
1176     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
1177   }
1178   case Builtin::BI__builtin_bitreverse8:
1179   case Builtin::BI__builtin_bitreverse16:
1180   case Builtin::BI__builtin_bitreverse32:
1181   case Builtin::BI__builtin_bitreverse64: {
1182     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
1183   }
1184   case Builtin::BI__builtin_object_size: {
1185     unsigned Type =
1186         E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
1187     auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
1188 
1189     // We pass this builtin onto the optimizer so that it can figure out the
1190     // object size in more complex cases.
1191     return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
1192                                              /*EmittedE=*/nullptr));
1193   }
1194   case Builtin::BI__builtin_prefetch: {
1195     Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
    // FIXME: Technically these constants should be of type 'int', yes?
1197     RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
1198       llvm::ConstantInt::get(Int32Ty, 0);
1199     Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
1200       llvm::ConstantInt::get(Int32Ty, 3);
1201     Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
1202     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
1203     return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
1204   }
1205   case Builtin::BI__builtin_readcyclecounter: {
1206     Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
1207     return RValue::get(Builder.CreateCall(F));
1208   }
1209   case Builtin::BI__builtin___clear_cache: {
1210     Value *Begin = EmitScalarExpr(E->getArg(0));
1211     Value *End = EmitScalarExpr(E->getArg(1));
1212     Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
1213     return RValue::get(Builder.CreateCall(F, {Begin, End}));
1214   }
1215   case Builtin::BI__builtin_trap:
1216     return RValue::get(EmitTrapCall(Intrinsic::trap));
1217   case Builtin::BI__debugbreak:
1218     return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
1219   case Builtin::BI__builtin_unreachable: {
1220     if (SanOpts.has(SanitizerKind::Unreachable)) {
1221       SanitizerScope SanScope(this);
1222       EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
1223                                SanitizerKind::Unreachable),
1224                 SanitizerHandler::BuiltinUnreachable,
1225                 EmitCheckSourceLocation(E->getExprLoc()), None);
1226     } else
1227       Builder.CreateUnreachable();
1228 
1229     // We do need to preserve an insertion point.
1230     EmitBlock(createBasicBlock("unreachable.cont"));
1231 
1232     return RValue::get(nullptr);
1233   }
1234 
1235   case Builtin::BI__builtin_powi:
1236   case Builtin::BI__builtin_powif:
1237   case Builtin::BI__builtin_powil: {
1238     Value *Base = EmitScalarExpr(E->getArg(0));
1239     Value *Exponent = EmitScalarExpr(E->getArg(1));
1240     llvm::Type *ArgType = Base->getType();
1241     Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
1242     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1243   }
1244 
1245   case Builtin::BI__builtin_isgreater:
1246   case Builtin::BI__builtin_isgreaterequal:
1247   case Builtin::BI__builtin_isless:
1248   case Builtin::BI__builtin_islessequal:
1249   case Builtin::BI__builtin_islessgreater:
1250   case Builtin::BI__builtin_isunordered: {
1251     // Ordered comparisons: we know the arguments to these are matching scalar
1252     // floating point values.
1253     Value *LHS = EmitScalarExpr(E->getArg(0));
1254     Value *RHS = EmitScalarExpr(E->getArg(1));
1255 
1256     switch (BuiltinID) {
1257     default: llvm_unreachable("Unknown ordered comparison");
1258     case Builtin::BI__builtin_isgreater:
1259       LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
1260       break;
1261     case Builtin::BI__builtin_isgreaterequal:
1262       LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
1263       break;
1264     case Builtin::BI__builtin_isless:
1265       LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
1266       break;
1267     case Builtin::BI__builtin_islessequal:
1268       LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
1269       break;
1270     case Builtin::BI__builtin_islessgreater:
1271       LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
1272       break;
1273     case Builtin::BI__builtin_isunordered:
1274       LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
1275       break;
1276     }
1277     // ZExt bool to int type.
1278     return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
1279   }
1280   case Builtin::BI__builtin_isnan: {
1281     Value *V = EmitScalarExpr(E->getArg(0));
1282     V = Builder.CreateFCmpUNO(V, V, "cmp");
1283     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1284   }
1285 
1286   case Builtin::BIfinite:
1287   case Builtin::BI__finite:
1288   case Builtin::BIfinitef:
1289   case Builtin::BI__finitef:
1290   case Builtin::BIfinitel:
1291   case Builtin::BI__finitel:
1292   case Builtin::BI__builtin_isinf:
1293   case Builtin::BI__builtin_isfinite: {
1294     // isinf(x)    --> fabs(x) == infinity
1295     // isfinite(x) --> fabs(x) != infinity
1296     // x != NaN via the ordered compare in either case.
1297     Value *V = EmitScalarExpr(E->getArg(0));
1298     Value *Fabs = EmitFAbs(*this, V);
1299     Constant *Infinity = ConstantFP::getInfinity(V->getType());
1300     CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
1301                                   ? CmpInst::FCMP_OEQ
1302                                   : CmpInst::FCMP_ONE;
1303     Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
1304     return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
1305   }
1306 
1307   case Builtin::BI__builtin_isinf_sign: {
1308     // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
1309     Value *Arg = EmitScalarExpr(E->getArg(0));
1310     Value *AbsArg = EmitFAbs(*this, Arg);
1311     Value *IsInf = Builder.CreateFCmpOEQ(
1312         AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
1313     Value *IsNeg = EmitSignBit(*this, Arg);
1314 
1315     llvm::Type *IntTy = ConvertType(E->getType());
1316     Value *Zero = Constant::getNullValue(IntTy);
1317     Value *One = ConstantInt::get(IntTy, 1);
1318     Value *NegativeOne = ConstantInt::get(IntTy, -1);
1319     Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
1320     Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
1321     return RValue::get(Result);
1322   }
1323 
1324   case Builtin::BI__builtin_isnormal: {
1325     // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
1326     Value *V = EmitScalarExpr(E->getArg(0));
1327     Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
1328 
1329     Value *Abs = EmitFAbs(*this, V);
1330     Value *IsLessThanInf =
1331       Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
1332     APFloat Smallest = APFloat::getSmallestNormalized(
1333                    getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
1334     Value *IsNormal =
1335       Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
1336                             "isnormal");
1337     V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
1338     V = Builder.CreateAnd(V, IsNormal, "and");
1339     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1340   }
1341 
1342   case Builtin::BI__builtin_fpclassify: {
1343     Value *V = EmitScalarExpr(E->getArg(5));
1344     llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
1345 
1346     // Create Result
1347     BasicBlock *Begin = Builder.GetInsertBlock();
1348     BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
1349     Builder.SetInsertPoint(End);
1350     PHINode *Result =
1351       Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
1352                         "fpclassify_result");
1353 
1354     // if (V==0) return FP_ZERO
1355     Builder.SetInsertPoint(Begin);
1356     Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
1357                                           "iszero");
1358     Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
1359     BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
1360     Builder.CreateCondBr(IsZero, End, NotZero);
1361     Result->addIncoming(ZeroLiteral, Begin);
1362 
1363     // if (V != V) return FP_NAN
1364     Builder.SetInsertPoint(NotZero);
1365     Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
1366     Value *NanLiteral = EmitScalarExpr(E->getArg(0));
1367     BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
1368     Builder.CreateCondBr(IsNan, End, NotNan);
1369     Result->addIncoming(NanLiteral, NotZero);
1370 
1371     // if (fabs(V) == infinity) return FP_INFINITY
1372     Builder.SetInsertPoint(NotNan);
1373     Value *VAbs = EmitFAbs(*this, V);
1374     Value *IsInf =
1375       Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
1376                             "isinf");
1377     Value *InfLiteral = EmitScalarExpr(E->getArg(1));
1378     BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
1379     Builder.CreateCondBr(IsInf, End, NotInf);
1380     Result->addIncoming(InfLiteral, NotNan);
1381 
1382     // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
1383     Builder.SetInsertPoint(NotInf);
1384     APFloat Smallest = APFloat::getSmallestNormalized(
1385         getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
1386     Value *IsNormal =
1387       Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
1388                             "isnormal");
1389     Value *NormalResult =
1390       Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
1391                            EmitScalarExpr(E->getArg(3)));
1392     Builder.CreateBr(End);
1393     Result->addIncoming(NormalResult, NotInf);
1394 
1395     // return Result
1396     Builder.SetInsertPoint(End);
1397     return RValue::get(Result);
1398   }
1399 
1400   case Builtin::BIalloca:
1401   case Builtin::BI_alloca:
1402   case Builtin::BI__builtin_alloca: {
1403     Value *Size = EmitScalarExpr(E->getArg(0));
1404     const TargetInfo &TI = getContext().getTargetInfo();
1405     // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
1406     unsigned SuitableAlignmentInBytes =
1407         CGM.getContext()
1408             .toCharUnitsFromBits(TI.getSuitableAlign())
1409             .getQuantity();
1410     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1411     AI->setAlignment(SuitableAlignmentInBytes);
1412     return RValue::get(AI);
1413   }
1414 
1415   case Builtin::BI__builtin_alloca_with_align: {
1416     Value *Size = EmitScalarExpr(E->getArg(0));
1417     Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
1418     auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
1419     unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
1420     unsigned AlignmentInBytes =
1421         CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity();
1422     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1423     AI->setAlignment(AlignmentInBytes);
1424     return RValue::get(AI);
1425   }
1426 
1427   case Builtin::BIbzero:
1428   case Builtin::BI__builtin_bzero: {
1429     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1430     Value *SizeVal = EmitScalarExpr(E->getArg(1));
1431     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1432                         E->getArg(0)->getExprLoc(), FD, 0);
1433     Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
1434     return RValue::get(Dest.getPointer());
1435   }
1436   case Builtin::BImemcpy:
1437   case Builtin::BI__builtin_memcpy: {
1438     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1439     Address Src = EmitPointerWithAlignment(E->getArg(1));
1440     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1441     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1442                         E->getArg(0)->getExprLoc(), FD, 0);
1443     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1444                         E->getArg(1)->getExprLoc(), FD, 1);
1445     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1446     return RValue::get(Dest.getPointer());
1447   }
1448 
  case Builtin::BI__builtin_char_memchr:
    // Retarget this builtin to __builtin_memchr and leave the switch without
    // emitting anything here. NOTE(review): the rewritten BuiltinID is
    // presumably consumed by the code that follows the switch -- confirm.
    BuiltinID = Builtin::BI__builtin_memchr;
    break;
1452 
1453   case Builtin::BI__builtin___memcpy_chk: {
1454     // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
1455     llvm::APSInt Size, DstSize;
1456     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1457         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1458       break;
1459     if (Size.ugt(DstSize))
1460       break;
1461     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1462     Address Src = EmitPointerWithAlignment(E->getArg(1));
1463     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1464     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1465     return RValue::get(Dest.getPointer());
1466   }
1467 
1468   case Builtin::BI__builtin_objc_memmove_collectable: {
1469     Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
1470     Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
1471     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1472     CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
1473                                                   DestAddr, SrcAddr, SizeVal);
1474     return RValue::get(DestAddr.getPointer());
1475   }
1476 
1477   case Builtin::BI__builtin___memmove_chk: {
1478     // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
1479     llvm::APSInt Size, DstSize;
1480     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1481         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1482       break;
1483     if (Size.ugt(DstSize))
1484       break;
1485     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1486     Address Src = EmitPointerWithAlignment(E->getArg(1));
1487     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1488     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1489     return RValue::get(Dest.getPointer());
1490   }
1491 
1492   case Builtin::BImemmove:
1493   case Builtin::BI__builtin_memmove: {
1494     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1495     Address Src = EmitPointerWithAlignment(E->getArg(1));
1496     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1497     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1498                         E->getArg(0)->getExprLoc(), FD, 0);
1499     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1500                         E->getArg(1)->getExprLoc(), FD, 1);
1501     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1502     return RValue::get(Dest.getPointer());
1503   }
1504   case Builtin::BImemset:
1505   case Builtin::BI__builtin_memset: {
1506     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1507     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1508                                          Builder.getInt8Ty());
1509     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1510     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1511                         E->getArg(0)->getExprLoc(), FD, 0);
1512     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1513     return RValue::get(Dest.getPointer());
1514   }
1515   case Builtin::BI__builtin___memset_chk: {
1516     // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
1517     llvm::APSInt Size, DstSize;
1518     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1519         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1520       break;
1521     if (Size.ugt(DstSize))
1522       break;
1523     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1524     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1525                                          Builder.getInt8Ty());
1526     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1527     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1528     return RValue::get(Dest.getPointer());
1529   }
1530   case Builtin::BI__builtin_dwarf_cfa: {
1531     // The offset in bytes from the first argument to the CFA.
1532     //
1533     // Why on earth is this in the frontend?  Is there any reason at
1534     // all that the backend can't reasonably determine this while
1535     // lowering llvm.eh.dwarf.cfa()?
1536     //
1537     // TODO: If there's a satisfactory reason, add a target hook for
1538     // this instead of hard-coding 0, which is correct for most targets.
1539     int32_t Offset = 0;
1540 
1541     Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
1542     return RValue::get(Builder.CreateCall(F,
1543                                       llvm::ConstantInt::get(Int32Ty, Offset)));
1544   }
1545   case Builtin::BI__builtin_return_address: {
1546     Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
1547                                                    getContext().UnsignedIntTy);
1548     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1549     return RValue::get(Builder.CreateCall(F, Depth));
1550   }
1551   case Builtin::BI_ReturnAddress: {
1552     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1553     return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
1554   }
1555   case Builtin::BI__builtin_frame_address: {
1556     Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
1557                                                    getContext().UnsignedIntTy);
1558     Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
1559     return RValue::get(Builder.CreateCall(F, Depth));
1560   }
1561   case Builtin::BI__builtin_extract_return_addr: {
1562     Value *Address = EmitScalarExpr(E->getArg(0));
1563     Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
1564     return RValue::get(Result);
1565   }
1566   case Builtin::BI__builtin_frob_return_addr: {
1567     Value *Address = EmitScalarExpr(E->getArg(0));
1568     Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
1569     return RValue::get(Result);
1570   }
1571   case Builtin::BI__builtin_dwarf_sp_column: {
1572     llvm::IntegerType *Ty
1573       = cast<llvm::IntegerType>(ConvertType(E->getType()));
1574     int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
1575     if (Column == -1) {
1576       CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
1577       return RValue::get(llvm::UndefValue::get(Ty));
1578     }
1579     return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
1580   }
1581   case Builtin::BI__builtin_init_dwarf_reg_size_table: {
1582     Value *Address = EmitScalarExpr(E->getArg(0));
1583     if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
1584       CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
1585     return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1586   }
1587   case Builtin::BI__builtin_eh_return: {
1588     Value *Int = EmitScalarExpr(E->getArg(0));
1589     Value *Ptr = EmitScalarExpr(E->getArg(1));
1590 
1591     llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
1592     assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
1593            "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
1594     Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
1595                                   ? Intrinsic::eh_return_i32
1596                                   : Intrinsic::eh_return_i64);
1597     Builder.CreateCall(F, {Int, Ptr});
1598     Builder.CreateUnreachable();
1599 
1600     // We do need to preserve an insertion point.
1601     EmitBlock(createBasicBlock("builtin_eh_return.cont"));
1602 
1603     return RValue::get(nullptr);
1604   }
1605   case Builtin::BI__builtin_unwind_init: {
1606     Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
1607     return RValue::get(Builder.CreateCall(F));
1608   }
1609   case Builtin::BI__builtin_extend_pointer: {
1610     // Extends a pointer to the size of an _Unwind_Word, which is
1611     // uint64_t on all platforms.  Generally this gets poked into a
1612     // register and eventually used as an address, so if the
1613     // addressing registers are wider than pointers and the platform
1614     // doesn't implicitly ignore high-order bits when doing
1615     // addressing, we need to make sure we zext / sext based on
1616     // the platform's expectations.
1617     //
1618     // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
1619 
1620     // Cast the pointer to intptr_t.
1621     Value *Ptr = EmitScalarExpr(E->getArg(0));
1622     Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
1623 
1624     // If that's 64 bits, we're done.
1625     if (IntPtrTy->getBitWidth() == 64)
1626       return RValue::get(Result);
1627 
1628     // Otherwise, ask the codegen data what to do.
1629     if (getTargetHooks().extendPointerWithSExt())
1630       return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
1631     else
1632       return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
1633   }
1634   case Builtin::BI__builtin_setjmp: {
1635     // Buffer is a void**.
1636     Address Buf = EmitPointerWithAlignment(E->getArg(0));
1637 
1638     // Store the frame pointer to the setjmp buffer.
1639     Value *FrameAddr =
1640       Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1641                          ConstantInt::get(Int32Ty, 0));
1642     Builder.CreateStore(FrameAddr, Buf);
1643 
1644     // Store the stack pointer to the setjmp buffer.
1645     Value *StackAddr =
1646         Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
1647     Address StackSaveSlot =
1648       Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
1649     Builder.CreateStore(StackAddr, StackSaveSlot);
1650 
1651     // Call LLVM's EH setjmp, which is lightweight.
1652     Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
1653     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1654     return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
1655   }
1656   case Builtin::BI__builtin_longjmp: {
1657     Value *Buf = EmitScalarExpr(E->getArg(0));
1658     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1659 
1660     // Call LLVM's EH longjmp, which is lightweight.
1661     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
1662 
1663     // longjmp doesn't return; mark this as unreachable.
1664     Builder.CreateUnreachable();
1665 
1666     // We do need to preserve an insertion point.
1667     EmitBlock(createBasicBlock("longjmp.cont"));
1668 
1669     return RValue::get(nullptr);
1670   }
  // The unsuffixed (overloaded) __sync_* builtins must never reach codegen;
  // per the message below, Sema is expected to have dealt with them.
  // NOTE(review): presumably Sema rewrites each one to a size-suffixed
  // _1/_2/_4/_8/_16 form handled by the cases that follow -- confirm.
  case Builtin::BI__sync_fetch_and_add:
  case Builtin::BI__sync_fetch_and_sub:
  case Builtin::BI__sync_fetch_and_or:
  case Builtin::BI__sync_fetch_and_and:
  case Builtin::BI__sync_fetch_and_xor:
  case Builtin::BI__sync_fetch_and_nand:
  case Builtin::BI__sync_add_and_fetch:
  case Builtin::BI__sync_sub_and_fetch:
  case Builtin::BI__sync_and_and_fetch:
  case Builtin::BI__sync_or_and_fetch:
  case Builtin::BI__sync_xor_and_fetch:
  case Builtin::BI__sync_nand_and_fetch:
  case Builtin::BI__sync_val_compare_and_swap:
  case Builtin::BI__sync_bool_compare_and_swap:
  case Builtin::BI__sync_lock_test_and_set:
  case Builtin::BI__sync_lock_release:
  case Builtin::BI__sync_swap:
    llvm_unreachable("Shouldn't make it through sema");
  // Size-suffixed __sync_fetch_and_<op>: every width of a given operation is
  // forwarded to EmitBinaryAtomic with the matching AtomicRMWInst opcode.
  case Builtin::BI__sync_fetch_and_add_1:
  case Builtin::BI__sync_fetch_and_add_2:
  case Builtin::BI__sync_fetch_and_add_4:
  case Builtin::BI__sync_fetch_and_add_8:
  case Builtin::BI__sync_fetch_and_add_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
  case Builtin::BI__sync_fetch_and_sub_1:
  case Builtin::BI__sync_fetch_and_sub_2:
  case Builtin::BI__sync_fetch_and_sub_4:
  case Builtin::BI__sync_fetch_and_sub_8:
  case Builtin::BI__sync_fetch_and_sub_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
  case Builtin::BI__sync_fetch_and_or_1:
  case Builtin::BI__sync_fetch_and_or_2:
  case Builtin::BI__sync_fetch_and_or_4:
  case Builtin::BI__sync_fetch_and_or_8:
  case Builtin::BI__sync_fetch_and_or_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
  case Builtin::BI__sync_fetch_and_and_1:
  case Builtin::BI__sync_fetch_and_and_2:
  case Builtin::BI__sync_fetch_and_and_4:
  case Builtin::BI__sync_fetch_and_and_8:
  case Builtin::BI__sync_fetch_and_and_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
  case Builtin::BI__sync_fetch_and_xor_1:
  case Builtin::BI__sync_fetch_and_xor_2:
  case Builtin::BI__sync_fetch_and_xor_4:
  case Builtin::BI__sync_fetch_and_xor_8:
  case Builtin::BI__sync_fetch_and_xor_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
  case Builtin::BI__sync_fetch_and_nand_1:
  case Builtin::BI__sync_fetch_and_nand_2:
  case Builtin::BI__sync_fetch_and_nand_4:
  case Builtin::BI__sync_fetch_and_nand_8:
  case Builtin::BI__sync_fetch_and_nand_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
1725 
  // Clang extensions: not overloaded yet, so there is a single case per
  // operation (no _1/_2/_4/_8/_16 variants). Signed and unsigned min/max each
  // forward to EmitBinaryAtomic with the corresponding AtomicRMWInst opcode.
  case Builtin::BI__sync_fetch_and_min:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
  case Builtin::BI__sync_fetch_and_max:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
  case Builtin::BI__sync_fetch_and_umin:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
  case Builtin::BI__sync_fetch_and_umax:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
1735 
  // Size-suffixed __sync_<op>_and_fetch: forwarded to EmitBinaryAtomicPost
  // with both the AtomicRMWInst opcode and a plain binary Instruction opcode.
  // NOTE(review): presumably the second opcode is used to recompute the
  // post-operation value from the value the RMW returns -- confirm against
  // EmitBinaryAtomicPost.
  case Builtin::BI__sync_add_and_fetch_1:
  case Builtin::BI__sync_add_and_fetch_2:
  case Builtin::BI__sync_add_and_fetch_4:
  case Builtin::BI__sync_add_and_fetch_8:
  case Builtin::BI__sync_add_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
                                llvm::Instruction::Add);
  case Builtin::BI__sync_sub_and_fetch_1:
  case Builtin::BI__sync_sub_and_fetch_2:
  case Builtin::BI__sync_sub_and_fetch_4:
  case Builtin::BI__sync_sub_and_fetch_8:
  case Builtin::BI__sync_sub_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
                                llvm::Instruction::Sub);
  case Builtin::BI__sync_and_and_fetch_1:
  case Builtin::BI__sync_and_and_fetch_2:
  case Builtin::BI__sync_and_and_fetch_4:
  case Builtin::BI__sync_and_and_fetch_8:
  case Builtin::BI__sync_and_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
                                llvm::Instruction::And);
  case Builtin::BI__sync_or_and_fetch_1:
  case Builtin::BI__sync_or_and_fetch_2:
  case Builtin::BI__sync_or_and_fetch_4:
  case Builtin::BI__sync_or_and_fetch_8:
  case Builtin::BI__sync_or_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
                                llvm::Instruction::Or);
  case Builtin::BI__sync_xor_and_fetch_1:
  case Builtin::BI__sync_xor_and_fetch_2:
  case Builtin::BI__sync_xor_and_fetch_4:
  case Builtin::BI__sync_xor_and_fetch_8:
  case Builtin::BI__sync_xor_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
                                llvm::Instruction::Xor);
  case Builtin::BI__sync_nand_and_fetch_1:
  case Builtin::BI__sync_nand_and_fetch_2:
  case Builtin::BI__sync_nand_and_fetch_4:
  case Builtin::BI__sync_nand_and_fetch_8:
  case Builtin::BI__sync_nand_and_fetch_16:
    // Nand is the only variant that passes the extra trailing 'true'.
    // NOTE(review): presumably it asks for the And result to be inverted --
    // confirm against EmitBinaryAtomicPost.
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
                                llvm::Instruction::And, true);
1778 
  // Both compare-and-swap flavours go through MakeAtomicCmpXchgValue; they
  // differ only in the trailing flag (false for the _val_ form, true for the
  // _bool_ form). NOTE(review): presumably the flag selects returning the
  // success bit instead of the old value -- confirm against the helper.
  case Builtin::BI__sync_val_compare_and_swap_1:
  case Builtin::BI__sync_val_compare_and_swap_2:
  case Builtin::BI__sync_val_compare_and_swap_4:
  case Builtin::BI__sync_val_compare_and_swap_8:
  case Builtin::BI__sync_val_compare_and_swap_16:
    return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));

  case Builtin::BI__sync_bool_compare_and_swap_1:
  case Builtin::BI__sync_bool_compare_and_swap_2:
  case Builtin::BI__sync_bool_compare_and_swap_4:
  case Builtin::BI__sync_bool_compare_and_swap_8:
  case Builtin::BI__sync_bool_compare_and_swap_16:
    return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
1792 
  // __sync_swap and __sync_lock_test_and_set are emitted identically here:
  // both forward to EmitBinaryAtomic with the Xchg opcode.
  case Builtin::BI__sync_swap_1:
  case Builtin::BI__sync_swap_2:
  case Builtin::BI__sync_swap_4:
  case Builtin::BI__sync_swap_8:
  case Builtin::BI__sync_swap_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);

  case Builtin::BI__sync_lock_test_and_set_1:
  case Builtin::BI__sync_lock_test_and_set_2:
  case Builtin::BI__sync_lock_test_and_set_4:
  case Builtin::BI__sync_lock_test_and_set_8:
  case Builtin::BI__sync_lock_test_and_set_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1806 
1807   case Builtin::BI__sync_lock_release_1:
1808   case Builtin::BI__sync_lock_release_2:
1809   case Builtin::BI__sync_lock_release_4:
1810   case Builtin::BI__sync_lock_release_8:
1811   case Builtin::BI__sync_lock_release_16: {
1812     Value *Ptr = EmitScalarExpr(E->getArg(0));
1813     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
1814     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
1815     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
1816                                              StoreSize.getQuantity() * 8);
1817     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
1818     llvm::StoreInst *Store =
1819       Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
1820                                  StoreSize);
1821     Store->setAtomic(llvm::AtomicOrdering::Release);
1822     return RValue::get(nullptr);
1823   }
1824 
1825   case Builtin::BI__sync_synchronize: {
1826     // We assume this is supposed to correspond to a C++0x-style
1827     // sequentially-consistent fence (i.e. this is only usable for
1828     // synchonization, not device I/O or anything like that). This intrinsic
1829     // is really badly designed in the sense that in theory, there isn't
1830     // any way to safely use it... but in practice, it mostly works
1831     // to use it with non-atomic loads and stores to get acquire/release
1832     // semantics.
1833     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
1834     return RValue::get(nullptr);
1835   }
1836 
  // Nontemporal load/store: each is delegated to its helper and the helper's
  // value is wrapped as the call's scalar result.
  case Builtin::BI__builtin_nontemporal_load:
    return RValue::get(EmitNontemporalLoad(*this, E));
  case Builtin::BI__builtin_nontemporal_store:
    return RValue::get(EmitNontemporalStore(*this, E));
1841   case Builtin::BI__c11_atomic_is_lock_free:
1842   case Builtin::BI__atomic_is_lock_free: {
1843     // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
1844     // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
1845     // _Atomic(T) is always properly-aligned.
1846     const char *LibCallName = "__atomic_is_lock_free";
1847     CallArgList Args;
1848     Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
1849              getContext().getSizeType());
1850     if (BuiltinID == Builtin::BI__atomic_is_lock_free)
1851       Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
1852                getContext().VoidPtrTy);
1853     else
1854       Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
1855                getContext().VoidPtrTy);
1856     const CGFunctionInfo &FuncInfo =
1857         CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
1858     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
1859     llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
1860     return EmitCall(FuncInfo, CGCallee::forDirect(Func),
1861                     ReturnValueSlot(), Args);
1862   }
1863 
1864   case Builtin::BI__atomic_test_and_set: {
1865     // Look at the argument type to determine whether this is a volatile
1866     // operation. The parameter type is always volatile.
1867     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1868     bool Volatile =
1869         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1870 
1871     Value *Ptr = EmitScalarExpr(E->getArg(0));
1872     unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1873     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1874     Value *NewVal = Builder.getInt8(1);
1875     Value *Order = EmitScalarExpr(E->getArg(1));
1876     if (isa<llvm::ConstantInt>(Order)) {
1877       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1878       AtomicRMWInst *Result = nullptr;
1879       switch (ord) {
1880       case 0:  // memory_order_relaxed
1881       default: // invalid order
1882         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1883                                          llvm::AtomicOrdering::Monotonic);
1884         break;
1885       case 1: // memory_order_consume
1886       case 2: // memory_order_acquire
1887         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1888                                          llvm::AtomicOrdering::Acquire);
1889         break;
1890       case 3: // memory_order_release
1891         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1892                                          llvm::AtomicOrdering::Release);
1893         break;
1894       case 4: // memory_order_acq_rel
1895 
1896         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1897                                          llvm::AtomicOrdering::AcquireRelease);
1898         break;
1899       case 5: // memory_order_seq_cst
1900         Result = Builder.CreateAtomicRMW(
1901             llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1902             llvm::AtomicOrdering::SequentiallyConsistent);
1903         break;
1904       }
1905       Result->setVolatile(Volatile);
1906       return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1907     }
1908 
1909     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1910 
1911     llvm::BasicBlock *BBs[5] = {
1912       createBasicBlock("monotonic", CurFn),
1913       createBasicBlock("acquire", CurFn),
1914       createBasicBlock("release", CurFn),
1915       createBasicBlock("acqrel", CurFn),
1916       createBasicBlock("seqcst", CurFn)
1917     };
1918     llvm::AtomicOrdering Orders[5] = {
1919         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
1920         llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
1921         llvm::AtomicOrdering::SequentiallyConsistent};
1922 
1923     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1924     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1925 
1926     Builder.SetInsertPoint(ContBB);
1927     PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
1928 
1929     for (unsigned i = 0; i < 5; ++i) {
1930       Builder.SetInsertPoint(BBs[i]);
1931       AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1932                                                    Ptr, NewVal, Orders[i]);
1933       RMW->setVolatile(Volatile);
1934       Result->addIncoming(RMW, BBs[i]);
1935       Builder.CreateBr(ContBB);
1936     }
1937 
1938     SI->addCase(Builder.getInt32(0), BBs[0]);
1939     SI->addCase(Builder.getInt32(1), BBs[1]);
1940     SI->addCase(Builder.getInt32(2), BBs[1]);
1941     SI->addCase(Builder.getInt32(3), BBs[2]);
1942     SI->addCase(Builder.getInt32(4), BBs[3]);
1943     SI->addCase(Builder.getInt32(5), BBs[4]);
1944 
1945     Builder.SetInsertPoint(ContBB);
1946     return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1947   }
1948 
1949   case Builtin::BI__atomic_clear: {
1950     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1951     bool Volatile =
1952         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1953 
1954     Address Ptr = EmitPointerWithAlignment(E->getArg(0));
1955     unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
1956     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1957     Value *NewVal = Builder.getInt8(0);
1958     Value *Order = EmitScalarExpr(E->getArg(1));
1959     if (isa<llvm::ConstantInt>(Order)) {
1960       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1961       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1962       switch (ord) {
1963       case 0:  // memory_order_relaxed
1964       default: // invalid order
1965         Store->setOrdering(llvm::AtomicOrdering::Monotonic);
1966         break;
1967       case 3:  // memory_order_release
1968         Store->setOrdering(llvm::AtomicOrdering::Release);
1969         break;
1970       case 5:  // memory_order_seq_cst
1971         Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
1972         break;
1973       }
1974       return RValue::get(nullptr);
1975     }
1976 
1977     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1978 
1979     llvm::BasicBlock *BBs[3] = {
1980       createBasicBlock("monotonic", CurFn),
1981       createBasicBlock("release", CurFn),
1982       createBasicBlock("seqcst", CurFn)
1983     };
1984     llvm::AtomicOrdering Orders[3] = {
1985         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
1986         llvm::AtomicOrdering::SequentiallyConsistent};
1987 
1988     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1989     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1990 
1991     for (unsigned i = 0; i < 3; ++i) {
1992       Builder.SetInsertPoint(BBs[i]);
1993       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1994       Store->setOrdering(Orders[i]);
1995       Builder.CreateBr(ContBB);
1996     }
1997 
1998     SI->addCase(Builder.getInt32(0), BBs[0]);
1999     SI->addCase(Builder.getInt32(3), BBs[1]);
2000     SI->addCase(Builder.getInt32(5), BBs[2]);
2001 
2002     Builder.SetInsertPoint(ContBB);
2003     return RValue::get(nullptr);
2004   }
2005 
2006   case Builtin::BI__atomic_thread_fence:
2007   case Builtin::BI__atomic_signal_fence:
2008   case Builtin::BI__c11_atomic_thread_fence:
2009   case Builtin::BI__c11_atomic_signal_fence: {
2010     llvm::SyncScope::ID SSID;
2011     if (BuiltinID == Builtin::BI__atomic_signal_fence ||
2012         BuiltinID == Builtin::BI__c11_atomic_signal_fence)
2013       SSID = llvm::SyncScope::SingleThread;
2014     else
2015       SSID = llvm::SyncScope::System;
2016     Value *Order = EmitScalarExpr(E->getArg(0));
2017     if (isa<llvm::ConstantInt>(Order)) {
2018       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
2019       switch (ord) {
2020       case 0:  // memory_order_relaxed
2021       default: // invalid order
2022         break;
2023       case 1:  // memory_order_consume
2024       case 2:  // memory_order_acquire
2025         Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
2026         break;
2027       case 3:  // memory_order_release
2028         Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
2029         break;
2030       case 4:  // memory_order_acq_rel
2031         Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
2032         break;
2033       case 5:  // memory_order_seq_cst
2034         Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
2035         break;
2036       }
2037       return RValue::get(nullptr);
2038     }
2039 
2040     llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
2041     AcquireBB = createBasicBlock("acquire", CurFn);
2042     ReleaseBB = createBasicBlock("release", CurFn);
2043     AcqRelBB = createBasicBlock("acqrel", CurFn);
2044     SeqCstBB = createBasicBlock("seqcst", CurFn);
2045     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
2046 
2047     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
2048     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
2049 
2050     Builder.SetInsertPoint(AcquireBB);
2051     Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
2052     Builder.CreateBr(ContBB);
2053     SI->addCase(Builder.getInt32(1), AcquireBB);
2054     SI->addCase(Builder.getInt32(2), AcquireBB);
2055 
2056     Builder.SetInsertPoint(ReleaseBB);
2057     Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
2058     Builder.CreateBr(ContBB);
2059     SI->addCase(Builder.getInt32(3), ReleaseBB);
2060 
2061     Builder.SetInsertPoint(AcqRelBB);
2062     Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
2063     Builder.CreateBr(ContBB);
2064     SI->addCase(Builder.getInt32(4), AcqRelBB);
2065 
2066     Builder.SetInsertPoint(SeqCstBB);
2067     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
2068     Builder.CreateBr(ContBB);
2069     SI->addCase(Builder.getInt32(5), SeqCstBB);
2070 
2071     Builder.SetInsertPoint(ContBB);
2072     return RValue::get(nullptr);
2073   }
2074 
2075     // Library functions with special handling.
2076   case Builtin::BIsqrt:
2077   case Builtin::BIsqrtf:
2078   case Builtin::BIsqrtl: {
2079     // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
2080     // in finite- or unsafe-math mode (the intrinsic has different semantics
2081     // for handling negative numbers compared to the library function, so
2082     // -fmath-errno=0 is not enough).
2083     if (!FD->hasAttr<ConstAttr>())
2084       break;
2085     if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
2086           CGM.getCodeGenOpts().NoNaNsFPMath))
2087       break;
2088     Value *Arg0 = EmitScalarExpr(E->getArg(0));
2089     llvm::Type *ArgType = Arg0->getType();
2090     Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
2091     return RValue::get(Builder.CreateCall(F, Arg0));
2092   }
2093 
2094   case Builtin::BI__builtin_pow:
2095   case Builtin::BI__builtin_powf:
2096   case Builtin::BI__builtin_powl:
2097   case Builtin::BIpow:
2098   case Builtin::BIpowf:
2099   case Builtin::BIpowl: {
2100     // Transform a call to pow* into a @llvm.pow.* intrinsic call.
2101     if (!FD->hasAttr<ConstAttr>())
2102       break;
2103     Value *Base = EmitScalarExpr(E->getArg(0));
2104     Value *Exponent = EmitScalarExpr(E->getArg(1));
2105     llvm::Type *ArgType = Base->getType();
2106     Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
2107     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
2108   }
2109 
2110   case Builtin::BIfma:
2111   case Builtin::BIfmaf:
2112   case Builtin::BIfmal:
2113   case Builtin::BI__builtin_fma:
2114   case Builtin::BI__builtin_fmaf:
2115   case Builtin::BI__builtin_fmal: {
2116     // Rewrite fma to intrinsic.
2117     Value *FirstArg = EmitScalarExpr(E->getArg(0));
2118     llvm::Type *ArgType = FirstArg->getType();
2119     Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
2120     return RValue::get(
2121         Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
2122                                EmitScalarExpr(E->getArg(2))}));
2123   }
2124 
2125   case Builtin::BI__builtin_signbit:
2126   case Builtin::BI__builtin_signbitf:
2127   case Builtin::BI__builtin_signbitl: {
2128     return RValue::get(
2129         Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
2130                            ConvertType(E->getType())));
2131   }
2132   case Builtin::BI__annotation: {
2133     // Re-encode each wide string to UTF8 and make an MDString.
2134     SmallVector<Metadata *, 1> Strings;
2135     for (const Expr *Arg : E->arguments()) {
2136       const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
2137       assert(Str->getCharByteWidth() == 2);
2138       StringRef WideBytes = Str->getBytes();
2139       std::string StrUtf8;
2140       if (!convertUTF16ToUTF8String(
2141               makeArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
2142         CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
2143         continue;
2144       }
2145       Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
2146     }
2147 
    // Build an MDTuple of MDStrings and emit the intrinsic call.
2149     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
2150     MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
2151     Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
2152     return RValue::getIgnored();
2153   }
2154   case Builtin::BI__builtin_annotation: {
2155     llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
2156     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
2157                                       AnnVal->getType());
2158 
2159     // Get the annotation string, go through casts. Sema requires this to be a
2160     // non-wide string literal, potentially casted, so the cast<> is safe.
2161     const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
2162     StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
2163     return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
2164   }
2165   case Builtin::BI__builtin_addcb:
2166   case Builtin::BI__builtin_addcs:
2167   case Builtin::BI__builtin_addc:
2168   case Builtin::BI__builtin_addcl:
2169   case Builtin::BI__builtin_addcll:
2170   case Builtin::BI__builtin_subcb:
2171   case Builtin::BI__builtin_subcs:
2172   case Builtin::BI__builtin_subc:
2173   case Builtin::BI__builtin_subcl:
2174   case Builtin::BI__builtin_subcll: {
2175 
2176     // We translate all of these builtins from expressions of the form:
2177     //   int x = ..., y = ..., carryin = ..., carryout, result;
2178     //   result = __builtin_addc(x, y, carryin, &carryout);
2179     //
2180     // to LLVM IR of the form:
2181     //
2182     //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
2183     //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
2184     //   %carry1 = extractvalue {i32, i1} %tmp1, 1
2185     //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
2186     //                                                       i32 %carryin)
2187     //   %result = extractvalue {i32, i1} %tmp2, 0
2188     //   %carry2 = extractvalue {i32, i1} %tmp2, 1
2189     //   %tmp3 = or i1 %carry1, %carry2
2190     //   %tmp4 = zext i1 %tmp3 to i32
2191     //   store i32 %tmp4, i32* %carryout
2192 
2193     // Scalarize our inputs.
2194     llvm::Value *X = EmitScalarExpr(E->getArg(0));
2195     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
2196     llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
2197     Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
2198 
2199     // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
2200     llvm::Intrinsic::ID IntrinsicId;
2201     switch (BuiltinID) {
2202     default: llvm_unreachable("Unknown multiprecision builtin id.");
2203     case Builtin::BI__builtin_addcb:
2204     case Builtin::BI__builtin_addcs:
2205     case Builtin::BI__builtin_addc:
2206     case Builtin::BI__builtin_addcl:
2207     case Builtin::BI__builtin_addcll:
2208       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
2209       break;
2210     case Builtin::BI__builtin_subcb:
2211     case Builtin::BI__builtin_subcs:
2212     case Builtin::BI__builtin_subc:
2213     case Builtin::BI__builtin_subcl:
2214     case Builtin::BI__builtin_subcll:
2215       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
2216       break;
2217     }
2218 
2219     // Construct our resulting LLVM IR expression.
2220     llvm::Value *Carry1;
2221     llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
2222                                               X, Y, Carry1);
2223     llvm::Value *Carry2;
2224     llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
2225                                               Sum1, Carryin, Carry2);
2226     llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
2227                                                X->getType());
2228     Builder.CreateStore(CarryOut, CarryOutPtr);
2229     return RValue::get(Sum2);
2230   }
2231 
2232   case Builtin::BI__builtin_add_overflow:
2233   case Builtin::BI__builtin_sub_overflow:
2234   case Builtin::BI__builtin_mul_overflow: {
2235     const clang::Expr *LeftArg = E->getArg(0);
2236     const clang::Expr *RightArg = E->getArg(1);
2237     const clang::Expr *ResultArg = E->getArg(2);
2238 
2239     clang::QualType ResultQTy =
2240         ResultArg->getType()->castAs<PointerType>()->getPointeeType();
2241 
2242     WidthAndSignedness LeftInfo =
2243         getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
2244     WidthAndSignedness RightInfo =
2245         getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
2246     WidthAndSignedness ResultInfo =
2247         getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
2248     WidthAndSignedness EncompassingInfo =
2249         EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
2250 
2251     llvm::Type *EncompassingLLVMTy =
2252         llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
2253 
2254     llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
2255 
2256     llvm::Intrinsic::ID IntrinsicId;
2257     switch (BuiltinID) {
2258     default:
2259       llvm_unreachable("Unknown overflow builtin id.");
2260     case Builtin::BI__builtin_add_overflow:
2261       IntrinsicId = EncompassingInfo.Signed
2262                         ? llvm::Intrinsic::sadd_with_overflow
2263                         : llvm::Intrinsic::uadd_with_overflow;
2264       break;
2265     case Builtin::BI__builtin_sub_overflow:
2266       IntrinsicId = EncompassingInfo.Signed
2267                         ? llvm::Intrinsic::ssub_with_overflow
2268                         : llvm::Intrinsic::usub_with_overflow;
2269       break;
2270     case Builtin::BI__builtin_mul_overflow:
2271       IntrinsicId = EncompassingInfo.Signed
2272                         ? llvm::Intrinsic::smul_with_overflow
2273                         : llvm::Intrinsic::umul_with_overflow;
2274       break;
2275     }
2276 
2277     llvm::Value *Left = EmitScalarExpr(LeftArg);
2278     llvm::Value *Right = EmitScalarExpr(RightArg);
2279     Address ResultPtr = EmitPointerWithAlignment(ResultArg);
2280 
2281     // Extend each operand to the encompassing type.
2282     Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
2283     Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
2284 
2285     // Perform the operation on the extended values.
2286     llvm::Value *Overflow, *Result;
2287     Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
2288 
2289     if (EncompassingInfo.Width > ResultInfo.Width) {
2290       // The encompassing type is wider than the result type, so we need to
2291       // truncate it.
2292       llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
2293 
2294       // To see if the truncation caused an overflow, we will extend
2295       // the result and then compare it to the original result.
2296       llvm::Value *ResultTruncExt = Builder.CreateIntCast(
2297           ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
2298       llvm::Value *TruncationOverflow =
2299           Builder.CreateICmpNE(Result, ResultTruncExt);
2300 
2301       Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
2302       Result = ResultTrunc;
2303     }
2304 
2305     // Finally, store the result using the pointer.
2306     bool isVolatile =
2307       ResultArg->getType()->getPointeeType().isVolatileQualified();
2308     Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
2309 
2310     return RValue::get(Overflow);
2311   }
2312 
2313   case Builtin::BI__builtin_uadd_overflow:
2314   case Builtin::BI__builtin_uaddl_overflow:
2315   case Builtin::BI__builtin_uaddll_overflow:
2316   case Builtin::BI__builtin_usub_overflow:
2317   case Builtin::BI__builtin_usubl_overflow:
2318   case Builtin::BI__builtin_usubll_overflow:
2319   case Builtin::BI__builtin_umul_overflow:
2320   case Builtin::BI__builtin_umull_overflow:
2321   case Builtin::BI__builtin_umulll_overflow:
2322   case Builtin::BI__builtin_sadd_overflow:
2323   case Builtin::BI__builtin_saddl_overflow:
2324   case Builtin::BI__builtin_saddll_overflow:
2325   case Builtin::BI__builtin_ssub_overflow:
2326   case Builtin::BI__builtin_ssubl_overflow:
2327   case Builtin::BI__builtin_ssubll_overflow:
2328   case Builtin::BI__builtin_smul_overflow:
2329   case Builtin::BI__builtin_smull_overflow:
2330   case Builtin::BI__builtin_smulll_overflow: {
2331 
2332     // We translate all of these builtins directly to the relevant llvm IR node.
2333 
2334     // Scalarize our inputs.
2335     llvm::Value *X = EmitScalarExpr(E->getArg(0));
2336     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
2337     Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
2338 
2339     // Decide which of the overflow intrinsics we are lowering to:
2340     llvm::Intrinsic::ID IntrinsicId;
2341     switch (BuiltinID) {
2342     default: llvm_unreachable("Unknown overflow builtin id.");
2343     case Builtin::BI__builtin_uadd_overflow:
2344     case Builtin::BI__builtin_uaddl_overflow:
2345     case Builtin::BI__builtin_uaddll_overflow:
2346       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
2347       break;
2348     case Builtin::BI__builtin_usub_overflow:
2349     case Builtin::BI__builtin_usubl_overflow:
2350     case Builtin::BI__builtin_usubll_overflow:
2351       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
2352       break;
2353     case Builtin::BI__builtin_umul_overflow:
2354     case Builtin::BI__builtin_umull_overflow:
2355     case Builtin::BI__builtin_umulll_overflow:
2356       IntrinsicId = llvm::Intrinsic::umul_with_overflow;
2357       break;
2358     case Builtin::BI__builtin_sadd_overflow:
2359     case Builtin::BI__builtin_saddl_overflow:
2360     case Builtin::BI__builtin_saddll_overflow:
2361       IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
2362       break;
2363     case Builtin::BI__builtin_ssub_overflow:
2364     case Builtin::BI__builtin_ssubl_overflow:
2365     case Builtin::BI__builtin_ssubll_overflow:
2366       IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
2367       break;
2368     case Builtin::BI__builtin_smul_overflow:
2369     case Builtin::BI__builtin_smull_overflow:
2370     case Builtin::BI__builtin_smulll_overflow:
2371       IntrinsicId = llvm::Intrinsic::smul_with_overflow;
2372       break;
2373     }
2374 
2375 
2376     llvm::Value *Carry;
2377     llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
2378     Builder.CreateStore(Sum, SumOutPtr);
2379 
2380     return RValue::get(Carry);
2381   }
2382   case Builtin::BI__builtin_addressof:
2383     return RValue::get(EmitLValue(E->getArg(0)).getPointer());
2384   case Builtin::BI__builtin_operator_new:
2385     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2386                                     E->getArg(0), false);
2387   case Builtin::BI__builtin_operator_delete:
2388     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2389                                     E->getArg(0), true);
2390   case Builtin::BI__noop:
2391     // __noop always evaluates to an integer literal zero.
2392     return RValue::get(ConstantInt::get(IntTy, 0));
2393   case Builtin::BI__builtin_call_with_static_chain: {
2394     const CallExpr *Call = cast<CallExpr>(E->getArg(0));
2395     const Expr *Chain = E->getArg(1);
2396     return EmitCall(Call->getCallee()->getType(),
2397                     EmitCallee(Call->getCallee()), Call, ReturnValue,
2398                     EmitScalarExpr(Chain));
2399   }
2400   case Builtin::BI_InterlockedExchange8:
2401   case Builtin::BI_InterlockedExchange16:
2402   case Builtin::BI_InterlockedExchange:
2403   case Builtin::BI_InterlockedExchangePointer:
2404     return RValue::get(
2405         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
2406   case Builtin::BI_InterlockedCompareExchangePointer: {
2407     llvm::Type *RTy;
2408     llvm::IntegerType *IntType =
2409       IntegerType::get(getLLVMContext(),
2410                        getContext().getTypeSize(E->getType()));
2411     llvm::Type *IntPtrType = IntType->getPointerTo();
2412 
2413     llvm::Value *Destination =
2414       Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
2415 
2416     llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
2417     RTy = Exchange->getType();
2418     Exchange = Builder.CreatePtrToInt(Exchange, IntType);
2419 
2420     llvm::Value *Comparand =
2421       Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
2422 
2423     auto Result =
2424         Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
2425                                     AtomicOrdering::SequentiallyConsistent,
2426                                     AtomicOrdering::SequentiallyConsistent);
2427     Result->setVolatile(true);
2428 
2429     return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
2430                                                                          0),
2431                                               RTy));
2432   }
2433   case Builtin::BI_InterlockedCompareExchange8:
2434   case Builtin::BI_InterlockedCompareExchange16:
2435   case Builtin::BI_InterlockedCompareExchange:
2436   case Builtin::BI_InterlockedCompareExchange64: {
2437     AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
2438         EmitScalarExpr(E->getArg(0)),
2439         EmitScalarExpr(E->getArg(2)),
2440         EmitScalarExpr(E->getArg(1)),
2441         AtomicOrdering::SequentiallyConsistent,
2442         AtomicOrdering::SequentiallyConsistent);
2443       CXI->setVolatile(true);
2444       return RValue::get(Builder.CreateExtractValue(CXI, 0));
2445   }
2446   case Builtin::BI_InterlockedIncrement16:
2447   case Builtin::BI_InterlockedIncrement:
2448     return RValue::get(
2449         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
2450   case Builtin::BI_InterlockedDecrement16:
2451   case Builtin::BI_InterlockedDecrement:
2452     return RValue::get(
2453         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
2454   case Builtin::BI_InterlockedAnd8:
2455   case Builtin::BI_InterlockedAnd16:
2456   case Builtin::BI_InterlockedAnd:
2457     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
2458   case Builtin::BI_InterlockedExchangeAdd8:
2459   case Builtin::BI_InterlockedExchangeAdd16:
2460   case Builtin::BI_InterlockedExchangeAdd:
2461     return RValue::get(
2462         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
2463   case Builtin::BI_InterlockedExchangeSub8:
2464   case Builtin::BI_InterlockedExchangeSub16:
2465   case Builtin::BI_InterlockedExchangeSub:
2466     return RValue::get(
2467         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
2468   case Builtin::BI_InterlockedOr8:
2469   case Builtin::BI_InterlockedOr16:
2470   case Builtin::BI_InterlockedOr:
2471     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
2472   case Builtin::BI_InterlockedXor8:
2473   case Builtin::BI_InterlockedXor16:
2474   case Builtin::BI_InterlockedXor:
2475     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
2476   case Builtin::BI_interlockedbittestandset:
2477     return RValue::get(
2478         EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E));
2479 
2480   case Builtin::BI__exception_code:
2481   case Builtin::BI_exception_code:
2482     return RValue::get(EmitSEHExceptionCode());
2483   case Builtin::BI__exception_info:
2484   case Builtin::BI_exception_info:
2485     return RValue::get(EmitSEHExceptionInfo());
2486   case Builtin::BI__abnormal_termination:
2487   case Builtin::BI_abnormal_termination:
2488     return RValue::get(EmitSEHAbnormalTermination());
  case Builtin::BI_setjmpex: {
    // Custom lowering applies only to MSVCRT targets; on any other target we
    // break out of the switch.
    if (getTarget().getTriple().isOSMSVCRT()) {
      // Declare "_setjmpex" as int(i8*, i8*) and mark it returns_twice.
      llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
      llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
          getLLVMContext(), llvm::AttributeList::FunctionIndex,
          llvm::Attribute::ReturnsTwice);
      llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
          llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
          "_setjmpex", ReturnsTwiceAttr, /*Local=*/true);
      // First argument: the user's buffer, cast to i8*.
      llvm::Value *Buf = Builder.CreateBitOrPointerCast(
          EmitScalarExpr(E->getArg(0)), Int8PtrTy);
      // Second argument: the result of llvm.frameaddress(0).
      llvm::Value *FrameAddr =
          Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
                             ConstantInt::get(Int32Ty, 0));
      llvm::Value *Args[] = {Buf, FrameAddr};
      llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
      // The call site carries returns_twice as well, not just the declaration.
      CS.setAttributes(ReturnsTwiceAttr);
      return RValue::get(CS.getInstruction());
    }
    break;
  }
  case Builtin::BI_setjmp: {
    // Custom lowering applies only to MSVCRT targets; on any other target we
    // break out of the switch.
    if (getTarget().getTriple().isOSMSVCRT()) {
      llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
          getLLVMContext(), llvm::AttributeList::FunctionIndex,
          llvm::Attribute::ReturnsTwice);
      // The user's buffer, cast to i8*, is the first argument on both paths.
      llvm::Value *Buf = Builder.CreateBitOrPointerCast(
          EmitScalarExpr(E->getArg(0)), Int8PtrTy);
      llvm::CallSite CS;
      if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
        // x86: call the variadic "_setjmp3"(i8*, int, ...) and pass a
        // constant 0 as the second argument.
        llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
        llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
            llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
            "_setjmp3", ReturnsTwiceAttr, /*Local=*/true);
        llvm::Value *Count = ConstantInt::get(IntTy, 0);
        llvm::Value *Args[] = {Buf, Count};
        CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
      } else {
        // Other architectures: call "_setjmp"(i8*, i8*) and pass the result
        // of llvm.frameaddress(0) as the second argument.
        llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
        llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
            llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
            "_setjmp", ReturnsTwiceAttr, /*Local=*/true);
        llvm::Value *FrameAddr =
            Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
                               ConstantInt::get(Int32Ty, 0));
        llvm::Value *Args[] = {Buf, FrameAddr};
        CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
      }
      // The call site carries returns_twice as well, not just the declaration.
      CS.setAttributes(ReturnsTwiceAttr);
      return RValue::get(CS.getInstruction());
    }
    break;
  }
2542 
2543   case Builtin::BI__GetExceptionInfo: {
2544     if (llvm::GlobalVariable *GV =
2545             CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
2546       return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
2547     break;
2548   }
2549 
2550   case Builtin::BI__fastfail:
2551     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
2552 
2553   case Builtin::BI__builtin_coro_size: {
2554     auto & Context = getContext();
2555     auto SizeTy = Context.getSizeType();
2556     auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
2557     Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
2558     return RValue::get(Builder.CreateCall(F));
2559   }
2560 
2561   case Builtin::BI__builtin_coro_id:
2562     return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
2563   case Builtin::BI__builtin_coro_promise:
2564     return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
2565   case Builtin::BI__builtin_coro_resume:
2566     return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
2567   case Builtin::BI__builtin_coro_frame:
2568     return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
2569   case Builtin::BI__builtin_coro_free:
2570     return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
2571   case Builtin::BI__builtin_coro_destroy:
2572     return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
2573   case Builtin::BI__builtin_coro_done:
2574     return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
2575   case Builtin::BI__builtin_coro_alloc:
2576     return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
2577   case Builtin::BI__builtin_coro_begin:
2578     return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
2579   case Builtin::BI__builtin_coro_end:
2580     return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
2581   case Builtin::BI__builtin_coro_suspend:
2582     return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
2583   case Builtin::BI__builtin_coro_param:
2584     return EmitCoroutineIntrinsic(E, Intrinsic::coro_param);
2585 
2586   // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
2587   case Builtin::BIread_pipe:
2588   case Builtin::BIwrite_pipe: {
2589     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2590           *Arg1 = EmitScalarExpr(E->getArg(1));
2591     CGOpenCLRuntime OpenCLRT(CGM);
2592     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2593     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2594 
2595     // Type of the generic packet parameter.
2596     unsigned GenericAS =
2597         getContext().getTargetAddressSpace(LangAS::opencl_generic);
2598     llvm::Type *I8PTy = llvm::PointerType::get(
2599         llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
2600 
2601     // Testing which overloaded version we should generate the call for.
2602     if (2U == E->getNumArgs()) {
2603       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
2604                                                              : "__write_pipe_2";
2605       // Creating a generic function type to be able to call with any builtin or
2606       // user defined type.
2607       llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
2608       llvm::FunctionType *FTy = llvm::FunctionType::get(
2609           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2610       Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
2611       return RValue::get(
2612           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2613                              {Arg0, BCast, PacketSize, PacketAlign}));
2614     } else {
2615       assert(4 == E->getNumArgs() &&
2616              "Illegal number of parameters to pipe function");
2617       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
2618                                                              : "__write_pipe_4";
2619 
2620       llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
2621                               Int32Ty, Int32Ty};
2622       Value *Arg2 = EmitScalarExpr(E->getArg(2)),
2623             *Arg3 = EmitScalarExpr(E->getArg(3));
2624       llvm::FunctionType *FTy = llvm::FunctionType::get(
2625           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2626       Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
2627       // We know the third argument is an integer type, but we may need to cast
2628       // it to i32.
2629       if (Arg2->getType() != Int32Ty)
2630         Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
2631       return RValue::get(Builder.CreateCall(
2632           CGM.CreateRuntimeFunction(FTy, Name),
2633           {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
2634     }
2635   }
  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
2637   // functions
2638   case Builtin::BIreserve_read_pipe:
2639   case Builtin::BIreserve_write_pipe:
2640   case Builtin::BIwork_group_reserve_read_pipe:
2641   case Builtin::BIwork_group_reserve_write_pipe:
2642   case Builtin::BIsub_group_reserve_read_pipe:
2643   case Builtin::BIsub_group_reserve_write_pipe: {
2644     // Composing the mangled name for the function.
2645     const char *Name;
2646     if (BuiltinID == Builtin::BIreserve_read_pipe)
2647       Name = "__reserve_read_pipe";
2648     else if (BuiltinID == Builtin::BIreserve_write_pipe)
2649       Name = "__reserve_write_pipe";
2650     else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
2651       Name = "__work_group_reserve_read_pipe";
2652     else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
2653       Name = "__work_group_reserve_write_pipe";
2654     else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
2655       Name = "__sub_group_reserve_read_pipe";
2656     else
2657       Name = "__sub_group_reserve_write_pipe";
2658 
2659     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2660           *Arg1 = EmitScalarExpr(E->getArg(1));
2661     llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
2662     CGOpenCLRuntime OpenCLRT(CGM);
2663     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2664     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2665 
2666     // Building the generic function prototype.
2667     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
2668     llvm::FunctionType *FTy = llvm::FunctionType::get(
2669         ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2670     // We know the second argument is an integer type, but we may need to cast
2671     // it to i32.
2672     if (Arg1->getType() != Int32Ty)
2673       Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
2674     return RValue::get(
2675         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2676                            {Arg0, Arg1, PacketSize, PacketAlign}));
2677   }
2678   // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
2679   // functions
2680   case Builtin::BIcommit_read_pipe:
2681   case Builtin::BIcommit_write_pipe:
2682   case Builtin::BIwork_group_commit_read_pipe:
2683   case Builtin::BIwork_group_commit_write_pipe:
2684   case Builtin::BIsub_group_commit_read_pipe:
2685   case Builtin::BIsub_group_commit_write_pipe: {
2686     const char *Name;
2687     if (BuiltinID == Builtin::BIcommit_read_pipe)
2688       Name = "__commit_read_pipe";
2689     else if (BuiltinID == Builtin::BIcommit_write_pipe)
2690       Name = "__commit_write_pipe";
2691     else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
2692       Name = "__work_group_commit_read_pipe";
2693     else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
2694       Name = "__work_group_commit_write_pipe";
2695     else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
2696       Name = "__sub_group_commit_read_pipe";
2697     else
2698       Name = "__sub_group_commit_write_pipe";
2699 
2700     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2701           *Arg1 = EmitScalarExpr(E->getArg(1));
2702     CGOpenCLRuntime OpenCLRT(CGM);
2703     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2704     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2705 
2706     // Building the generic function prototype.
2707     llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
2708     llvm::FunctionType *FTy =
2709         llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
2710                                 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2711 
2712     return RValue::get(
2713         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2714                            {Arg0, Arg1, PacketSize, PacketAlign}));
2715   }
2716   // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
2717   case Builtin::BIget_pipe_num_packets:
2718   case Builtin::BIget_pipe_max_packets: {
2719     const char *Name;
2720     if (BuiltinID == Builtin::BIget_pipe_num_packets)
2721       Name = "__get_pipe_num_packets";
2722     else
2723       Name = "__get_pipe_max_packets";
2724 
2725     // Building the generic function prototype.
2726     Value *Arg0 = EmitScalarExpr(E->getArg(0));
2727     CGOpenCLRuntime OpenCLRT(CGM);
2728     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2729     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2730     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
2731     llvm::FunctionType *FTy = llvm::FunctionType::get(
2732         Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2733 
2734     return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2735                                           {Arg0, PacketSize, PacketAlign}));
2736   }
2737 
2738   // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
2739   case Builtin::BIto_global:
2740   case Builtin::BIto_local:
2741   case Builtin::BIto_private: {
2742     auto Arg0 = EmitScalarExpr(E->getArg(0));
2743     auto NewArgT = llvm::PointerType::get(Int8Ty,
2744       CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2745     auto NewRetT = llvm::PointerType::get(Int8Ty,
2746       CGM.getContext().getTargetAddressSpace(
2747         E->getType()->getPointeeType().getAddressSpace()));
2748     auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
2749     llvm::Value *NewArg;
2750     if (Arg0->getType()->getPointerAddressSpace() !=
2751         NewArgT->getPointerAddressSpace())
2752       NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
2753     else
2754       NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
2755     auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
2756     auto NewCall =
2757         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
2758     return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
2759       ConvertType(E->getType())));
2760   }
2761 
2762   // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
2763   // It contains four different overload formats specified in Table 6.13.17.1.
2764   case Builtin::BIenqueue_kernel: {
2765     StringRef Name; // Generated function call name
2766     unsigned NumArgs = E->getNumArgs();
2767 
2768     llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
2769     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2770         getContext().getTargetAddressSpace(LangAS::opencl_generic));
2771 
2772     llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
2773     llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
2774     LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
2775     llvm::Value *Range = NDRangeL.getAddress().getPointer();
2776     llvm::Type *RangeTy = NDRangeL.getAddress().getType();
2777 
2778     if (NumArgs == 4) {
2779       // The most basic form of the call with parameters:
2780       // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
2781       Name = "__enqueue_kernel_basic";
2782       llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy};
2783       llvm::FunctionType *FTy = llvm::FunctionType::get(
2784           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false);
2785 
2786       llvm::Value *Block = Builder.CreatePointerCast(
2787           EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
2788 
2789       AttrBuilder B;
2790       B.addAttribute(Attribute::ByVal);
2791       llvm::AttributeList ByValAttrSet =
2792           llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
2793 
2794       auto RTCall =
2795           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
2796                              {Queue, Flags, Range, Block});
2797       RTCall->setAttributes(ByValAttrSet);
2798       return RValue::get(RTCall);
2799     }
2800     assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
2801 
2802     // Create a temporary array to hold the sizes of local pointer arguments
2803     // for the block. \p First is the position of the first size argument.
2804     auto CreateArrayForSizeVar = [=](unsigned First) {
2805       auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First);
2806       auto *Arr = Builder.CreateAlloca(AT);
2807       llvm::Value *Ptr;
2808       // Each of the following arguments specifies the size of the corresponding
2809       // argument passed to the enqueued block.
2810       auto *Zero = llvm::ConstantInt::get(IntTy, 0);
2811       for (unsigned I = First; I < NumArgs; ++I) {
2812         auto *Index = llvm::ConstantInt::get(IntTy, I - First);
2813         auto *GEP = Builder.CreateGEP(Arr, {Zero, Index});
2814         if (I == First)
2815           Ptr = GEP;
2816         auto *V =
2817             Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
2818         Builder.CreateAlignedStore(
2819             V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy));
2820       }
2821       return Ptr;
2822     };
2823 
2824     // Could have events and/or vaargs.
2825     if (E->getArg(3)->getType()->isBlockPointerType()) {
2826       // No events passed, but has variadic arguments.
2827       Name = "__enqueue_kernel_vaargs";
2828       auto *Block = Builder.CreatePointerCast(EmitScalarExpr(E->getArg(3)),
2829                                               GenericVoidPtrTy);
2830       auto *PtrToSizeArray = CreateArrayForSizeVar(4);
2831 
2832       // Create a vector of the arguments, as well as a constant value to
2833       // express to the runtime the number of variadic arguments.
2834       std::vector<llvm::Value *> Args = {Queue,
2835                                          Flags,
2836                                          Range,
2837                                          Block,
2838                                          ConstantInt::get(IntTy, NumArgs - 4),
2839                                          PtrToSizeArray};
2840       std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy,
2841                                           RangeTy, GenericVoidPtrTy,
2842                                           IntTy,   PtrToSizeArray->getType()};
2843 
2844       llvm::FunctionType *FTy = llvm::FunctionType::get(
2845           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2846       return RValue::get(
2847           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2848                              llvm::ArrayRef<llvm::Value *>(Args)));
2849     }
2850     // Any calls now have event arguments passed.
2851     if (NumArgs >= 7) {
2852       llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
2853       llvm::Type *EventPtrTy = EventTy->getPointerTo(
2854           CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2855 
2856       llvm::Value *NumEvents =
2857           Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
2858       llvm::Value *EventList =
2859           E->getArg(4)->getType()->isArrayType()
2860               ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
2861               : EmitScalarExpr(E->getArg(4));
2862       llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
2863       // Convert to generic address space.
2864       EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
2865       ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy);
2866       llvm::Value *Block = Builder.CreatePointerCast(
2867           EmitScalarExpr(E->getArg(6)), GenericVoidPtrTy);
2868 
2869       std::vector<llvm::Type *> ArgTys = {
2870           QueueTy,    Int32Ty,    RangeTy,         Int32Ty,
2871           EventPtrTy, EventPtrTy, GenericVoidPtrTy};
2872 
2873       std::vector<llvm::Value *> Args = {Queue,     Flags,    Range, NumEvents,
2874                                          EventList, ClkEvent, Block};
2875 
2876       if (NumArgs == 7) {
2877         // Has events but no variadics.
2878         Name = "__enqueue_kernel_basic_events";
2879         llvm::FunctionType *FTy = llvm::FunctionType::get(
2880             Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2881         return RValue::get(
2882             Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2883                                llvm::ArrayRef<llvm::Value *>(Args)));
2884       }
2885       // Has event info and variadics
2886       // Pass the number of variadics to the runtime function too.
2887       Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
2888       ArgTys.push_back(Int32Ty);
2889       Name = "__enqueue_kernel_events_vaargs";
2890 
2891       auto *PtrToSizeArray = CreateArrayForSizeVar(7);
2892       Args.push_back(PtrToSizeArray);
2893       ArgTys.push_back(PtrToSizeArray->getType());
2894 
2895       llvm::FunctionType *FTy = llvm::FunctionType::get(
2896           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2897       return RValue::get(
2898           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2899                              llvm::ArrayRef<llvm::Value *>(Args)));
2900     }
2901     LLVM_FALLTHROUGH;
2902   }
2903   // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
2904   // parameter.
2905   case Builtin::BIget_kernel_work_group_size: {
2906     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2907         getContext().getTargetAddressSpace(LangAS::opencl_generic));
2908     Value *Arg = EmitScalarExpr(E->getArg(0));
2909     Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
2910     return RValue::get(Builder.CreateCall(
2911         CGM.CreateRuntimeFunction(
2912             llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
2913             "__get_kernel_work_group_size_impl"),
2914         Arg));
2915   }
2916   case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
2917     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2918         getContext().getTargetAddressSpace(LangAS::opencl_generic));
2919     Value *Arg = EmitScalarExpr(E->getArg(0));
2920     Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
2921     return RValue::get(Builder.CreateCall(
2922         CGM.CreateRuntimeFunction(
2923             llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
2924             "__get_kernel_preferred_work_group_multiple_impl"),
2925         Arg));
2926   }
2927   case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
2928   case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
2929     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2930         getContext().getTargetAddressSpace(LangAS::opencl_generic));
2931     LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
2932     llvm::Value *NDRange = NDRangeL.getAddress().getPointer();
2933     Value *Block = EmitScalarExpr(E->getArg(1));
2934     Block = Builder.CreatePointerCast(Block, GenericVoidPtrTy);
2935     const char *Name =
2936         BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
2937             ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
2938             : "__get_kernel_sub_group_count_for_ndrange_impl";
2939     return RValue::get(Builder.CreateCall(
2940         CGM.CreateRuntimeFunction(
2941             llvm::FunctionType::get(
2942                 IntTy, {NDRange->getType(), GenericVoidPtrTy}, false),
2943             Name),
2944         {NDRange, Block}));
2945   }
2946 
2947   case Builtin::BI__builtin_store_half:
2948   case Builtin::BI__builtin_store_halff: {
2949     Value *Val = EmitScalarExpr(E->getArg(0));
2950     Address Address = EmitPointerWithAlignment(E->getArg(1));
2951     Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
2952     return RValue::get(Builder.CreateStore(HalfVal, Address));
2953   }
2954   case Builtin::BI__builtin_load_half: {
2955     Address Address = EmitPointerWithAlignment(E->getArg(0));
2956     Value *HalfVal = Builder.CreateLoad(Address);
2957     return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
2958   }
2959   case Builtin::BI__builtin_load_halff: {
2960     Address Address = EmitPointerWithAlignment(E->getArg(0));
2961     Value *HalfVal = Builder.CreateLoad(Address);
2962     return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
2963   }
2964   case Builtin::BIprintf:
2965     if (getTarget().getTriple().isNVPTX())
2966       return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
2967     break;
2968   case Builtin::BI__builtin_canonicalize:
2969   case Builtin::BI__builtin_canonicalizef:
2970   case Builtin::BI__builtin_canonicalizel:
2971     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
2972 
2973   case Builtin::BI__builtin_thread_pointer: {
2974     if (!getContext().getTargetInfo().isTLSSupported())
2975       CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
2976     // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
2977     break;
2978   }
2979   case Builtin::BI__builtin_os_log_format:
2980     return emitBuiltinOSLogFormat(*E);
2981 
2982   case Builtin::BI__builtin_os_log_format_buffer_size: {
2983     analyze_os_log::OSLogBufferLayout Layout;
2984     analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
2985     return RValue::get(ConstantInt::get(ConvertType(E->getType()),
2986                                         Layout.size().getQuantity()));
2987   }
2988 
2989   case Builtin::BI__xray_customevent: {
2990     if (!ShouldXRayInstrumentFunction())
2991       return RValue::getIgnored();
2992     if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) {
2993       if (XRayAttr->neverXRayInstrument())
2994         return RValue::getIgnored();
2995     }
2996     Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
2997     auto FTy = F->getFunctionType();
2998     auto Arg0 = E->getArg(0);
2999     auto Arg0Val = EmitScalarExpr(Arg0);
3000     auto Arg0Ty = Arg0->getType();
3001     auto PTy0 = FTy->getParamType(0);
3002     if (PTy0 != Arg0Val->getType()) {
3003       if (Arg0Ty->isArrayType())
3004         Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
3005       else
3006         Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
3007     }
3008     auto Arg1 = EmitScalarExpr(E->getArg(1));
3009     auto PTy1 = FTy->getParamType(1);
3010     if (PTy1 != Arg1->getType())
3011       Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
3012     return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
3013   }
3014 
3015   case Builtin::BI__builtin_ms_va_start:
3016   case Builtin::BI__builtin_ms_va_end:
3017     return RValue::get(
3018         EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
3019                        BuiltinID == Builtin::BI__builtin_ms_va_start));
3020 
3021   case Builtin::BI__builtin_ms_va_copy: {
3022     // Lower this manually. We can't reliably determine whether or not any
3023     // given va_copy() is for a Win64 va_list from the calling convention
3024     // alone, because it's legal to do this from a System V ABI function.
3025     // With opaque pointer types, we won't have enough information in LLVM
3026     // IR to determine this from the argument types, either. Best to do it
3027     // now, while we have enough information.
3028     Address DestAddr = EmitMSVAListRef(E->getArg(0));
3029     Address SrcAddr = EmitMSVAListRef(E->getArg(1));
3030 
3031     llvm::Type *BPP = Int8PtrPtrTy;
3032 
3033     DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
3034                        DestAddr.getAlignment());
3035     SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
3036                       SrcAddr.getAlignment());
3037 
3038     Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
3039     return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
3040   }
3041   }
3042 
3043   // If this is an alias for a lib function (e.g. __builtin_sin), emit
3044   // the call using the normal call path, but using the unmangled
3045   // version of the function name.
3046   if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
3047     return emitLibraryCall(*this, FD, E,
3048                            CGM.getBuiltinLibFunction(FD, BuiltinID));
3049 
3050   // If this is a predefined lib function (e.g. malloc), emit the call
3051   // using exactly the normal call path.
3052   if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
3053     return emitLibraryCall(*this, FD, E,
3054                       cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
3055 
3056   // Check that a call to a target specific builtin has the correct target
3057   // features.
3058   // This is down here to avoid non-target specific builtins, however, if
3059   // generic builtins start to require generic target features then we
3060   // can move this up to the beginning of the function.
3061   checkTargetFeatures(E, FD);
3062 
3063   // See if we have a target specific intrinsic.
3064   const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
3065   Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
3066   StringRef Prefix =
3067       llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
3068   if (!Prefix.empty()) {
3069     IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name);
3070     // NOTE we dont need to perform a compatibility flag check here since the
3071     // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the
3072     // MS builtins via ALL_MS_LANGUAGES and are filtered earlier.
3073     if (IntrinsicID == Intrinsic::not_intrinsic)
3074       IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
3075   }
3076 
3077   if (IntrinsicID != Intrinsic::not_intrinsic) {
3078     SmallVector<Value*, 16> Args;
3079 
3080     // Find out if any arguments are required to be integer constant
3081     // expressions.
3082     unsigned ICEArguments = 0;
3083     ASTContext::GetBuiltinTypeError Error;
3084     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
3085     assert(Error == ASTContext::GE_None && "Should not codegen an error");
3086 
3087     Function *F = CGM.getIntrinsic(IntrinsicID);
3088     llvm::FunctionType *FTy = F->getFunctionType();
3089 
3090     for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
3091       Value *ArgValue;
3092       // If this is a normal argument, just emit it as a scalar.
3093       if ((ICEArguments & (1 << i)) == 0) {
3094         ArgValue = EmitScalarExpr(E->getArg(i));
3095       } else {
3096         // If this is required to be a constant, constant fold it so that we
3097         // know that the generated intrinsic gets a ConstantInt.
3098         llvm::APSInt Result;
3099         bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
3100         assert(IsConst && "Constant arg isn't actually constant?");
3101         (void)IsConst;
3102         ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
3103       }
3104 
3105       // If the intrinsic arg type is different from the builtin arg type
3106       // we need to do a bit cast.
3107       llvm::Type *PTy = FTy->getParamType(i);
3108       if (PTy != ArgValue->getType()) {
3109         assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
3110                "Must be able to losslessly bit cast to param");
3111         ArgValue = Builder.CreateBitCast(ArgValue, PTy);
3112       }
3113 
3114       Args.push_back(ArgValue);
3115     }
3116 
3117     Value *V = Builder.CreateCall(F, Args);
3118     QualType BuiltinRetType = E->getType();
3119 
3120     llvm::Type *RetTy = VoidTy;
3121     if (!BuiltinRetType->isVoidType())
3122       RetTy = ConvertType(BuiltinRetType);
3123 
3124     if (RetTy != V->getType()) {
3125       assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
3126              "Must be able to losslessly bit cast result type");
3127       V = Builder.CreateBitCast(V, RetTy);
3128     }
3129 
3130     return RValue::get(V);
3131   }
3132 
3133   // See if we have a target specific builtin that needs to be lowered.
3134   if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
3135     return RValue::get(V);
3136 
3137   ErrorUnsupported(E, "builtin function");
3138 
3139   // Unknown builtin, for now just dump it out and return undef.
3140   return GetUndefRValue(E->getType());
3141 }
3142 
3143 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
3144                                         unsigned BuiltinID, const CallExpr *E,
3145                                         llvm::Triple::ArchType Arch) {
3146   switch (Arch) {
3147   case llvm::Triple::arm:
3148   case llvm::Triple::armeb:
3149   case llvm::Triple::thumb:
3150   case llvm::Triple::thumbeb:
3151     return CGF->EmitARMBuiltinExpr(BuiltinID, E);
3152   case llvm::Triple::aarch64:
3153   case llvm::Triple::aarch64_be:
3154     return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
3155   case llvm::Triple::x86:
3156   case llvm::Triple::x86_64:
3157     return CGF->EmitX86BuiltinExpr(BuiltinID, E);
3158   case llvm::Triple::ppc:
3159   case llvm::Triple::ppc64:
3160   case llvm::Triple::ppc64le:
3161     return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
3162   case llvm::Triple::r600:
3163   case llvm::Triple::amdgcn:
3164     return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
3165   case llvm::Triple::systemz:
3166     return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
3167   case llvm::Triple::nvptx:
3168   case llvm::Triple::nvptx64:
3169     return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
3170   case llvm::Triple::wasm32:
3171   case llvm::Triple::wasm64:
3172     return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
3173   default:
3174     return nullptr;
3175   }
3176 }
3177 
3178 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
3179                                               const CallExpr *E) {
3180   if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
3181     assert(getContext().getAuxTargetInfo() && "Missing aux target info");
3182     return EmitTargetArchBuiltinExpr(
3183         this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
3184         getContext().getAuxTargetInfo()->getTriple().getArch());
3185   }
3186 
3187   return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
3188                                    getTarget().getTriple().getArch());
3189 }
3190 
3191 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
3192                                      NeonTypeFlags TypeFlags,
3193                                      bool V1Ty=false) {
3194   int IsQuad = TypeFlags.isQuad();
3195   switch (TypeFlags.getEltType()) {
3196   case NeonTypeFlags::Int8:
3197   case NeonTypeFlags::Poly8:
3198     return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
3199   case NeonTypeFlags::Int16:
3200   case NeonTypeFlags::Poly16:
3201   case NeonTypeFlags::Float16:
3202     return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
3203   case NeonTypeFlags::Int32:
3204     return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
3205   case NeonTypeFlags::Int64:
3206   case NeonTypeFlags::Poly64:
3207     return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
3208   case NeonTypeFlags::Poly128:
3209     // FIXME: i128 and f128 doesn't get fully support in Clang and llvm.
3210     // There is a lot of i128 and f128 API missing.
3211     // so we use v16i8 to represent poly128 and get pattern matched.
3212     return llvm::VectorType::get(CGF->Int8Ty, 16);
3213   case NeonTypeFlags::Float32:
3214     return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
3215   case NeonTypeFlags::Float64:
3216     return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
3217   }
3218   llvm_unreachable("Unknown vector element type!");
3219 }
3220 
3221 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
3222                                           NeonTypeFlags IntTypeFlags) {
3223   int IsQuad = IntTypeFlags.isQuad();
3224   switch (IntTypeFlags.getEltType()) {
3225   case NeonTypeFlags::Int32:
3226     return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
3227   case NeonTypeFlags::Int64:
3228     return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
3229   default:
3230     llvm_unreachable("Type can't be converted to floating-point!");
3231   }
3232 }
3233 
3234 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
3235   unsigned nElts = V->getType()->getVectorNumElements();
3236   Value* SV = llvm::ConstantVector::getSplat(nElts, C);
3237   return Builder.CreateShuffleVector(V, V, SV, "lane");
3238 }
3239 
3240 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
3241                                      const char *name,
3242                                      unsigned shift, bool rightshift) {
3243   unsigned j = 0;
3244   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3245        ai != ae; ++ai, ++j)
3246     if (shift > 0 && shift == j)
3247       Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
3248     else
3249       Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
3250 
3251   return Builder.CreateCall(F, Ops, name);
3252 }
3253 
3254 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
3255                                             bool neg) {
3256   int SV = cast<ConstantInt>(V)->getSExtValue();
3257   return ConstantInt::get(Ty, neg ? -SV : SV);
3258 }
3259 
3260 // \brief Right-shift a vector by a constant.
3261 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
3262                                           llvm::Type *Ty, bool usgn,
3263                                           const char *name) {
3264   llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
3265 
3266   int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
3267   int EltSize = VTy->getScalarSizeInBits();
3268 
3269   Vec = Builder.CreateBitCast(Vec, Ty);
3270 
3271   // lshr/ashr are undefined when the shift amount is equal to the vector
3272   // element size.
3273   if (ShiftAmt == EltSize) {
3274     if (usgn) {
3275       // Right-shifting an unsigned value by its size yields 0.
3276       return llvm::ConstantAggregateZero::get(VTy);
3277     } else {
3278       // Right-shifting a signed value by its size is equivalent
3279       // to a shift of size-1.
3280       --ShiftAmt;
3281       Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
3282     }
3283   }
3284 
3285   Shift = EmitNeonShiftVector(Shift, Ty, false);
3286   if (usgn)
3287     return Builder.CreateLShr(Vec, Shift, name);
3288   else
3289     return Builder.CreateAShr(Vec, Shift, name);
3290 }
3291 
// Bit flags that are OR-ed together to form the TypeModifier field of a
// NeonIntrinsicInfo row. Every basic flag occupies its own bit; the names at
// the bottom are shorthand for frequently used combinations. The meaning of
// each bit is defined by the code that interprets TypeModifier.
enum {
  // Basic single-bit flags.
  AddRetType = 0x001,
  Add1ArgType = 0x002,
  Add2ArgTypes = 0x004,

  VectorizeRetType = 0x008,
  VectorizeArgTypes = 0x010,

  InventFloatType = 0x020,
  UnsignedAlts = 0x040,

  Use64BitVectors = 0x080,
  Use128BitVectors = 0x100,

  // Named combinations of the flags above.
  Vectorize1ArgType = VectorizeArgTypes | Add1ArgType,
  VectorRet = VectorizeRetType | AddRetType,
  VectorRetGetArgs01 =
      VectorizeArgTypes | VectorizeRetType | Add2ArgTypes | AddRetType,
  FpCmpzModifiers =
      InventFloatType | Add1ArgType | VectorizeRetType | AddRetType
};
3313 
namespace {
/// One row of a NEON builtin table. It is a plain aggregate so that rows can
/// be written as braced initializer lists (see the NEONMAPn macros).
struct NeonIntrinsicInfo {
  const char *NameHint;      ///< Builtin base name as a string.
  unsigned BuiltinID;        ///< Builtin ID of the row; the comparison key.
  unsigned LLVMIntrinsic;    ///< Primary intrinsic ID, or 0.
  unsigned AltLLVMIntrinsic; ///< Alternate intrinsic ID, or 0.
  unsigned TypeModifier;     ///< OR-ed type-modifier flags, or 0.

  /// Compare this row's builtin ID against a bare ID, which allows an
  /// ordered search (e.g. std::lower_bound) keyed by an unsigned value.
  bool operator<(unsigned RHSBuiltinID) const {
    return RHSBuiltinID > BuiltinID;
  }

  /// Order two rows by builtin ID alone; the other fields are ignored.
  bool operator<(const NeonIntrinsicInfo &TE) const {
    return *this < TE.BuiltinID;
  }
};
} // end anonymous namespace
3330 
// Row initializers for NeonIntrinsicInfo tables. Each macro expands to a
// braced list of five values: the stringified NameBase, the builtin ID
// NEON::BI__builtin_neon_<NameBase>, a primary intrinsic, an alternate
// intrinsic, and the type-modifier flags. Intrinsic arguments are given
// without the Intrinsic:: qualifier; the macros add it.

// NEONMAP0: a row with no intrinsic and no flags (the last three fields are
// all 0).
#define NEONMAP0(NameBase) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }

// NEONMAP1: a row with a single intrinsic; the alternate intrinsic field is 0.
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  { \
    #NameBase, \
    NEON:: BI__builtin_neon_ ## NameBase, \
    Intrinsic::LLVMIntrinsic, \
    0, \
    TypeModifier \
  }

// NEONMAP2: a row with both a primary and an alternate intrinsic.
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
  { \
    #NameBase, \
    NEON:: BI__builtin_neon_ ## NameBase, \
    Intrinsic::LLVMIntrinsic, \
    Intrinsic::AltLLVMIntrinsic, \
    TypeModifier \
  }
3342 
3343 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
3344   NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3345   NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3346   NEONMAP1(vabs_v, arm_neon_vabs, 0),
3347   NEONMAP1(vabsq_v, arm_neon_vabs, 0),
3348   NEONMAP0(vaddhn_v),
3349   NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
3350   NEONMAP1(vaeseq_v, arm_neon_aese, 0),
3351   NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
3352   NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
3353   NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
3354   NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
3355   NEONMAP1(vcage_v, arm_neon_vacge, 0),
3356   NEONMAP1(vcageq_v, arm_neon_vacge, 0),
3357   NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
3358   NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
3359   NEONMAP1(vcale_v, arm_neon_vacge, 0),
3360   NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
3361   NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
3362   NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
3363   NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
3364   NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
3365   NEONMAP1(vclz_v, ctlz, Add1ArgType),
3366   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3367   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3368   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3369   NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
3370   NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
3371   NEONMAP0(vcvt_f32_v),
3372   NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3373   NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3374   NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3375   NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3376   NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3377   NEONMAP0(vcvt_s32_v),
3378   NEONMAP0(vcvt_s64_v),
3379   NEONMAP0(vcvt_u32_v),
3380   NEONMAP0(vcvt_u64_v),
3381   NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
3382   NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
3383   NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
3384   NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
3385   NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
3386   NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
3387   NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
3388   NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
3389   NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
3390   NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
3391   NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
3392   NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
3393   NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
3394   NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
3395   NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
3396   NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
3397   NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
3398   NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
3399   NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
3400   NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
3401   NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
3402   NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
3403   NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
3404   NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
3405   NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
3406   NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
3407   NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
3408   NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
3409   NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
3410   NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
3411   NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
3412   NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
3413   NEONMAP0(vcvtq_f32_v),
3414   NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3415   NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3416   NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3417   NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3418   NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3419   NEONMAP0(vcvtq_s32_v),
3420   NEONMAP0(vcvtq_s64_v),
3421   NEONMAP0(vcvtq_u32_v),
3422   NEONMAP0(vcvtq_u64_v),
3423   NEONMAP0(vext_v),
3424   NEONMAP0(vextq_v),
3425   NEONMAP0(vfma_v),
3426   NEONMAP0(vfmaq_v),
3427   NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3428   NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3429   NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3430   NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3431   NEONMAP0(vld1_dup_v),
3432   NEONMAP1(vld1_v, arm_neon_vld1, 0),
3433   NEONMAP0(vld1q_dup_v),
3434   NEONMAP1(vld1q_v, arm_neon_vld1, 0),
3435   NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
3436   NEONMAP1(vld2_v, arm_neon_vld2, 0),
3437   NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
3438   NEONMAP1(vld2q_v, arm_neon_vld2, 0),
3439   NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
3440   NEONMAP1(vld3_v, arm_neon_vld3, 0),
3441   NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
3442   NEONMAP1(vld3q_v, arm_neon_vld3, 0),
3443   NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
3444   NEONMAP1(vld4_v, arm_neon_vld4, 0),
3445   NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
3446   NEONMAP1(vld4q_v, arm_neon_vld4, 0),
3447   NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3448   NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
3449   NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
3450   NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3451   NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3452   NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
3453   NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
3454   NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3455   NEONMAP0(vmovl_v),
3456   NEONMAP0(vmovn_v),
3457   NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
3458   NEONMAP0(vmull_v),
3459   NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
3460   NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3461   NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3462   NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
3463   NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3464   NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3465   NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
3466   NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
3467   NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
3468   NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
3469   NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
3470   NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3471   NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3472   NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
3473   NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
3474   NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
3475   NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
3476   NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
3477   NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
3478   NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
3479   NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
3480   NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
3481   NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
3482   NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
3483   NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3484   NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3485   NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3486   NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3487   NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3488   NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3489   NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
3490   NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
3491   NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3492   NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3493   NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
3494   NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3495   NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3496   NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
3497   NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
3498   NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3499   NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3500   NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
3501   NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
3502   NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
3503   NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
3504   NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
3505   NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
3506   NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
3507   NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
3508   NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
3509   NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
3510   NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
3511   NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
3512   NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3513   NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3514   NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3515   NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3516   NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3517   NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3518   NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
3519   NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
3520   NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
3521   NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
3522   NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
3523   NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
3524   NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
3525   NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
3526   NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
3527   NEONMAP0(vshl_n_v),
3528   NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3529   NEONMAP0(vshll_n_v),
3530   NEONMAP0(vshlq_n_v),
3531   NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3532   NEONMAP0(vshr_n_v),
3533   NEONMAP0(vshrn_n_v),
3534   NEONMAP0(vshrq_n_v),
3535   NEONMAP1(vst1_v, arm_neon_vst1, 0),
3536   NEONMAP1(vst1q_v, arm_neon_vst1, 0),
3537   NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
3538   NEONMAP1(vst2_v, arm_neon_vst2, 0),
3539   NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
3540   NEONMAP1(vst2q_v, arm_neon_vst2, 0),
3541   NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
3542   NEONMAP1(vst3_v, arm_neon_vst3, 0),
3543   NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
3544   NEONMAP1(vst3q_v, arm_neon_vst3, 0),
3545   NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
3546   NEONMAP1(vst4_v, arm_neon_vst4, 0),
3547   NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
3548   NEONMAP1(vst4q_v, arm_neon_vst4, 0),
3549   NEONMAP0(vsubhn_v),
3550   NEONMAP0(vtrn_v),
3551   NEONMAP0(vtrnq_v),
3552   NEONMAP0(vtst_v),
3553   NEONMAP0(vtstq_v),
3554   NEONMAP0(vuzp_v),
3555   NEONMAP0(vuzpq_v),
3556   NEONMAP0(vzip_v),
3557   NEONMAP0(vzipq_v)
3558 };
3559 
// Lookup table of NeonIntrinsicInfo records for the overloaded ("_v")
// AArch64 vector NEON builtins, built with the NEONMAP0/NEONMAP1/NEONMAP2
// helper macros (defined earlier in this file, outside this excerpt).
//
// Judging by the entries, NEONMAP0 names a builtin without an LLVM intrinsic,
// NEONMAP1 pairs a builtin with one intrinsic plus type-modifier flags, and
// NEONMAP2 supplies a primary and an alternate intrinsic plus flags --
// NOTE(review): confirm against the macro definitions.
//
// The entries appear to be listed in alphabetical builtin-name order.
// findNeonIntrinsicInMap() binary-searches the map it is handed and, in
// asserts-enabled builds, checks it with std::is_sorted, so any new entry
// must be inserted at the position that keeps the table sorted.
3560 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
3561   NEONMAP1(vabs_v, aarch64_neon_abs, 0),
3562   NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
3563   NEONMAP0(vaddhn_v),
3564   NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
3565   NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
3566   NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
3567   NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
3568   NEONMAP1(vcage_v, aarch64_neon_facge, 0),
3569   NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
3570   NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
3571   NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
3572   NEONMAP1(vcale_v, aarch64_neon_facge, 0),
3573   NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
3574   NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
3575   NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
3576   NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
3577   NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
3578   NEONMAP1(vclz_v, ctlz, Add1ArgType),
3579   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3580   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3581   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3582   NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
3583   NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
3584   NEONMAP0(vcvt_f32_v),
3585   NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3586   NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3587   NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3588   NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3589   NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3590   NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3591   NEONMAP0(vcvtq_f32_v),
3592   NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3593   NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3594   NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3595   NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3596   NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3597   NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3598   NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
3599   NEONMAP0(vext_v),
3600   NEONMAP0(vextq_v),
3601   NEONMAP0(vfma_v),
3602   NEONMAP0(vfmaq_v),
3603   NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3604   NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3605   NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3606   NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3607   NEONMAP0(vmovl_v),
3608   NEONMAP0(vmovn_v),
3609   NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
3610   NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
3611   NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
3612   NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3613   NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3614   NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
3615   NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
3616   NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
3617   NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3618   NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3619   NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
3620   NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
3621   NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
3622   NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
3623   NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
3624   NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
3625   NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
3626   NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
3627   NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
3628   NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
3629   NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
3630   NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3631   NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3632   NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
3633   NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3634   NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
3635   NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3636   NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
3637   NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
3638   NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3639   NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3640   NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
3641   NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3642   NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3643   NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
3644   NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
3645   NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3646   NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3647   NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3648   NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3649   NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3650   NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3651   NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3652   NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3653   NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
3654   NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
3655   NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
3656   NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
3657   NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
3658   NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
3659   NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
3660   NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
3661   NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
3662   NEONMAP0(vshl_n_v),
3663   NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3664   NEONMAP0(vshll_n_v),
3665   NEONMAP0(vshlq_n_v),
3666   NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3667   NEONMAP0(vshr_n_v),
3668   NEONMAP0(vshrn_n_v),
3669   NEONMAP0(vshrq_n_v),
3670   NEONMAP0(vsubhn_v),
3671   NEONMAP0(vtst_v),
3672   NEONMAP0(vtstq_v),
3673 };
3674 
// Lookup table of NeonIntrinsicInfo records for the AArch64 scalar (SISD)
// NEON builtins. Every entry is built with NEONMAP1, i.e. each builtin is
// paired with exactly one LLVM intrinsic plus a set of type-modifier flags
// (the macro itself is defined earlier in this file, outside this excerpt).
//
// EmitCommonNeonSISDBuiltinExpr() reads the BuiltinID, LLVMIntrinsic,
// TypeModifier and NameHint fields of such a record and asserts that the
// intrinsic is non-zero; presumably entries of this table are what it is
// given -- NOTE(review): confirm at the call site.
//
// The entries appear to be listed in alphabetical builtin-name order.
// findNeonIntrinsicInMap() binary-searches the map it is handed and, in
// asserts-enabled builds, checks it with std::is_sorted, so any new entry
// must be inserted at the position that keeps the table sorted.
3675 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
3676   NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
3677   NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
3678   NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
3679   NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3680   NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3681   NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3682   NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3683   NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3684   NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3685   NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3686   NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3687   NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
3688   NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3689   NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
3690   NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3691   NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3692   NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3693   NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3694   NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3695   NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3696   NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3697   NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3698   NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3699   NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3700   NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3701   NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3702   NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3703   NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3704   NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3705   NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3706   NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3707   NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3708   NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3709   NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3710   NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3711   NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3712   NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3713   NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3714   NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3715   NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3716   NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3717   NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3718   NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3719   NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3720   NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3721   NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3722   NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3723   NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3724   NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
3725   NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3726   NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3727   NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3728   NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3729   NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3730   NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3731   NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3732   NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3733   NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3734   NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3735   NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3736   NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3737   NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3738   NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3739   NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3740   NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3741   NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3742   NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3743   NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3744   NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3745   NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
3746   NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
3747   NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
3748   NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3749   NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3750   NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3751   NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3752   NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3753   NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3754   NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3755   NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3756   NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3757   NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3758   NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3759   NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
3760   NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3761   NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
3762   NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3763   NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3764   NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
3765   NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
3766   NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3767   NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3768   NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
3769   NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
3770   NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
3771   NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
3772   NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
3773   NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
3774   NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
3775   NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
3776   NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3777   NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3778   NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3779   NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3780   NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
3781   NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3782   NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3783   NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3784   NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
3785   NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3786   NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
3787   NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
3788   NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
3789   NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3790   NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3791   NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
3792   NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
3793   NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3794   NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3795   NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
3796   NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
3797   NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
3798   NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
3799   NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3800   NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3801   NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3802   NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3803   NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
3804   NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3805   NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3806   NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3807   NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3808   NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3809   NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3810   NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
3811   NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
3812   NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3813   NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3814   NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3815   NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3816   NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
3817   NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
3818   NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
3819   NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
3820   NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3821   NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3822   NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
3823   NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
3824   NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
3825   NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3826   NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3827   NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3828   NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3829   NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
3830   NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3831   NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3832   NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3833   NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3834   NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
3835   NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
3836   NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3837   NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3838   NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
3839   NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
3840   NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
3841   NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
3842   NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
3843   NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
3844   NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
3845   NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
3846   NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
3847   NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
3848   NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
3849   NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
3850   NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
3851   NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
3852   NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
3853   NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
3854   NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
3855   NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
3856   NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
3857   NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
3858   NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3859   NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
3860   NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3861   NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
3862   NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
3863   NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
3864   NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3865   NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
3866   NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3867   NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
3868 };
3869 
// The NEONMAP* helper macros are only needed to spell out the intrinsic
// tables above; drop them so they cannot leak into the rest of the file.
3870 #undef NEONMAP0
3871 #undef NEONMAP1
3872 #undef NEONMAP2
3873 
// One "already verified as sorted" flag per intrinsic table.
// findNeonIntrinsicInMap() takes such a flag by reference (MapProvenSorted)
// and, in asserts-enabled builds, sets it to true after its std::is_sorted
// check so the check runs only once per table. Presumably each flag is
// passed together with the table it is named after -- NOTE(review): the call
// sites are outside this excerpt.
3874 static bool NEONSIMDIntrinsicsProvenSorted = false;
3875 
3876 static bool AArch64SIMDIntrinsicsProvenSorted = false;
3877 static bool AArch64SISDIntrinsicsProvenSorted = false;
3878 
3879 
3880 static const NeonIntrinsicInfo *
3881 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
3882                        unsigned BuiltinID, bool &MapProvenSorted) {
3883 
3884 #ifndef NDEBUG
3885   if (!MapProvenSorted) {
3886     assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
3887     MapProvenSorted = true;
3888   }
3889 #endif
3890 
3891   const NeonIntrinsicInfo *Builtin =
3892       std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
3893 
3894   if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
3895     return Builtin;
3896 
3897   return nullptr;
3898 }
3899 
3900 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
3901                                                    unsigned Modifier,
3902                                                    llvm::Type *ArgType,
3903                                                    const CallExpr *E) {
3904   int VectorSize = 0;
3905   if (Modifier & Use64BitVectors)
3906     VectorSize = 64;
3907   else if (Modifier & Use128BitVectors)
3908     VectorSize = 128;
3909 
3910   // Return type.
3911   SmallVector<llvm::Type *, 3> Tys;
3912   if (Modifier & AddRetType) {
3913     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
3914     if (Modifier & VectorizeRetType)
3915       Ty = llvm::VectorType::get(
3916           Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
3917 
3918     Tys.push_back(Ty);
3919   }
3920 
3921   // Arguments.
3922   if (Modifier & VectorizeArgTypes) {
3923     int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
3924     ArgType = llvm::VectorType::get(ArgType, Elts);
3925   }
3926 
3927   if (Modifier & (Add1ArgType | Add2ArgTypes))
3928     Tys.push_back(ArgType);
3929 
3930   if (Modifier & Add2ArgTypes)
3931     Tys.push_back(ArgType);
3932 
3933   if (Modifier & InventFloatType)
3934     Tys.push_back(FloatTy);
3935 
3936   return CGM.getIntrinsic(IntrinsicID, Tys);
3937 }
3938 
3939 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
3940                                             const NeonIntrinsicInfo &SISDInfo,
3941                                             SmallVectorImpl<Value *> &Ops,
3942                                             const CallExpr *E) {
3943   unsigned BuiltinID = SISDInfo.BuiltinID;
3944   unsigned int Int = SISDInfo.LLVMIntrinsic;
3945   unsigned Modifier = SISDInfo.TypeModifier;
3946   const char *s = SISDInfo.NameHint;
3947 
3948   switch (BuiltinID) {
3949   case NEON::BI__builtin_neon_vcled_s64:
3950   case NEON::BI__builtin_neon_vcled_u64:
3951   case NEON::BI__builtin_neon_vcles_f32:
3952   case NEON::BI__builtin_neon_vcled_f64:
3953   case NEON::BI__builtin_neon_vcltd_s64:
3954   case NEON::BI__builtin_neon_vcltd_u64:
3955   case NEON::BI__builtin_neon_vclts_f32:
3956   case NEON::BI__builtin_neon_vcltd_f64:
3957   case NEON::BI__builtin_neon_vcales_f32:
3958   case NEON::BI__builtin_neon_vcaled_f64:
3959   case NEON::BI__builtin_neon_vcalts_f32:
3960   case NEON::BI__builtin_neon_vcaltd_f64:
3961     // Only one direction of comparisons actually exist, cmle is actually a cmge
3962     // with swapped operands. The table gives us the right intrinsic but we
3963     // still need to do the swap.
3964     std::swap(Ops[0], Ops[1]);
3965     break;
3966   }
3967 
3968   assert(Int && "Generic code assumes a valid intrinsic");
3969 
3970   // Determine the type(s) of this overloaded AArch64 intrinsic.
3971   const Expr *Arg = E->getArg(0);
3972   llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
3973   Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
3974 
3975   int j = 0;
3976   ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
3977   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3978        ai != ae; ++ai, ++j) {
3979     llvm::Type *ArgTy = ai->getType();
3980     if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
3981              ArgTy->getPrimitiveSizeInBits())
3982       continue;
3983 
3984     assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
3985     // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
3986     // it before inserting.
3987     Ops[j] =
3988         CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
3989     Ops[j] =
3990         CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
3991   }
3992 
3993   Value *Result = CGF.EmitNeonCall(F, Ops, s);
3994   llvm::Type *ResultType = CGF.ConvertType(E->getType());
3995   if (ResultType->getPrimitiveSizeInBits() <
3996       Result->getType()->getPrimitiveSizeInBits())
3997     return CGF.Builder.CreateExtractElement(Result, C0);
3998 
3999   return CGF.Builder.CreateBitCast(Result, ResultType, s);
4000 }
4001 
/// Emit IR for one of the NEON builtins handled by this common lowering
/// routine.
///
/// The vector type is decoded from the trailing integer-constant argument of
/// \p E. Builtins that need bespoke lowering are handled by the switch below
/// and return directly; every other builtin (and the few cases that `break`)
/// falls through to a generic call of the selected intrinsic.
///
/// \param BuiltinID        the builtin being emitted.
/// \param LLVMIntrinsic    the primary intrinsic ID for this builtin.
/// \param AltLLVMIntrinsic the alternative intrinsic ID; which of the two is
///                         used is decided per case below.
/// \param NameHint         name given to the emitted value by the cases that
///                         do not hard-code one.
/// \param Modifier         flag bits; only UnsignedAlts is tested here, the
///                         rest are forwarded to LookupNeonLLVMIntrinsic.
/// \param E                the builtin call expression.
/// \param Ops              the operands; modified in place by many cases.
/// \param PtrOp0           address whose alignment is appended for
///                         vld1/vst*, and which is loaded from for vld1_dup.
/// \param PtrOp1           address whose alignment is passed for vldN and
///                         vldN_lane.
/// \returns the emitted value, or nullptr when the type argument is not an
///          integer constant expression or GetNeonType yields no type.
Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
    unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
    const char *NameHint, unsigned Modifier, const CallExpr *E,
    SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
  // Get the last argument, which specifies the vector type.
  llvm::APSInt NeonTypeConst;
  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
    return nullptr;

  // Determine the type of this overloaded NEON intrinsic.
  NeonTypeFlags Type(NeonTypeConst.getZExtValue());
  bool Usgn = Type.isUnsigned();
  bool Quad = Type.isQuad();

  // VTy keeps the vector type for the whole function; Ty starts out equal to
  // it but is reassigned by some cases below.
  llvm::VectorType *VTy = GetNeonType(this, Type);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  // Produces the alignment of an Address as an i32 constant.
  auto getAlignmentValue32 = [&](Address addr) -> Value* {
    return Builder.getInt32(addr.getAlignment().getQuantity());
  };

  // When the modifier requests unsigned alternatives and the type is not
  // unsigned, use the alternative intrinsic instead of the primary one.
  unsigned Int = LLVMIntrinsic;
  if ((Modifier & UnsignedAlts) && !Usgn)
    Int = AltLLVMIntrinsic;

  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_vabs_v:
  case NEON::BI__builtin_neon_vabsq_v:
    // Floating-point element types use the generic fabs intrinsic; all other
    // element types use LLVMIntrinsic.
    if (VTy->getElementType()->isFloatingPointTy())
      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
  case NEON::BI__builtin_neon_vaddhn_v: {
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %sum = add <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");

    // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
  }
  case NEON::BI__builtin_neon_vcale_v:
  case NEON::BI__builtin_neon_vcaleq_v:
  case NEON::BI__builtin_neon_vcalt_v:
  case NEON::BI__builtin_neon_vcaltq_v:
    // The "less" forms swap their operands and then share the lowering of the
    // "greater" forms below.
    std::swap(Ops[0], Ops[1]);
    LLVM_FALLTHROUGH;
  case NEON::BI__builtin_neon_vcage_v:
  case NEON::BI__builtin_neon_vcageq_v:
  case NEON::BI__builtin_neon_vcagt_v:
  case NEON::BI__builtin_neon_vcagtq_v: {
    // The intrinsic is overloaded on the result vector type and on a
    // floating-point vector with the same lane count: 32-bit lanes use
    // FloatTy, any other lane width uses DoubleTy.
    llvm::Type *VecFlt = llvm::VectorType::get(
        VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
        VTy->getNumElements());
    llvm::Type *Tys[] = { VTy, VecFlt };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vclz_v:
  case NEON::BI__builtin_neon_vclzq_v:
    // We generate target-independent intrinsic, which needs a second argument
    // for whether or not clz of zero is undefined; on ARM it isn't.
    // The call itself is emitted by the generic path after the switch.
    Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
    break;
  case NEON::BI__builtin_neon_vcvt_f32_v:
  case NEON::BI__builtin_neon_vcvtq_f32_v:
    // Integer-to-float conversion: view the operand as the integer vector
    // type, then convert to the Float32 vector type with the same Quad flag.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_n_f32_v:
  case NEON::BI__builtin_neon_vcvt_n_f64_v:
  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
    // Overloaded on { float result type, integer source type }.
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
    // The intrinsic is chosen from Usgn alone here; the UnsignedAlts modifier
    // bit is not consulted.
    Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_n_s32_v:
  case NEON::BI__builtin_neon_vcvt_n_u32_v:
  case NEON::BI__builtin_neon_vcvt_n_s64_v:
  case NEON::BI__builtin_neon_vcvt_n_u64_v:
  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
    // Overloaded on { integer result type, float source type }.
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_s32_v:
  case NEON::BI__builtin_neon_vcvt_u32_v:
  case NEON::BI__builtin_neon_vcvt_s64_v:
  case NEON::BI__builtin_neon_vcvt_u64_v:
  case NEON::BI__builtin_neon_vcvtq_s32_v:
  case NEON::BI__builtin_neon_vcvtq_u32_v:
  case NEON::BI__builtin_neon_vcvtq_s64_v:
  case NEON::BI__builtin_neon_vcvtq_u64_v: {
    // Float-to-integer conversion emitted as a plain fptoui/fptosi, with no
    // intrinsic call.
    Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
    return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v:
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v:
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v:
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    // All of these call LLVMIntrinsic overloaded on
    // { integer result type, float source type }.
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vext_v:
  case NEON::BI__builtin_neon_vextq_v: {
    // Ops[2] must already be a ConstantInt (cast<> asserts otherwise). The
    // shuffle mask is CV, CV+1, ..., CV+N-1 where N is the lane count.
    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
    SmallVector<uint32_t, 16> Indices;
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
      Indices.push_back(i+CV);

    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
  }
  case NEON::BI__builtin_neon_vfma_v:
  case NEON::BI__builtin_neon_vfmaq_v: {
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);

    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
  }
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    // The alignment of PtrOp0 is appended as the final operand.
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
  }
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v:
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v:
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v: {
    // Call the load intrinsic on (Ops[1], alignment of PtrOp1), then store
    // its result through Ops[0] bitcast to a pointer to the result type. The
    // value returned to the caller is that store.
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    Value *Align = getAlignmentValue32(PtrOp1);
    Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    // Load a single element through PtrOp0, insert it at index 0 of an undef
    // vector, and splat it with EmitNeonSplat.
    Value *V = UndefValue::get(Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
    LoadInst *Ld = Builder.CreateLoad(PtrOp0);
    llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v:
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v:
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    // Bitcast the operands from index 2 up to, but not including, the last
    // one (presumably the lane index -- TODO confirm against the builtin
    // signature).
    for (unsigned I = 2; I < Ops.size() - 1; ++I)
      Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
    Ops.push_back(getAlignmentValue32(PtrOp1));
    // Ops[0] is the destination for the result and is not passed to the
    // intrinsic; the call receives Ops[1..].
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vmovl_v: {
    // View the operand as the truncated-element vector type, then extend to
    // Ty: zext for unsigned types, sext otherwise.
    llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
    if (Usgn)
      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
  }
  case NEON::BI__builtin_neon_vmovn_v: {
    // View the operand as the extended-element vector type, then truncate
    // to Ty.
    llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
  }
  case NEON::BI__builtin_neon_vmull_v:
    // FIXME: the integer vmull operations could be emitted in terms of pure
    // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
    // hoisting the exts outside loops. Until global ISel comes along that can
    // see through such movement this leads to bad CodeGen. So we need an
    // intrinsic for now.
    // The polynomial check comes second, so it overrides the signedness
    // choice.
    Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
    Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  case NEON::BI__builtin_neon_vpadal_v:
  case NEON::BI__builtin_neon_vpadalq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy =
      llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vpaddl_v:
  case NEON::BI__builtin_neon_vpaddlq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
  }
  case NEON::BI__builtin_neon_vqdmlal_v:
  case NEON::BI__builtin_neon_vqdmlsl_v: {
    // Two-step lowering: call LLVMIntrinsic on every operand after the first,
    // then call AltLLVMIntrinsic on (Ops[0], that result).
    SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
    Ops.resize(2);
    return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vqshl_n_v:
  case NEON::BI__builtin_neon_vqshlq_n_v:
    // NOTE(review): the trailing (1, false) arguments are forwarded to
    // EmitNeonCall; presumably the shift-operand index and a right-shift
    // flag -- confirm against EmitNeonCall's declaration.
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
                        1, false);
  case NEON::BI__builtin_neon_vqshlu_n_v:
  case NEON::BI__builtin_neon_vqshluq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
                        1, false);
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:
  case NEON::BI__builtin_neon_vrsqrte_v:
  case NEON::BI__builtin_neon_vrsqrteq_v:
    // Floating-point vector types use LLVMIntrinsic; all others use the
    // alternative.
    Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);

  case NEON::BI__builtin_neon_vrshr_n_v:
  case NEON::BI__builtin_neon_vrshrq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
                        1, true);
  case NEON::BI__builtin_neon_vshl_n_v:
  case NEON::BI__builtin_neon_vshlq_n_v:
    // Emitted as a plain shl by the shift vector built from Ops[1].
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
    return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
                             "vshl_n");
  case NEON::BI__builtin_neon_vshll_n_v: {
    // Extend from the truncated-element vector type to VTy (zext for unsigned
    // types, sext otherwise), then shift left.
    llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    if (Usgn)
      Ops[0] = Builder.CreateZExt(Ops[0], VTy);
    else
      Ops[0] = Builder.CreateSExt(Ops[0], VTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
    return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
  }
  case NEON::BI__builtin_neon_vshrn_n_v: {
    // Shift right in the extended-element vector type (lshr for unsigned
    // types, ashr otherwise), then truncate to Ty.
    llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
    if (Usgn)
      Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
    return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
  }
  case NEON::BI__builtin_neon_vshr_n_v:
  case NEON::BI__builtin_neon_vshrq_n_v:
    return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v:
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v:
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v:
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v:
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v:
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v: {
    // All stores share one lowering: append the alignment of PtrOp0 and call
    // the selected intrinsic, overloaded on { i8*, Ty }.
    llvm::Type *Tys[] = {Int8PtrTy, Ty};
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
  }
  case NEON::BI__builtin_neon_vsubhn_v: {
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %diff = sub <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");

    // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
  }
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    // Ops[0] is treated as a pointer to two consecutive result vectors;
    // Ops[1] and Ops[2] are the inputs.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    // vi == 0 pairs up the even lanes of both inputs, vi == 1 the odd lanes.
    // Each shuffle is stored to slot vi of Ops[0]; the value returned is the
    // second store.
    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(i+vi);
        Indices.push_back(i+e+vi);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vtst_v:
  case NEON::BI__builtin_neon_vtstq_v: {
    // (a & b) != 0 per lane, sign-extended back to Ty.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                ConstantAggregateZero::get(Ty));
    return Builder.CreateSExt(Ops[0], Ty, "vtst");
  }
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    // Same operand layout as vtrn above: Ops[0] points at two result slots.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    // vi == 0 selects the even-indexed elements of the two inputs taken
    // together, vi == 1 the odd-indexed ones.
    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(2*i+vi);

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    // Same operand layout as vtrn above: Ops[0] points at two result slots.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    // vi == 0 interleaves the low halves of the two inputs, vi == 1 the high
    // halves.
    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back((i + vi*e) >> 1);
        Indices.push_back(((i + vi*e) >> 1)+e);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  }

  // Generic path: reached by builtins with no dedicated case and by the
  // cases above that `break`.
  assert(Int && "Expected valid intrinsic number");

  // Determine the type(s) of this overloaded AArch64 intrinsic.
  Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);

  Value *Result = EmitNeonCall(F, Ops, NameHint);
  llvm::Type *ResultType = ConvertType(E->getType());
  // AArch64 intrinsic one-element vector type cast to
  // scalar type expected by the builtin
  return Builder.CreateBitCast(Result, ResultType, NameHint);
}
4428 
4429 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
4430     Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
4431     const CmpInst::Predicate Ip, const Twine &Name) {
4432   llvm::Type *OTy = Op->getType();
4433 
4434   // FIXME: this is utterly horrific. We should not be looking at previous
4435   // codegen context to find out what needs doing. Unfortunately TableGen
4436   // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
4437   // (etc).
4438   if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
4439     OTy = BI->getOperand(0)->getType();
4440 
4441   Op = Builder.CreateBitCast(Op, OTy);
4442   if (OTy->getScalarType()->isFloatingPointTy()) {
4443     Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
4444   } else {
4445     Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
4446   }
4447   return Builder.CreateSExt(Op, Ty, Name);
4448 }
4449 
4450 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
4451                                  Value *ExtOp, Value *IndexOp,
4452                                  llvm::Type *ResTy, unsigned IntID,
4453                                  const char *Name) {
4454   SmallVector<Value *, 2> TblOps;
4455   if (ExtOp)
4456     TblOps.push_back(ExtOp);
4457 
4458   // Build a vector containing sequential number like (0, 1, 2, ..., 15)
4459   SmallVector<uint32_t, 16> Indices;
4460   llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
4461   for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
4462     Indices.push_back(2*i);
4463     Indices.push_back(2*i+1);
4464   }
4465 
4466   int PairPos = 0, End = Ops.size() - 1;
4467   while (PairPos < End) {
4468     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4469                                                      Ops[PairPos+1], Indices,
4470                                                      Name));
4471     PairPos += 2;
4472   }
4473 
4474   // If there's an odd number of 64-bit lookup table, fill the high 64-bit
4475   // of the 128-bit lookup table with zero.
4476   if (PairPos == End) {
4477     Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
4478     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4479                                                      ZeroTbl, Indices, Name));
4480   }
4481 
4482   Function *TblF;
4483   TblOps.push_back(IndexOp);
4484   TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
4485 
4486   return CGF.EmitNeonCall(TblF, TblOps, Name);
4487 }
4488 
4489 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
4490   unsigned Value;
4491   switch (BuiltinID) {
4492   default:
4493     return nullptr;
4494   case ARM::BI__builtin_arm_nop:
4495     Value = 0;
4496     break;
4497   case ARM::BI__builtin_arm_yield:
4498   case ARM::BI__yield:
4499     Value = 1;
4500     break;
4501   case ARM::BI__builtin_arm_wfe:
4502   case ARM::BI__wfe:
4503     Value = 2;
4504     break;
4505   case ARM::BI__builtin_arm_wfi:
4506   case ARM::BI__wfi:
4507     Value = 3;
4508     break;
4509   case ARM::BI__builtin_arm_sev:
4510   case ARM::BI__sev:
4511     Value = 4;
4512     break;
4513   case ARM::BI__builtin_arm_sevl:
4514   case ARM::BI__sevl:
4515     Value = 5;
4516     break;
4517   }
4518 
4519   return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
4520                             llvm::ConstantInt::get(Int32Ty, Value));
4521 }
4522 
4523 // Generates the IR for the read/write special register builtin,
4524 // ValueType is the type of the value that is to be written or read,
4525 // RegisterType is the type of the register being written to or read from.
4526 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
4527                                          const CallExpr *E,
4528                                          llvm::Type *RegisterType,
4529                                          llvm::Type *ValueType,
4530                                          bool IsRead,
4531                                          StringRef SysReg = "") {
4532   // write and register intrinsics only support 32 and 64 bit operations.
4533   assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
4534           && "Unsupported size for register.");
4535 
4536   CodeGen::CGBuilderTy &Builder = CGF.Builder;
4537   CodeGen::CodeGenModule &CGM = CGF.CGM;
4538   LLVMContext &Context = CGM.getLLVMContext();
4539 
4540   if (SysReg.empty()) {
4541     const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
4542     SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
4543   }
4544 
4545   llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
4546   llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4547   llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4548 
4549   llvm::Type *Types[] = { RegisterType };
4550 
4551   bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
4552   assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
4553             && "Can't fit 64-bit value in 32-bit register");
4554 
4555   if (IsRead) {
4556     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
4557     llvm::Value *Call = Builder.CreateCall(F, Metadata);
4558 
4559     if (MixedTypes)
4560       // Read into 64 bit register and then truncate result to 32 bit.
4561       return Builder.CreateTrunc(Call, ValueType);
4562 
4563     if (ValueType->isPointerTy())
4564       // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
4565       return Builder.CreateIntToPtr(Call, ValueType);
4566 
4567     return Call;
4568   }
4569 
4570   llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
4571   llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
4572   if (MixedTypes) {
4573     // Extend 32 bit write value to 64 bit to pass to write.
4574     ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
4575     return Builder.CreateCall(F, { Metadata, ArgValue });
4576   }
4577 
4578   if (ValueType->isPointerTy()) {
4579     // Have VoidPtrTy ArgValue but want to return an i32/i64.
4580     ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
4581     return Builder.CreateCall(F, { Metadata, ArgValue });
4582   }
4583 
4584   return Builder.CreateCall(F, { Metadata, ArgValue });
4585 }
4586 
4587 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
4588 /// argument that specifies the vector type.
4589 static bool HasExtraNeonArgument(unsigned BuiltinID) {
4590   switch (BuiltinID) {
4591   default: break;
4592   case NEON::BI__builtin_neon_vget_lane_i8:
4593   case NEON::BI__builtin_neon_vget_lane_i16:
4594   case NEON::BI__builtin_neon_vget_lane_i32:
4595   case NEON::BI__builtin_neon_vget_lane_i64:
4596   case NEON::BI__builtin_neon_vget_lane_f32:
4597   case NEON::BI__builtin_neon_vgetq_lane_i8:
4598   case NEON::BI__builtin_neon_vgetq_lane_i16:
4599   case NEON::BI__builtin_neon_vgetq_lane_i32:
4600   case NEON::BI__builtin_neon_vgetq_lane_i64:
4601   case NEON::BI__builtin_neon_vgetq_lane_f32:
4602   case NEON::BI__builtin_neon_vset_lane_i8:
4603   case NEON::BI__builtin_neon_vset_lane_i16:
4604   case NEON::BI__builtin_neon_vset_lane_i32:
4605   case NEON::BI__builtin_neon_vset_lane_i64:
4606   case NEON::BI__builtin_neon_vset_lane_f32:
4607   case NEON::BI__builtin_neon_vsetq_lane_i8:
4608   case NEON::BI__builtin_neon_vsetq_lane_i16:
4609   case NEON::BI__builtin_neon_vsetq_lane_i32:
4610   case NEON::BI__builtin_neon_vsetq_lane_i64:
4611   case NEON::BI__builtin_neon_vsetq_lane_f32:
4612   case NEON::BI__builtin_neon_vsha1h_u32:
4613   case NEON::BI__builtin_neon_vsha1cq_u32:
4614   case NEON::BI__builtin_neon_vsha1pq_u32:
4615   case NEON::BI__builtin_neon_vsha1mq_u32:
4616   case ARM::BI_MoveToCoprocessor:
4617   case ARM::BI_MoveToCoprocessor2:
4618     return false;
4619   }
4620   return true;
4621 }
4622 
4623 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
4624                                            const CallExpr *E) {
4625   if (auto Hint = GetValueForARMHint(BuiltinID))
4626     return Hint;
4627 
4628   if (BuiltinID == ARM::BI__emit) {
4629     bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
4630     llvm::FunctionType *FTy =
4631         llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
4632 
4633     APSInt Value;
4634     if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
4635       llvm_unreachable("Sema will ensure that the parameter is constant");
4636 
4637     uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
4638 
4639     llvm::InlineAsm *Emit =
4640         IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
4641                                  /*SideEffects=*/true)
4642                 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
4643                                  /*SideEffects=*/true);
4644 
4645     return Builder.CreateCall(Emit);
4646   }
4647 
4648   if (BuiltinID == ARM::BI__builtin_arm_dbg) {
4649     Value *Option = EmitScalarExpr(E->getArg(0));
4650     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
4651   }
4652 
4653   if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
4654     Value *Address = EmitScalarExpr(E->getArg(0));
4655     Value *RW      = EmitScalarExpr(E->getArg(1));
4656     Value *IsData  = EmitScalarExpr(E->getArg(2));
4657 
4658     // Locality is not supported on ARM target
4659     Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
4660 
4661     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
4662     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
4663   }
4664 
4665   if (BuiltinID == ARM::BI__builtin_arm_rbit) {
4666     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4667     return Builder.CreateCall(
4668         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
4669   }
4670 
4671   if (BuiltinID == ARM::BI__clear_cache) {
4672     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4673     const FunctionDecl *FD = E->getDirectCallee();
4674     Value *Ops[2];
4675     for (unsigned i = 0; i < 2; i++)
4676       Ops[i] = EmitScalarExpr(E->getArg(i));
4677     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4678     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4679     StringRef Name = FD->getName();
4680     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4681   }
4682 
4683   if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
4684       BuiltinID == ARM::BI__builtin_arm_mcrr2) {
4685     Function *F;
4686 
4687     switch (BuiltinID) {
4688     default: llvm_unreachable("unexpected builtin");
4689     case ARM::BI__builtin_arm_mcrr:
4690       F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
4691       break;
4692     case ARM::BI__builtin_arm_mcrr2:
4693       F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
4694       break;
4695     }
4696 
4697     // MCRR{2} instruction has 5 operands but
4698     // the intrinsic has 4 because Rt and Rt2
4699     // are represented as a single unsigned 64
4700     // bit integer in the intrinsic definition
4701     // but internally it's represented as 2 32
4702     // bit integers.
4703 
4704     Value *Coproc = EmitScalarExpr(E->getArg(0));
4705     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4706     Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
4707     Value *CRm = EmitScalarExpr(E->getArg(3));
4708 
4709     Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4710     Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
4711     Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
4712     Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
4713 
4714     return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
4715   }
4716 
4717   if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
4718       BuiltinID == ARM::BI__builtin_arm_mrrc2) {
4719     Function *F;
4720 
4721     switch (BuiltinID) {
4722     default: llvm_unreachable("unexpected builtin");
4723     case ARM::BI__builtin_arm_mrrc:
4724       F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
4725       break;
4726     case ARM::BI__builtin_arm_mrrc2:
4727       F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
4728       break;
4729     }
4730 
4731     Value *Coproc = EmitScalarExpr(E->getArg(0));
4732     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4733     Value *CRm  = EmitScalarExpr(E->getArg(2));
4734     Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
4735 
4736     // Returns an unsigned 64 bit integer, represented
4737     // as two 32 bit integers.
4738 
4739     Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
4740     Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
4741     Rt = Builder.CreateZExt(Rt, Int64Ty);
4742     Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
4743 
4744     Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
4745     RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
4746     RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
4747 
4748     return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
4749   }
4750 
4751   if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
4752       ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
4753         BuiltinID == ARM::BI__builtin_arm_ldaex) &&
4754        getContext().getTypeSize(E->getType()) == 64) ||
4755       BuiltinID == ARM::BI__ldrexd) {
4756     Function *F;
4757 
4758     switch (BuiltinID) {
4759     default: llvm_unreachable("unexpected builtin");
4760     case ARM::BI__builtin_arm_ldaex:
4761       F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
4762       break;
4763     case ARM::BI__builtin_arm_ldrexd:
4764     case ARM::BI__builtin_arm_ldrex:
4765     case ARM::BI__ldrexd:
4766       F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
4767       break;
4768     }
4769 
4770     Value *LdPtr = EmitScalarExpr(E->getArg(0));
4771     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4772                                     "ldrexd");
4773 
4774     Value *Val0 = Builder.CreateExtractValue(Val, 1);
4775     Value *Val1 = Builder.CreateExtractValue(Val, 0);
4776     Val0 = Builder.CreateZExt(Val0, Int64Ty);
4777     Val1 = Builder.CreateZExt(Val1, Int64Ty);
4778 
4779     Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
4780     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4781     Val = Builder.CreateOr(Val, Val1);
4782     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4783   }
4784 
4785   if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
4786       BuiltinID == ARM::BI__builtin_arm_ldaex) {
4787     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4788 
4789     QualType Ty = E->getType();
4790     llvm::Type *RealResTy = ConvertType(Ty);
4791     llvm::Type *PtrTy = llvm::IntegerType::get(
4792         getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
4793     LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
4794 
4795     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
4796                                        ? Intrinsic::arm_ldaex
4797                                        : Intrinsic::arm_ldrex,
4798                                    PtrTy);
4799     Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
4800 
4801     if (RealResTy->isPointerTy())
4802       return Builder.CreateIntToPtr(Val, RealResTy);
4803     else {
4804       llvm::Type *IntResTy = llvm::IntegerType::get(
4805           getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
4806       Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4807       return Builder.CreateBitCast(Val, RealResTy);
4808     }
4809   }
4810 
4811   if (BuiltinID == ARM::BI__builtin_arm_strexd ||
4812       ((BuiltinID == ARM::BI__builtin_arm_stlex ||
4813         BuiltinID == ARM::BI__builtin_arm_strex) &&
4814        getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
4815     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4816                                        ? Intrinsic::arm_stlexd
4817                                        : Intrinsic::arm_strexd);
4818     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
4819 
4820     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4821     Value *Val = EmitScalarExpr(E->getArg(0));
4822     Builder.CreateStore(Val, Tmp);
4823 
4824     Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
4825     Val = Builder.CreateLoad(LdPtr);
4826 
4827     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4828     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4829     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
4830     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
4831   }
4832 
4833   if (BuiltinID == ARM::BI__builtin_arm_strex ||
4834       BuiltinID == ARM::BI__builtin_arm_stlex) {
4835     Value *StoreVal = EmitScalarExpr(E->getArg(0));
4836     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4837 
4838     QualType Ty = E->getArg(0)->getType();
4839     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4840                                                  getContext().getTypeSize(Ty));
4841     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4842 
4843     if (StoreVal->getType()->isPointerTy())
4844       StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
4845     else {
4846       llvm::Type *IntTy = llvm::IntegerType::get(
4847           getLLVMContext(),
4848           CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
4849       StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
4850       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
4851     }
4852 
4853     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4854                                        ? Intrinsic::arm_stlex
4855                                        : Intrinsic::arm_strex,
4856                                    StoreAddr->getType());
4857     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
4858   }
4859 
4860   switch (BuiltinID) {
4861   case ARM::BI__iso_volatile_load8:
4862   case ARM::BI__iso_volatile_load16:
4863   case ARM::BI__iso_volatile_load32:
4864   case ARM::BI__iso_volatile_load64: {
4865     Value *Ptr = EmitScalarExpr(E->getArg(0));
4866     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4867     CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
4868     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4869                                              LoadSize.getQuantity() * 8);
4870     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4871     llvm::LoadInst *Load =
4872       Builder.CreateAlignedLoad(Ptr, LoadSize);
4873     Load->setVolatile(true);
4874     return Load;
4875   }
4876   case ARM::BI__iso_volatile_store8:
4877   case ARM::BI__iso_volatile_store16:
4878   case ARM::BI__iso_volatile_store32:
4879   case ARM::BI__iso_volatile_store64: {
4880     Value *Ptr = EmitScalarExpr(E->getArg(0));
4881     Value *Value = EmitScalarExpr(E->getArg(1));
4882     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4883     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
4884     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4885                                              StoreSize.getQuantity() * 8);
4886     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4887     llvm::StoreInst *Store =
4888       Builder.CreateAlignedStore(Value, Ptr,
4889                                  StoreSize);
4890     Store->setVolatile(true);
4891     return Store;
4892   }
4893   }
4894 
4895   if (BuiltinID == ARM::BI__builtin_arm_clrex) {
4896     Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
4897     return Builder.CreateCall(F);
4898   }
4899 
4900   // CRC32
4901   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4902   switch (BuiltinID) {
4903   case ARM::BI__builtin_arm_crc32b:
4904     CRCIntrinsicID = Intrinsic::arm_crc32b; break;
4905   case ARM::BI__builtin_arm_crc32cb:
4906     CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
4907   case ARM::BI__builtin_arm_crc32h:
4908     CRCIntrinsicID = Intrinsic::arm_crc32h; break;
4909   case ARM::BI__builtin_arm_crc32ch:
4910     CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
4911   case ARM::BI__builtin_arm_crc32w:
4912   case ARM::BI__builtin_arm_crc32d:
4913     CRCIntrinsicID = Intrinsic::arm_crc32w; break;
4914   case ARM::BI__builtin_arm_crc32cw:
4915   case ARM::BI__builtin_arm_crc32cd:
4916     CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
4917   }
4918 
4919   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4920     Value *Arg0 = EmitScalarExpr(E->getArg(0));
4921     Value *Arg1 = EmitScalarExpr(E->getArg(1));
4922 
4923     // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
4924     // intrinsics, hence we need different codegen for these cases.
4925     if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
4926         BuiltinID == ARM::BI__builtin_arm_crc32cd) {
4927       Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4928       Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
4929       Value *Arg1b = Builder.CreateLShr(Arg1, C1);
4930       Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
4931 
4932       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4933       Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
4934       return Builder.CreateCall(F, {Res, Arg1b});
4935     } else {
4936       Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
4937 
4938       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4939       return Builder.CreateCall(F, {Arg0, Arg1});
4940     }
4941   }
4942 
4943   if (BuiltinID == ARM::BI__builtin_arm_rsr ||
4944       BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4945       BuiltinID == ARM::BI__builtin_arm_rsrp ||
4946       BuiltinID == ARM::BI__builtin_arm_wsr ||
4947       BuiltinID == ARM::BI__builtin_arm_wsr64 ||
4948       BuiltinID == ARM::BI__builtin_arm_wsrp) {
4949 
4950     bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
4951                   BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4952                   BuiltinID == ARM::BI__builtin_arm_rsrp;
4953 
4954     bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
4955                             BuiltinID == ARM::BI__builtin_arm_wsrp;
4956 
4957     bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4958                    BuiltinID == ARM::BI__builtin_arm_wsr64;
4959 
4960     llvm::Type *ValueType;
4961     llvm::Type *RegisterType;
4962     if (IsPointerBuiltin) {
4963       ValueType = VoidPtrTy;
4964       RegisterType = Int32Ty;
4965     } else if (Is64Bit) {
4966       ValueType = RegisterType = Int64Ty;
4967     } else {
4968       ValueType = RegisterType = Int32Ty;
4969     }
4970 
4971     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
4972   }
4973 
4974   // Find out if any arguments are required to be integer constant
4975   // expressions.
4976   unsigned ICEArguments = 0;
4977   ASTContext::GetBuiltinTypeError Error;
4978   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4979   assert(Error == ASTContext::GE_None && "Should not codegen an error");
4980 
4981   auto getAlignmentValue32 = [&](Address addr) -> Value* {
4982     return Builder.getInt32(addr.getAlignment().getQuantity());
4983   };
4984 
4985   Address PtrOp0 = Address::invalid();
4986   Address PtrOp1 = Address::invalid();
4987   SmallVector<Value*, 4> Ops;
4988   bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
4989   unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
4990   for (unsigned i = 0, e = NumArgs; i != e; i++) {
4991     if (i == 0) {
4992       switch (BuiltinID) {
4993       case NEON::BI__builtin_neon_vld1_v:
4994       case NEON::BI__builtin_neon_vld1q_v:
4995       case NEON::BI__builtin_neon_vld1q_lane_v:
4996       case NEON::BI__builtin_neon_vld1_lane_v:
4997       case NEON::BI__builtin_neon_vld1_dup_v:
4998       case NEON::BI__builtin_neon_vld1q_dup_v:
4999       case NEON::BI__builtin_neon_vst1_v:
5000       case NEON::BI__builtin_neon_vst1q_v:
5001       case NEON::BI__builtin_neon_vst1q_lane_v:
5002       case NEON::BI__builtin_neon_vst1_lane_v:
5003       case NEON::BI__builtin_neon_vst2_v:
5004       case NEON::BI__builtin_neon_vst2q_v:
5005       case NEON::BI__builtin_neon_vst2_lane_v:
5006       case NEON::BI__builtin_neon_vst2q_lane_v:
5007       case NEON::BI__builtin_neon_vst3_v:
5008       case NEON::BI__builtin_neon_vst3q_v:
5009       case NEON::BI__builtin_neon_vst3_lane_v:
5010       case NEON::BI__builtin_neon_vst3q_lane_v:
5011       case NEON::BI__builtin_neon_vst4_v:
5012       case NEON::BI__builtin_neon_vst4q_v:
5013       case NEON::BI__builtin_neon_vst4_lane_v:
5014       case NEON::BI__builtin_neon_vst4q_lane_v:
5015         // Get the alignment for the argument in addition to the value;
5016         // we'll use it later.
5017         PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
5018         Ops.push_back(PtrOp0.getPointer());
5019         continue;
5020       }
5021     }
5022     if (i == 1) {
5023       switch (BuiltinID) {
5024       case NEON::BI__builtin_neon_vld2_v:
5025       case NEON::BI__builtin_neon_vld2q_v:
5026       case NEON::BI__builtin_neon_vld3_v:
5027       case NEON::BI__builtin_neon_vld3q_v:
5028       case NEON::BI__builtin_neon_vld4_v:
5029       case NEON::BI__builtin_neon_vld4q_v:
5030       case NEON::BI__builtin_neon_vld2_lane_v:
5031       case NEON::BI__builtin_neon_vld2q_lane_v:
5032       case NEON::BI__builtin_neon_vld3_lane_v:
5033       case NEON::BI__builtin_neon_vld3q_lane_v:
5034       case NEON::BI__builtin_neon_vld4_lane_v:
5035       case NEON::BI__builtin_neon_vld4q_lane_v:
5036       case NEON::BI__builtin_neon_vld2_dup_v:
5037       case NEON::BI__builtin_neon_vld3_dup_v:
5038       case NEON::BI__builtin_neon_vld4_dup_v:
5039         // Get the alignment for the argument in addition to the value;
5040         // we'll use it later.
5041         PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
5042         Ops.push_back(PtrOp1.getPointer());
5043         continue;
5044       }
5045     }
5046 
5047     if ((ICEArguments & (1 << i)) == 0) {
5048       Ops.push_back(EmitScalarExpr(E->getArg(i)));
5049     } else {
5050       // If this is required to be a constant, constant fold it so that we know
5051       // that the generated intrinsic gets a ConstantInt.
5052       llvm::APSInt Result;
5053       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
5054       assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
5055       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
5056     }
5057   }
5058 
5059   switch (BuiltinID) {
5060   default: break;
5061 
5062   case NEON::BI__builtin_neon_vget_lane_i8:
5063   case NEON::BI__builtin_neon_vget_lane_i16:
5064   case NEON::BI__builtin_neon_vget_lane_i32:
5065   case NEON::BI__builtin_neon_vget_lane_i64:
5066   case NEON::BI__builtin_neon_vget_lane_f32:
5067   case NEON::BI__builtin_neon_vgetq_lane_i8:
5068   case NEON::BI__builtin_neon_vgetq_lane_i16:
5069   case NEON::BI__builtin_neon_vgetq_lane_i32:
5070   case NEON::BI__builtin_neon_vgetq_lane_i64:
5071   case NEON::BI__builtin_neon_vgetq_lane_f32:
5072     return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5073 
5074   case NEON::BI__builtin_neon_vset_lane_i8:
5075   case NEON::BI__builtin_neon_vset_lane_i16:
5076   case NEON::BI__builtin_neon_vset_lane_i32:
5077   case NEON::BI__builtin_neon_vset_lane_i64:
5078   case NEON::BI__builtin_neon_vset_lane_f32:
5079   case NEON::BI__builtin_neon_vsetq_lane_i8:
5080   case NEON::BI__builtin_neon_vsetq_lane_i16:
5081   case NEON::BI__builtin_neon_vsetq_lane_i32:
5082   case NEON::BI__builtin_neon_vsetq_lane_i64:
5083   case NEON::BI__builtin_neon_vsetq_lane_f32:
5084     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5085 
5086   case NEON::BI__builtin_neon_vsha1h_u32:
5087     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
5088                         "vsha1h");
5089   case NEON::BI__builtin_neon_vsha1cq_u32:
5090     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
5091                         "vsha1h");
5092   case NEON::BI__builtin_neon_vsha1pq_u32:
5093     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
5094                         "vsha1h");
5095   case NEON::BI__builtin_neon_vsha1mq_u32:
5096     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
5097                         "vsha1h");
5098 
5099   // The ARM _MoveToCoprocessor builtins put the input register value as
5100   // the first argument, but the LLVM intrinsic expects it as the third one.
5101   case ARM::BI_MoveToCoprocessor:
5102   case ARM::BI_MoveToCoprocessor2: {
5103     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
5104                                    Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
5105     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
5106                                   Ops[3], Ops[4], Ops[5]});
5107   }
5108   case ARM::BI_BitScanForward:
5109   case ARM::BI_BitScanForward64:
5110     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
5111   case ARM::BI_BitScanReverse:
5112   case ARM::BI_BitScanReverse64:
5113     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
5114 
5115   case ARM::BI_InterlockedAnd64:
5116     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
5117   case ARM::BI_InterlockedExchange64:
5118     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
5119   case ARM::BI_InterlockedExchangeAdd64:
5120     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
5121   case ARM::BI_InterlockedExchangeSub64:
5122     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
5123   case ARM::BI_InterlockedOr64:
5124     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
5125   case ARM::BI_InterlockedXor64:
5126     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
5127   case ARM::BI_InterlockedDecrement64:
5128     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
5129   case ARM::BI_InterlockedIncrement64:
5130     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
5131   }
5132 
5133   // Get the last argument, which specifies the vector type.
5134   assert(HasExtraArg);
5135   llvm::APSInt Result;
5136   const Expr *Arg = E->getArg(E->getNumArgs()-1);
5137   if (!Arg->isIntegerConstantExpr(Result, getContext()))
5138     return nullptr;
5139 
5140   if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
5141       BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
5142     // Determine the overloaded type of this builtin.
5143     llvm::Type *Ty;
5144     if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
5145       Ty = FloatTy;
5146     else
5147       Ty = DoubleTy;
5148 
5149     // Determine whether this is an unsigned conversion or not.
5150     bool usgn = Result.getZExtValue() == 1;
5151     unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
5152 
5153     // Call the appropriate intrinsic.
5154     Function *F = CGM.getIntrinsic(Int, Ty);
5155     return Builder.CreateCall(F, Ops, "vcvtr");
5156   }
5157 
5158   // Determine the type of this overloaded NEON intrinsic.
5159   NeonTypeFlags Type(Result.getZExtValue());
5160   bool usgn = Type.isUnsigned();
5161   bool rightShift = false;
5162 
5163   llvm::VectorType *VTy = GetNeonType(this, Type);
5164   llvm::Type *Ty = VTy;
5165   if (!Ty)
5166     return nullptr;
5167 
5168   // Many NEON builtins have identical semantics and uses in ARM and
5169   // AArch64. Emit these in a single function.
5170   auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
5171   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
5172       IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
5173   if (Builtin)
5174     return EmitCommonNeonBuiltinExpr(
5175         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
5176         Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);
5177 
5178   unsigned Int;
5179   switch (BuiltinID) {
5180   default: return nullptr;
5181   case NEON::BI__builtin_neon_vld1q_lane_v:
5182     // Handle 64-bit integer elements as a special case.  Use shuffles of
5183     // one-element vectors to avoid poor code for i64 in the backend.
5184     if (VTy->getElementType()->isIntegerTy(64)) {
5185       // Extract the other lane.
5186       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5187       uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
5188       Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
5189       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
5190       // Load the value as a one-element vector.
5191       Ty = llvm::VectorType::get(VTy->getElementType(), 1);
5192       llvm::Type *Tys[] = {Ty, Int8PtrTy};
5193       Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
5194       Value *Align = getAlignmentValue32(PtrOp0);
5195       Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
5196       // Combine them.
5197       uint32_t Indices[] = {1 - Lane, Lane};
5198       SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
5199       return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
5200     }
5201     // fall through
5202   case NEON::BI__builtin_neon_vld1_lane_v: {
5203     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5204     PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
5205     Value *Ld = Builder.CreateLoad(PtrOp0);
5206     return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
5207   }
5208   case NEON::BI__builtin_neon_vld2_dup_v:
5209   case NEON::BI__builtin_neon_vld3_dup_v:
5210   case NEON::BI__builtin_neon_vld4_dup_v: {
5211     // Handle 64-bit elements as a special-case.  There is no "dup" needed.
5212     if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
5213       switch (BuiltinID) {
5214       case NEON::BI__builtin_neon_vld2_dup_v:
5215         Int = Intrinsic::arm_neon_vld2;
5216         break;
5217       case NEON::BI__builtin_neon_vld3_dup_v:
5218         Int = Intrinsic::arm_neon_vld3;
5219         break;
5220       case NEON::BI__builtin_neon_vld4_dup_v:
5221         Int = Intrinsic::arm_neon_vld4;
5222         break;
5223       default: llvm_unreachable("unknown vld_dup intrinsic?");
5224       }
5225       llvm::Type *Tys[] = {Ty, Int8PtrTy};
5226       Function *F = CGM.getIntrinsic(Int, Tys);
5227       llvm::Value *Align = getAlignmentValue32(PtrOp1);
5228       Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
5229       Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5230       Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5231       return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5232     }
5233     switch (BuiltinID) {
5234     case NEON::BI__builtin_neon_vld2_dup_v:
5235       Int = Intrinsic::arm_neon_vld2lane;
5236       break;
5237     case NEON::BI__builtin_neon_vld3_dup_v:
5238       Int = Intrinsic::arm_neon_vld3lane;
5239       break;
5240     case NEON::BI__builtin_neon_vld4_dup_v:
5241       Int = Intrinsic::arm_neon_vld4lane;
5242       break;
5243     default: llvm_unreachable("unknown vld_dup intrinsic?");
5244     }
5245     llvm::Type *Tys[] = {Ty, Int8PtrTy};
5246     Function *F = CGM.getIntrinsic(Int, Tys);
5247     llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
5248 
5249     SmallVector<Value*, 6> Args;
5250     Args.push_back(Ops[1]);
5251     Args.append(STy->getNumElements(), UndefValue::get(Ty));
5252 
5253     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
5254     Args.push_back(CI);
5255     Args.push_back(getAlignmentValue32(PtrOp1));
5256 
5257     Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
5258     // splat lane 0 to all elts in each vector of the result.
5259     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
5260       Value *Val = Builder.CreateExtractValue(Ops[1], i);
5261       Value *Elt = Builder.CreateBitCast(Val, Ty);
5262       Elt = EmitNeonSplat(Elt, CI);
5263       Elt = Builder.CreateBitCast(Elt, Val->getType());
5264       Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
5265     }
5266     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5267     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5268     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5269   }
5270   case NEON::BI__builtin_neon_vqrshrn_n_v:
5271     Int =
5272       usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
5273     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
5274                         1, true);
5275   case NEON::BI__builtin_neon_vqrshrun_n_v:
5276     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
5277                         Ops, "vqrshrun_n", 1, true);
5278   case NEON::BI__builtin_neon_vqshrn_n_v:
5279     Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
5280     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
5281                         1, true);
5282   case NEON::BI__builtin_neon_vqshrun_n_v:
5283     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
5284                         Ops, "vqshrun_n", 1, true);
5285   case NEON::BI__builtin_neon_vrecpe_v:
5286   case NEON::BI__builtin_neon_vrecpeq_v:
5287     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
5288                         Ops, "vrecpe");
5289   case NEON::BI__builtin_neon_vrshrn_n_v:
5290     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
5291                         Ops, "vrshrn_n", 1, true);
5292   case NEON::BI__builtin_neon_vrsra_n_v:
5293   case NEON::BI__builtin_neon_vrsraq_n_v:
5294     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5295     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5296     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
5297     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
5298     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
5299     return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
5300   case NEON::BI__builtin_neon_vsri_n_v:
5301   case NEON::BI__builtin_neon_vsriq_n_v:
5302     rightShift = true;
5303     LLVM_FALLTHROUGH;
5304   case NEON::BI__builtin_neon_vsli_n_v:
5305   case NEON::BI__builtin_neon_vsliq_n_v:
5306     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
5307     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
5308                         Ops, "vsli_n");
5309   case NEON::BI__builtin_neon_vsra_n_v:
5310   case NEON::BI__builtin_neon_vsraq_n_v:
5311     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5312     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
5313     return Builder.CreateAdd(Ops[0], Ops[1]);
5314   case NEON::BI__builtin_neon_vst1q_lane_v:
5315     // Handle 64-bit integer elements as a special case.  Use a shuffle to get
5316     // a one-element vector and avoid poor code for i64 in the backend.
5317     if (VTy->getElementType()->isIntegerTy(64)) {
5318       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5319       Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
5320       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
5321       Ops[2] = getAlignmentValue32(PtrOp0);
5322       llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
5323       return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
5324                                                  Tys), Ops);
5325     }
5326     // fall through
5327   case NEON::BI__builtin_neon_vst1_lane_v: {
5328     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5329     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
5330     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5331     auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
5332     return St;
5333   }
5334   case NEON::BI__builtin_neon_vtbl1_v:
5335     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
5336                         Ops, "vtbl1");
5337   case NEON::BI__builtin_neon_vtbl2_v:
5338     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
5339                         Ops, "vtbl2");
5340   case NEON::BI__builtin_neon_vtbl3_v:
5341     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
5342                         Ops, "vtbl3");
5343   case NEON::BI__builtin_neon_vtbl4_v:
5344     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
5345                         Ops, "vtbl4");
5346   case NEON::BI__builtin_neon_vtbx1_v:
5347     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
5348                         Ops, "vtbx1");
5349   case NEON::BI__builtin_neon_vtbx2_v:
5350     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
5351                         Ops, "vtbx2");
5352   case NEON::BI__builtin_neon_vtbx3_v:
5353     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
5354                         Ops, "vtbx3");
5355   case NEON::BI__builtin_neon_vtbx4_v:
5356     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
5357                         Ops, "vtbx4");
5358   }
5359 }
5360 
5361 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
5362                                       const CallExpr *E,
5363                                       SmallVectorImpl<Value *> &Ops) {
5364   unsigned int Int = 0;
5365   const char *s = nullptr;
5366 
5367   switch (BuiltinID) {
5368   default:
5369     return nullptr;
5370   case NEON::BI__builtin_neon_vtbl1_v:
5371   case NEON::BI__builtin_neon_vqtbl1_v:
5372   case NEON::BI__builtin_neon_vqtbl1q_v:
5373   case NEON::BI__builtin_neon_vtbl2_v:
5374   case NEON::BI__builtin_neon_vqtbl2_v:
5375   case NEON::BI__builtin_neon_vqtbl2q_v:
5376   case NEON::BI__builtin_neon_vtbl3_v:
5377   case NEON::BI__builtin_neon_vqtbl3_v:
5378   case NEON::BI__builtin_neon_vqtbl3q_v:
5379   case NEON::BI__builtin_neon_vtbl4_v:
5380   case NEON::BI__builtin_neon_vqtbl4_v:
5381   case NEON::BI__builtin_neon_vqtbl4q_v:
5382     break;
5383   case NEON::BI__builtin_neon_vtbx1_v:
5384   case NEON::BI__builtin_neon_vqtbx1_v:
5385   case NEON::BI__builtin_neon_vqtbx1q_v:
5386   case NEON::BI__builtin_neon_vtbx2_v:
5387   case NEON::BI__builtin_neon_vqtbx2_v:
5388   case NEON::BI__builtin_neon_vqtbx2q_v:
5389   case NEON::BI__builtin_neon_vtbx3_v:
5390   case NEON::BI__builtin_neon_vqtbx3_v:
5391   case NEON::BI__builtin_neon_vqtbx3q_v:
5392   case NEON::BI__builtin_neon_vtbx4_v:
5393   case NEON::BI__builtin_neon_vqtbx4_v:
5394   case NEON::BI__builtin_neon_vqtbx4q_v:
5395     break;
5396   }
5397 
5398   assert(E->getNumArgs() >= 3);
5399 
5400   // Get the last argument, which specifies the vector type.
5401   llvm::APSInt Result;
5402   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
5403   if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
5404     return nullptr;
5405 
5406   // Determine the type of this overloaded NEON intrinsic.
5407   NeonTypeFlags Type(Result.getZExtValue());
5408   llvm::VectorType *Ty = GetNeonType(&CGF, Type);
5409   if (!Ty)
5410     return nullptr;
5411 
5412   CodeGen::CGBuilderTy &Builder = CGF.Builder;
5413 
5414   // AArch64 scalar builtins are not overloaded, they do not have an extra
5415   // argument that specifies the vector type, need to handle each case.
5416   switch (BuiltinID) {
5417   case NEON::BI__builtin_neon_vtbl1_v: {
5418     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
5419                               Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
5420                               "vtbl1");
5421   }
5422   case NEON::BI__builtin_neon_vtbl2_v: {
5423     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
5424                               Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
5425                               "vtbl1");
5426   }
5427   case NEON::BI__builtin_neon_vtbl3_v: {
5428     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
5429                               Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
5430                               "vtbl2");
5431   }
5432   case NEON::BI__builtin_neon_vtbl4_v: {
5433     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
5434                               Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
5435                               "vtbl2");
5436   }
5437   case NEON::BI__builtin_neon_vtbx1_v: {
5438     Value *TblRes =
5439         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
5440                            Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
5441 
5442     llvm::Constant *EightV = ConstantInt::get(Ty, 8);
5443     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
5444     CmpRes = Builder.CreateSExt(CmpRes, Ty);
5445 
5446     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5447     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5448     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5449   }
5450   case NEON::BI__builtin_neon_vtbx2_v: {
5451     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
5452                               Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
5453                               "vtbx1");
5454   }
5455   case NEON::BI__builtin_neon_vtbx3_v: {
5456     Value *TblRes =
5457         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
5458                            Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
5459 
5460     llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
5461     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
5462                                            TwentyFourV);
5463     CmpRes = Builder.CreateSExt(CmpRes, Ty);
5464 
5465     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5466     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5467     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5468   }
5469   case NEON::BI__builtin_neon_vtbx4_v: {
5470     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
5471                               Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
5472                               "vtbx2");
5473   }
5474   case NEON::BI__builtin_neon_vqtbl1_v:
5475   case NEON::BI__builtin_neon_vqtbl1q_v:
5476     Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
5477   case NEON::BI__builtin_neon_vqtbl2_v:
5478   case NEON::BI__builtin_neon_vqtbl2q_v: {
5479     Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
5480   case NEON::BI__builtin_neon_vqtbl3_v:
5481   case NEON::BI__builtin_neon_vqtbl3q_v:
5482     Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
5483   case NEON::BI__builtin_neon_vqtbl4_v:
5484   case NEON::BI__builtin_neon_vqtbl4q_v:
5485     Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
5486   case NEON::BI__builtin_neon_vqtbx1_v:
5487   case NEON::BI__builtin_neon_vqtbx1q_v:
5488     Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
5489   case NEON::BI__builtin_neon_vqtbx2_v:
5490   case NEON::BI__builtin_neon_vqtbx2q_v:
5491     Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
5492   case NEON::BI__builtin_neon_vqtbx3_v:
5493   case NEON::BI__builtin_neon_vqtbx3q_v:
5494     Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
5495   case NEON::BI__builtin_neon_vqtbx4_v:
5496   case NEON::BI__builtin_neon_vqtbx4q_v:
5497     Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
5498   }
5499   }
5500 
5501   if (!Int)
5502     return nullptr;
5503 
5504   Function *F = CGF.CGM.getIntrinsic(Int, Ty);
5505   return CGF.EmitNeonCall(F, Ops, s);
5506 }
5507 
5508 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
5509   llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
5510   Op = Builder.CreateBitCast(Op, Int16Ty);
5511   Value *V = UndefValue::get(VTy);
5512   llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
5513   Op = Builder.CreateInsertElement(V, Op, CI);
5514   return Op;
5515 }
5516 
5517 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
5518                                                const CallExpr *E) {
5519   unsigned HintID = static_cast<unsigned>(-1);
5520   switch (BuiltinID) {
5521   default: break;
5522   case AArch64::BI__builtin_arm_nop:
5523     HintID = 0;
5524     break;
5525   case AArch64::BI__builtin_arm_yield:
5526     HintID = 1;
5527     break;
5528   case AArch64::BI__builtin_arm_wfe:
5529     HintID = 2;
5530     break;
5531   case AArch64::BI__builtin_arm_wfi:
5532     HintID = 3;
5533     break;
5534   case AArch64::BI__builtin_arm_sev:
5535     HintID = 4;
5536     break;
5537   case AArch64::BI__builtin_arm_sevl:
5538     HintID = 5;
5539     break;
5540   }
5541 
5542   if (HintID != static_cast<unsigned>(-1)) {
5543     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
5544     return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
5545   }
5546 
5547   if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
5548     Value *Address         = EmitScalarExpr(E->getArg(0));
5549     Value *RW              = EmitScalarExpr(E->getArg(1));
5550     Value *CacheLevel      = EmitScalarExpr(E->getArg(2));
5551     Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
5552     Value *IsData          = EmitScalarExpr(E->getArg(4));
5553 
5554     Value *Locality = nullptr;
5555     if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
5556       // Temporal fetch, needs to convert cache level to locality.
5557       Locality = llvm::ConstantInt::get(Int32Ty,
5558         -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
5559     } else {
5560       // Streaming fetch.
5561       Locality = llvm::ConstantInt::get(Int32Ty, 0);
5562     }
5563 
5564     // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
5565     // PLDL3STRM or PLDL2STRM.
5566     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
5567     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
5568   }
5569 
5570   if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
5571     assert((getContext().getTypeSize(E->getType()) == 32) &&
5572            "rbit of unusual size!");
5573     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5574     return Builder.CreateCall(
5575         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5576   }
5577   if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
5578     assert((getContext().getTypeSize(E->getType()) == 64) &&
5579            "rbit of unusual size!");
5580     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5581     return Builder.CreateCall(
5582         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5583   }
5584 
5585   if (BuiltinID == AArch64::BI__clear_cache) {
5586     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
5587     const FunctionDecl *FD = E->getDirectCallee();
5588     Value *Ops[2];
5589     for (unsigned i = 0; i < 2; i++)
5590       Ops[i] = EmitScalarExpr(E->getArg(i));
5591     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
5592     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
5593     StringRef Name = FD->getName();
5594     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
5595   }
5596 
5597   if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5598       BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
5599       getContext().getTypeSize(E->getType()) == 128) {
5600     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5601                                        ? Intrinsic::aarch64_ldaxp
5602                                        : Intrinsic::aarch64_ldxp);
5603 
5604     Value *LdPtr = EmitScalarExpr(E->getArg(0));
5605     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
5606                                     "ldxp");
5607 
5608     Value *Val0 = Builder.CreateExtractValue(Val, 1);
5609     Value *Val1 = Builder.CreateExtractValue(Val, 0);
5610     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5611     Val0 = Builder.CreateZExt(Val0, Int128Ty);
5612     Val1 = Builder.CreateZExt(Val1, Int128Ty);
5613 
5614     Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
5615     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
5616     Val = Builder.CreateOr(Val, Val1);
5617     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
5618   } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5619              BuiltinID == AArch64::BI__builtin_arm_ldaex) {
5620     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
5621 
5622     QualType Ty = E->getType();
5623     llvm::Type *RealResTy = ConvertType(Ty);
5624     llvm::Type *PtrTy = llvm::IntegerType::get(
5625         getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
5626     LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
5627 
5628     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5629                                        ? Intrinsic::aarch64_ldaxr
5630                                        : Intrinsic::aarch64_ldxr,
5631                                    PtrTy);
5632     Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
5633 
5634     if (RealResTy->isPointerTy())
5635       return Builder.CreateIntToPtr(Val, RealResTy);
5636 
5637     llvm::Type *IntResTy = llvm::IntegerType::get(
5638         getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
5639     Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
5640     return Builder.CreateBitCast(Val, RealResTy);
5641   }
5642 
5643   if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
5644        BuiltinID == AArch64::BI__builtin_arm_stlex) &&
5645       getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
5646     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5647                                        ? Intrinsic::aarch64_stlxp
5648                                        : Intrinsic::aarch64_stxp);
5649     llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
5650 
5651     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
5652     EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
5653 
5654     Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
5655     llvm::Value *Val = Builder.CreateLoad(Tmp);
5656 
5657     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
5658     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
5659     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
5660                                          Int8PtrTy);
5661     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
5662   }
5663 
5664   if (BuiltinID == AArch64::BI__builtin_arm_strex ||
5665       BuiltinID == AArch64::BI__builtin_arm_stlex) {
5666     Value *StoreVal = EmitScalarExpr(E->getArg(0));
5667     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
5668 
5669     QualType Ty = E->getArg(0)->getType();
5670     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
5671                                                  getContext().getTypeSize(Ty));
5672     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
5673 
5674     if (StoreVal->getType()->isPointerTy())
5675       StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
5676     else {
5677       llvm::Type *IntTy = llvm::IntegerType::get(
5678           getLLVMContext(),
5679           CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
5680       StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
5681       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
5682     }
5683 
5684     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5685                                        ? Intrinsic::aarch64_stlxr
5686                                        : Intrinsic::aarch64_stxr,
5687                                    StoreAddr->getType());
5688     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
5689   }
5690 
5691   if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
5692     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
5693     return Builder.CreateCall(F);
5694   }
5695 
5696   // CRC32
5697   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5698   switch (BuiltinID) {
5699   case AArch64::BI__builtin_arm_crc32b:
5700     CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
5701   case AArch64::BI__builtin_arm_crc32cb:
5702     CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
5703   case AArch64::BI__builtin_arm_crc32h:
5704     CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
5705   case AArch64::BI__builtin_arm_crc32ch:
5706     CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
5707   case AArch64::BI__builtin_arm_crc32w:
5708     CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
5709   case AArch64::BI__builtin_arm_crc32cw:
5710     CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
5711   case AArch64::BI__builtin_arm_crc32d:
5712     CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
5713   case AArch64::BI__builtin_arm_crc32cd:
5714     CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
5715   }
5716 
5717   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5718     Value *Arg0 = EmitScalarExpr(E->getArg(0));
5719     Value *Arg1 = EmitScalarExpr(E->getArg(1));
5720     Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5721 
5722     llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
5723     Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
5724 
5725     return Builder.CreateCall(F, {Arg0, Arg1});
5726   }
5727 
5728   if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
5729       BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5730       BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5731       BuiltinID == AArch64::BI__builtin_arm_wsr ||
5732       BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
5733       BuiltinID == AArch64::BI__builtin_arm_wsrp) {
5734 
5735     bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
5736                   BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5737                   BuiltinID == AArch64::BI__builtin_arm_rsrp;
5738 
5739     bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5740                             BuiltinID == AArch64::BI__builtin_arm_wsrp;
5741 
5742     bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
5743                    BuiltinID != AArch64::BI__builtin_arm_wsr;
5744 
5745     llvm::Type *ValueType;
5746     llvm::Type *RegisterType = Int64Ty;
5747     if (IsPointerBuiltin) {
5748       ValueType = VoidPtrTy;
5749     } else if (Is64Bit) {
5750       ValueType = Int64Ty;
5751     } else {
5752       ValueType = Int32Ty;
5753     }
5754 
5755     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
5756   }
5757 
5758   // Find out if any arguments are required to be integer constant
5759   // expressions.
5760   unsigned ICEArguments = 0;
5761   ASTContext::GetBuiltinTypeError Error;
5762   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5763   assert(Error == ASTContext::GE_None && "Should not codegen an error");
5764 
5765   llvm::SmallVector<Value*, 4> Ops;
5766   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
5767     if ((ICEArguments & (1 << i)) == 0) {
5768       Ops.push_back(EmitScalarExpr(E->getArg(i)));
5769     } else {
5770       // If this is required to be a constant, constant fold it so that we know
5771       // that the generated intrinsic gets a ConstantInt.
5772       llvm::APSInt Result;
5773       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
5774       assert(IsConst && "Constant arg isn't actually constant?");
5775       (void)IsConst;
5776       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
5777     }
5778   }
5779 
5780   auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
5781   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
5782       SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5783 
5784   if (Builtin) {
5785     Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
5786     Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
5787     assert(Result && "SISD intrinsic should have been handled");
5788     return Result;
5789   }
5790 
5791   llvm::APSInt Result;
5792   const Expr *Arg = E->getArg(E->getNumArgs()-1);
5793   NeonTypeFlags Type(0);
5794   if (Arg->isIntegerConstantExpr(Result, getContext()))
5795     // Determine the type of this overloaded NEON intrinsic.
5796     Type = NeonTypeFlags(Result.getZExtValue());
5797 
5798   bool usgn = Type.isUnsigned();
5799   bool quad = Type.isQuad();
5800 
5801   // Handle non-overloaded intrinsics first.
5802   switch (BuiltinID) {
5803   default: break;
5804   case NEON::BI__builtin_neon_vldrq_p128: {
5805     llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5806     llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
5807     Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
5808     return Builder.CreateAlignedLoad(Int128Ty, Ptr,
5809                                      CharUnits::fromQuantity(16));
5810   }
5811   case NEON::BI__builtin_neon_vstrq_p128: {
5812     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
5813     Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
5814     return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
5815   }
5816   case NEON::BI__builtin_neon_vcvts_u32_f32:
5817   case NEON::BI__builtin_neon_vcvtd_u64_f64:
5818     usgn = true;
5819     // FALL THROUGH
5820   case NEON::BI__builtin_neon_vcvts_s32_f32:
5821   case NEON::BI__builtin_neon_vcvtd_s64_f64: {
5822     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5823     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5824     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5825     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5826     Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
5827     if (usgn)
5828       return Builder.CreateFPToUI(Ops[0], InTy);
5829     return Builder.CreateFPToSI(Ops[0], InTy);
5830   }
5831   case NEON::BI__builtin_neon_vcvts_f32_u32:
5832   case NEON::BI__builtin_neon_vcvtd_f64_u64:
5833     usgn = true;
5834     // FALL THROUGH
5835   case NEON::BI__builtin_neon_vcvts_f32_s32:
5836   case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5837     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5838     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5839     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5840     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5841     Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5842     if (usgn)
5843       return Builder.CreateUIToFP(Ops[0], FTy);
5844     return Builder.CreateSIToFP(Ops[0], FTy);
5845   }
5846   case NEON::BI__builtin_neon_vpaddd_s64: {
5847     llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
5848     Value *Vec = EmitScalarExpr(E->getArg(0));
5849     // The vector is v2f64, so make sure it's bitcast to that.
5850     Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
5851     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5852     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5853     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5854     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5855     // Pairwise addition of a v2f64 into a scalar f64.
5856     return Builder.CreateAdd(Op0, Op1, "vpaddd");
5857   }
5858   case NEON::BI__builtin_neon_vpaddd_f64: {
5859     llvm::Type *Ty =
5860       llvm::VectorType::get(DoubleTy, 2);
5861     Value *Vec = EmitScalarExpr(E->getArg(0));
5862     // The vector is v2f64, so make sure it's bitcast to that.
5863     Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
5864     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5865     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5866     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5867     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5868     // Pairwise addition of a v2f64 into a scalar f64.
5869     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5870   }
5871   case NEON::BI__builtin_neon_vpadds_f32: {
5872     llvm::Type *Ty =
5873       llvm::VectorType::get(FloatTy, 2);
5874     Value *Vec = EmitScalarExpr(E->getArg(0));
5875     // The vector is v2f32, so make sure it's bitcast to that.
5876     Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
5877     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5878     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5879     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5880     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5881     // Pairwise addition of a v2f32 into a scalar f32.
5882     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5883   }
5884   case NEON::BI__builtin_neon_vceqzd_s64:
5885   case NEON::BI__builtin_neon_vceqzd_f64:
5886   case NEON::BI__builtin_neon_vceqzs_f32:
5887     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5888     return EmitAArch64CompareBuiltinExpr(
5889         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5890         ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
5891   case NEON::BI__builtin_neon_vcgezd_s64:
5892   case NEON::BI__builtin_neon_vcgezd_f64:
5893   case NEON::BI__builtin_neon_vcgezs_f32:
5894     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5895     return EmitAArch64CompareBuiltinExpr(
5896         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5897         ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
5898   case NEON::BI__builtin_neon_vclezd_s64:
5899   case NEON::BI__builtin_neon_vclezd_f64:
5900   case NEON::BI__builtin_neon_vclezs_f32:
5901     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5902     return EmitAArch64CompareBuiltinExpr(
5903         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5904         ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
5905   case NEON::BI__builtin_neon_vcgtzd_s64:
5906   case NEON::BI__builtin_neon_vcgtzd_f64:
5907   case NEON::BI__builtin_neon_vcgtzs_f32:
5908     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5909     return EmitAArch64CompareBuiltinExpr(
5910         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5911         ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
5912   case NEON::BI__builtin_neon_vcltzd_s64:
5913   case NEON::BI__builtin_neon_vcltzd_f64:
5914   case NEON::BI__builtin_neon_vcltzs_f32:
5915     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5916     return EmitAArch64CompareBuiltinExpr(
5917         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5918         ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
5919 
5920   case NEON::BI__builtin_neon_vceqzd_u64: {
5921     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5922     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5923     Ops[0] =
5924         Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
5925     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
5926   }
5927   case NEON::BI__builtin_neon_vceqd_f64:
5928   case NEON::BI__builtin_neon_vcled_f64:
5929   case NEON::BI__builtin_neon_vcltd_f64:
5930   case NEON::BI__builtin_neon_vcged_f64:
5931   case NEON::BI__builtin_neon_vcgtd_f64: {
5932     llvm::CmpInst::Predicate P;
5933     switch (BuiltinID) {
5934     default: llvm_unreachable("missing builtin ID in switch!");
5935     case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
5936     case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
5937     case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
5938     case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
5939     case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
5940     }
5941     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5942     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5943     Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5944     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5945     return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
5946   }
5947   case NEON::BI__builtin_neon_vceqs_f32:
5948   case NEON::BI__builtin_neon_vcles_f32:
5949   case NEON::BI__builtin_neon_vclts_f32:
5950   case NEON::BI__builtin_neon_vcges_f32:
5951   case NEON::BI__builtin_neon_vcgts_f32: {
5952     llvm::CmpInst::Predicate P;
5953     switch (BuiltinID) {
5954     default: llvm_unreachable("missing builtin ID in switch!");
5955     case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
5956     case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
5957     case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
5958     case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
5959     case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
5960     }
5961     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5962     Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
5963     Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
5964     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5965     return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
5966   }
5967   case NEON::BI__builtin_neon_vceqd_s64:
5968   case NEON::BI__builtin_neon_vceqd_u64:
5969   case NEON::BI__builtin_neon_vcgtd_s64:
5970   case NEON::BI__builtin_neon_vcgtd_u64:
5971   case NEON::BI__builtin_neon_vcltd_s64:
5972   case NEON::BI__builtin_neon_vcltd_u64:
5973   case NEON::BI__builtin_neon_vcged_u64:
5974   case NEON::BI__builtin_neon_vcged_s64:
5975   case NEON::BI__builtin_neon_vcled_u64:
5976   case NEON::BI__builtin_neon_vcled_s64: {
5977     llvm::CmpInst::Predicate P;
5978     switch (BuiltinID) {
5979     default: llvm_unreachable("missing builtin ID in switch!");
5980     case NEON::BI__builtin_neon_vceqd_s64:
5981     case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
5982     case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
5983     case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
5984     case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
5985     case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
5986     case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
5987     case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
5988     case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
5989     case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
5990     }
5991     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5992     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5993     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5994     Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
5995     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
5996   }
5997   case NEON::BI__builtin_neon_vtstd_s64:
5998   case NEON::BI__builtin_neon_vtstd_u64: {
5999     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6000     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
6001     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6002     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
6003     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
6004                                 llvm::Constant::getNullValue(Int64Ty));
6005     return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
6006   }
6007   case NEON::BI__builtin_neon_vset_lane_i8:
6008   case NEON::BI__builtin_neon_vset_lane_i16:
6009   case NEON::BI__builtin_neon_vset_lane_i32:
6010   case NEON::BI__builtin_neon_vset_lane_i64:
6011   case NEON::BI__builtin_neon_vset_lane_f32:
6012   case NEON::BI__builtin_neon_vsetq_lane_i8:
6013   case NEON::BI__builtin_neon_vsetq_lane_i16:
6014   case NEON::BI__builtin_neon_vsetq_lane_i32:
6015   case NEON::BI__builtin_neon_vsetq_lane_i64:
6016   case NEON::BI__builtin_neon_vsetq_lane_f32:
6017     Ops.push_back(EmitScalarExpr(E->getArg(2)));
6018     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6019   case NEON::BI__builtin_neon_vset_lane_f64:
6020     // The vector type needs a cast for the v1f64 variant.
6021     Ops[1] = Builder.CreateBitCast(Ops[1],
6022                                    llvm::VectorType::get(DoubleTy, 1));
6023     Ops.push_back(EmitScalarExpr(E->getArg(2)));
6024     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6025   case NEON::BI__builtin_neon_vsetq_lane_f64:
6026     // The vector type needs a cast for the v2f64 variant.
6027     Ops[1] = Builder.CreateBitCast(Ops[1],
6028         llvm::VectorType::get(DoubleTy, 2));
6029     Ops.push_back(EmitScalarExpr(E->getArg(2)));
6030     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6031 
6032   case NEON::BI__builtin_neon_vget_lane_i8:
6033   case NEON::BI__builtin_neon_vdupb_lane_i8:
6034     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
6035     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6036                                         "vget_lane");
6037   case NEON::BI__builtin_neon_vgetq_lane_i8:
6038   case NEON::BI__builtin_neon_vdupb_laneq_i8:
6039     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
6040     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6041                                         "vgetq_lane");
6042   case NEON::BI__builtin_neon_vget_lane_i16:
6043   case NEON::BI__builtin_neon_vduph_lane_i16:
6044     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
6045     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6046                                         "vget_lane");
6047   case NEON::BI__builtin_neon_vgetq_lane_i16:
6048   case NEON::BI__builtin_neon_vduph_laneq_i16:
6049     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
6050     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6051                                         "vgetq_lane");
6052   case NEON::BI__builtin_neon_vget_lane_i32:
6053   case NEON::BI__builtin_neon_vdups_lane_i32:
6054     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
6055     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6056                                         "vget_lane");
6057   case NEON::BI__builtin_neon_vdups_lane_f32:
6058     Ops[0] = Builder.CreateBitCast(Ops[0],
6059         llvm::VectorType::get(FloatTy, 2));
6060     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6061                                         "vdups_lane");
6062   case NEON::BI__builtin_neon_vgetq_lane_i32:
6063   case NEON::BI__builtin_neon_vdups_laneq_i32:
6064     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
6065     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6066                                         "vgetq_lane");
6067   case NEON::BI__builtin_neon_vget_lane_i64:
6068   case NEON::BI__builtin_neon_vdupd_lane_i64:
6069     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
6070     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6071                                         "vget_lane");
6072   case NEON::BI__builtin_neon_vdupd_lane_f64:
6073     Ops[0] = Builder.CreateBitCast(Ops[0],
6074         llvm::VectorType::get(DoubleTy, 1));
6075     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6076                                         "vdupd_lane");
6077   case NEON::BI__builtin_neon_vgetq_lane_i64:
6078   case NEON::BI__builtin_neon_vdupd_laneq_i64:
6079     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
6080     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6081                                         "vgetq_lane");
6082   case NEON::BI__builtin_neon_vget_lane_f32:
6083     Ops[0] = Builder.CreateBitCast(Ops[0],
6084         llvm::VectorType::get(FloatTy, 2));
6085     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6086                                         "vget_lane");
6087   case NEON::BI__builtin_neon_vget_lane_f64:
6088     Ops[0] = Builder.CreateBitCast(Ops[0],
6089         llvm::VectorType::get(DoubleTy, 1));
6090     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6091                                         "vget_lane");
6092   case NEON::BI__builtin_neon_vgetq_lane_f32:
6093   case NEON::BI__builtin_neon_vdups_laneq_f32:
6094     Ops[0] = Builder.CreateBitCast(Ops[0],
6095         llvm::VectorType::get(FloatTy, 4));
6096     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6097                                         "vgetq_lane");
6098   case NEON::BI__builtin_neon_vgetq_lane_f64:
6099   case NEON::BI__builtin_neon_vdupd_laneq_f64:
6100     Ops[0] = Builder.CreateBitCast(Ops[0],
6101         llvm::VectorType::get(DoubleTy, 2));
6102     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6103                                         "vgetq_lane");
6104   case NEON::BI__builtin_neon_vaddd_s64:
6105   case NEON::BI__builtin_neon_vaddd_u64:
6106     return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
6107   case NEON::BI__builtin_neon_vsubd_s64:
6108   case NEON::BI__builtin_neon_vsubd_u64:
6109     return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
6110   case NEON::BI__builtin_neon_vqdmlalh_s16:
6111   case NEON::BI__builtin_neon_vqdmlslh_s16: {
6112     SmallVector<Value *, 2> ProductOps;
6113     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
6114     ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
6115     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
6116     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6117                           ProductOps, "vqdmlXl");
6118     Constant *CI = ConstantInt::get(SizeTy, 0);
6119     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6120 
6121     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
6122                                         ? Intrinsic::aarch64_neon_sqadd
6123                                         : Intrinsic::aarch64_neon_sqsub;
6124     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
6125   }
6126   case NEON::BI__builtin_neon_vqshlud_n_s64: {
6127     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6128     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
6129     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
6130                         Ops, "vqshlu_n");
6131   }
6132   case NEON::BI__builtin_neon_vqshld_n_u64:
6133   case NEON::BI__builtin_neon_vqshld_n_s64: {
6134     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
6135                                    ? Intrinsic::aarch64_neon_uqshl
6136                                    : Intrinsic::aarch64_neon_sqshl;
6137     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6138     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
6139     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
6140   }
6141   case NEON::BI__builtin_neon_vrshrd_n_u64:
6142   case NEON::BI__builtin_neon_vrshrd_n_s64: {
6143     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
6144                                    ? Intrinsic::aarch64_neon_urshl
6145                                    : Intrinsic::aarch64_neon_srshl;
6146     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6147     int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
6148     Ops[1] = ConstantInt::get(Int64Ty, -SV);
6149     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
6150   }
6151   case NEON::BI__builtin_neon_vrsrad_n_u64:
6152   case NEON::BI__builtin_neon_vrsrad_n_s64: {
6153     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
6154                                    ? Intrinsic::aarch64_neon_urshl
6155                                    : Intrinsic::aarch64_neon_srshl;
6156     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6157     Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
6158     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
6159                                 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
6160     return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
6161   }
6162   case NEON::BI__builtin_neon_vshld_n_s64:
6163   case NEON::BI__builtin_neon_vshld_n_u64: {
6164     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6165     return Builder.CreateShl(
6166         Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
6167   }
6168   case NEON::BI__builtin_neon_vshrd_n_s64: {
6169     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6170     return Builder.CreateAShr(
6171         Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
6172                                                    Amt->getZExtValue())),
6173         "shrd_n");
6174   }
6175   case NEON::BI__builtin_neon_vshrd_n_u64: {
6176     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6177     uint64_t ShiftAmt = Amt->getZExtValue();
6178     // Right-shifting an unsigned value by its size yields 0.
6179     if (ShiftAmt == 64)
6180       return ConstantInt::get(Int64Ty, 0);
6181     return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
6182                               "shrd_n");
6183   }
6184   case NEON::BI__builtin_neon_vsrad_n_s64: {
6185     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
6186     Ops[1] = Builder.CreateAShr(
6187         Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
6188                                                    Amt->getZExtValue())),
6189         "shrd_n");
6190     return Builder.CreateAdd(Ops[0], Ops[1]);
6191   }
6192   case NEON::BI__builtin_neon_vsrad_n_u64: {
6193     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
6194     uint64_t ShiftAmt = Amt->getZExtValue();
6195     // Right-shifting an unsigned value by its size yields 0.
6196     // As Op + 0 = Op, return Ops[0] directly.
6197     if (ShiftAmt == 64)
6198       return Ops[0];
6199     Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
6200                                 "shrd_n");
6201     return Builder.CreateAdd(Ops[0], Ops[1]);
6202   }
6203   case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
6204   case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
6205   case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
6206   case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
6207     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
6208                                           "lane");
6209     SmallVector<Value *, 2> ProductOps;
6210     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
6211     ProductOps.push_back(vectorWrapScalar16(Ops[2]));
6212     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
6213     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6214                           ProductOps, "vqdmlXl");
6215     Constant *CI = ConstantInt::get(SizeTy, 0);
6216     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6217     Ops.pop_back();
6218 
6219     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
6220                        BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
6221                           ? Intrinsic::aarch64_neon_sqadd
6222                           : Intrinsic::aarch64_neon_sqsub;
6223     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
6224   }
6225   case NEON::BI__builtin_neon_vqdmlals_s32:
6226   case NEON::BI__builtin_neon_vqdmlsls_s32: {
6227     SmallVector<Value *, 2> ProductOps;
6228     ProductOps.push_back(Ops[1]);
6229     ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
6230     Ops[1] =
6231         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6232                      ProductOps, "vqdmlXl");
6233 
6234     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
6235                                         ? Intrinsic::aarch64_neon_sqadd
6236                                         : Intrinsic::aarch64_neon_sqsub;
6237     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
6238   }
6239   case NEON::BI__builtin_neon_vqdmlals_lane_s32:
6240   case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
6241   case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
6242   case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
6243     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
6244                                           "lane");
6245     SmallVector<Value *, 2> ProductOps;
6246     ProductOps.push_back(Ops[1]);
6247     ProductOps.push_back(Ops[2]);
6248     Ops[1] =
6249         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6250                      ProductOps, "vqdmlXl");
6251     Ops.pop_back();
6252 
6253     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
6254                        BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
6255                           ? Intrinsic::aarch64_neon_sqadd
6256                           : Intrinsic::aarch64_neon_sqsub;
6257     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
6258   }
6259   }
6260 
6261   llvm::VectorType *VTy = GetNeonType(this, Type);
6262   llvm::Type *Ty = VTy;
6263   if (!Ty)
6264     return nullptr;
6265 
6266   // Not all intrinsics handled by the common case work for AArch64 yet, so only
6267   // defer to common code if it's been added to our special map.
6268   Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
6269                                    AArch64SIMDIntrinsicsProvenSorted);
6270 
6271   if (Builtin)
6272     return EmitCommonNeonBuiltinExpr(
6273         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
6274         Builtin->NameHint, Builtin->TypeModifier, E, Ops,
6275         /*never use addresses*/ Address::invalid(), Address::invalid());
6276 
6277   if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops))
6278     return V;
6279 
6280   unsigned Int;
6281   switch (BuiltinID) {
6282   default: return nullptr;
6283   case NEON::BI__builtin_neon_vbsl_v:
6284   case NEON::BI__builtin_neon_vbslq_v: {
6285     llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6286     Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
6287     Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
6288     Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
6289 
6290     Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
6291     Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
6292     Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
6293     return Builder.CreateBitCast(Ops[0], Ty);
6294   }
6295   case NEON::BI__builtin_neon_vfma_lane_v:
6296   case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
6297     // The ARM builtins (and instructions) have the addend as the first
6298     // operand, but the 'fma' intrinsics have it last. Swap it around here.
6299     Value *Addend = Ops[0];
6300     Value *Multiplicand = Ops[1];
6301     Value *LaneSource = Ops[2];
6302     Ops[0] = Multiplicand;
6303     Ops[1] = LaneSource;
6304     Ops[2] = Addend;
6305 
6306     // Now adjust things to handle the lane access.
6307     llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
6308       llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
6309       VTy;
6310     llvm::Constant *cst = cast<Constant>(Ops[3]);
6311     Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
6312     Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
6313     Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
6314 
6315     Ops.pop_back();
6316     Int = Intrinsic::fma;
6317     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
6318   }
6319   case NEON::BI__builtin_neon_vfma_laneq_v: {
6320     llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6321     // v1f64 fma should be mapped to Neon scalar f64 fma
6322     if (VTy && VTy->getElementType() == DoubleTy) {
6323       Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6324       Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6325       llvm::Type *VTy = GetNeonType(this,
6326         NeonTypeFlags(NeonTypeFlags::Float64, false, true));
6327       Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
6328       Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6329       Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
6330       Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
6331       return Builder.CreateBitCast(Result, Ty);
6332     }
6333     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6334     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6335     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6336 
6337     llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
6338                                             VTy->getNumElements() * 2);
6339     Ops[2] = Builder.CreateBitCast(Ops[2], STy);
6340     Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
6341                                                cast<ConstantInt>(Ops[3]));
6342     Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
6343 
6344     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
6345   }
6346   case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6347     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6348     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6349     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6350 
6351     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6352     Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
6353     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
6354   }
6355   case NEON::BI__builtin_neon_vfmas_lane_f32:
6356   case NEON::BI__builtin_neon_vfmas_laneq_f32:
6357   case NEON::BI__builtin_neon_vfmad_lane_f64:
6358   case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6359     Ops.push_back(EmitScalarExpr(E->getArg(3)));
6360     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
6361     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6362     Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6363     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
6364   }
6365   case NEON::BI__builtin_neon_vmull_v:
6366     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6367     Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6368     if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6369     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
6370   case NEON::BI__builtin_neon_vmax_v:
6371   case NEON::BI__builtin_neon_vmaxq_v:
6372     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6373     Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6374     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6375     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
6376   case NEON::BI__builtin_neon_vmin_v:
6377   case NEON::BI__builtin_neon_vminq_v:
6378     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6379     Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6380     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6381     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
6382   case NEON::BI__builtin_neon_vabd_v:
6383   case NEON::BI__builtin_neon_vabdq_v:
6384     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6385     Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6386     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6387     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
6388   case NEON::BI__builtin_neon_vpadal_v:
6389   case NEON::BI__builtin_neon_vpadalq_v: {
6390     unsigned ArgElts = VTy->getNumElements();
6391     llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
6392     unsigned BitWidth = EltTy->getBitWidth();
6393     llvm::Type *ArgTy = llvm::VectorType::get(
6394         llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
6395     llvm::Type* Tys[2] = { VTy, ArgTy };
6396     Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6397     SmallVector<llvm::Value*, 1> TmpOps;
6398     TmpOps.push_back(Ops[1]);
6399     Function *F = CGM.getIntrinsic(Int, Tys);
6400     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
6401     llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
6402     return Builder.CreateAdd(tmp, addend);
6403   }
6404   case NEON::BI__builtin_neon_vpmin_v:
6405   case NEON::BI__builtin_neon_vpminq_v:
6406     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6407     Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6408     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6409     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
6410   case NEON::BI__builtin_neon_vpmax_v:
6411   case NEON::BI__builtin_neon_vpmaxq_v:
6412     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6413     Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6414     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6415     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
6416   case NEON::BI__builtin_neon_vminnm_v:
6417   case NEON::BI__builtin_neon_vminnmq_v:
6418     Int = Intrinsic::aarch64_neon_fminnm;
6419     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
6420   case NEON::BI__builtin_neon_vmaxnm_v:
6421   case NEON::BI__builtin_neon_vmaxnmq_v:
6422     Int = Intrinsic::aarch64_neon_fmaxnm;
6423     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
6424   case NEON::BI__builtin_neon_vrecpss_f32: {
6425     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6426     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
6427                         Ops, "vrecps");
6428   }
6429   case NEON::BI__builtin_neon_vrecpsd_f64: {
6430     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6431     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
6432                         Ops, "vrecps");
6433   }
6434   case NEON::BI__builtin_neon_vqshrun_n_v:
6435     Int = Intrinsic::aarch64_neon_sqshrun;
6436     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
6437   case NEON::BI__builtin_neon_vqrshrun_n_v:
6438     Int = Intrinsic::aarch64_neon_sqrshrun;
6439     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
6440   case NEON::BI__builtin_neon_vqshrn_n_v:
6441     Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6442     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
6443   case NEON::BI__builtin_neon_vrshrn_n_v:
6444     Int = Intrinsic::aarch64_neon_rshrn;
6445     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
6446   case NEON::BI__builtin_neon_vqrshrn_n_v:
6447     Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6448     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
6449   case NEON::BI__builtin_neon_vrnda_v:
6450   case NEON::BI__builtin_neon_vrndaq_v: {
6451     Int = Intrinsic::round;
6452     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
6453   }
6454   case NEON::BI__builtin_neon_vrndi_v:
6455   case NEON::BI__builtin_neon_vrndiq_v: {
6456     Int = Intrinsic::nearbyint;
6457     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
6458   }
6459   case NEON::BI__builtin_neon_vrndm_v:
6460   case NEON::BI__builtin_neon_vrndmq_v: {
6461     Int = Intrinsic::floor;
6462     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
6463   }
6464   case NEON::BI__builtin_neon_vrndn_v:
6465   case NEON::BI__builtin_neon_vrndnq_v: {
6466     Int = Intrinsic::aarch64_neon_frintn;
6467     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
6468   }
6469   case NEON::BI__builtin_neon_vrndp_v:
6470   case NEON::BI__builtin_neon_vrndpq_v: {
6471     Int = Intrinsic::ceil;
6472     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
6473   }
6474   case NEON::BI__builtin_neon_vrndx_v:
6475   case NEON::BI__builtin_neon_vrndxq_v: {
6476     Int = Intrinsic::rint;
6477     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
6478   }
6479   case NEON::BI__builtin_neon_vrnd_v:
6480   case NEON::BI__builtin_neon_vrndq_v: {
6481     Int = Intrinsic::trunc;
6482     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
6483   }
6484   case NEON::BI__builtin_neon_vceqz_v:
6485   case NEON::BI__builtin_neon_vceqzq_v:
6486     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
6487                                          ICmpInst::ICMP_EQ, "vceqz");
6488   case NEON::BI__builtin_neon_vcgez_v:
6489   case NEON::BI__builtin_neon_vcgezq_v:
6490     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
6491                                          ICmpInst::ICMP_SGE, "vcgez");
6492   case NEON::BI__builtin_neon_vclez_v:
6493   case NEON::BI__builtin_neon_vclezq_v:
6494     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
6495                                          ICmpInst::ICMP_SLE, "vclez");
6496   case NEON::BI__builtin_neon_vcgtz_v:
6497   case NEON::BI__builtin_neon_vcgtzq_v:
6498     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
6499                                          ICmpInst::ICMP_SGT, "vcgtz");
6500   case NEON::BI__builtin_neon_vcltz_v:
6501   case NEON::BI__builtin_neon_vcltzq_v:
6502     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
6503                                          ICmpInst::ICMP_SLT, "vcltz");
6504   case NEON::BI__builtin_neon_vcvt_f64_v:
6505   case NEON::BI__builtin_neon_vcvtq_f64_v:
6506     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6507     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
6508     return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
6509                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
6510   case NEON::BI__builtin_neon_vcvt_f64_f32: {
6511     assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
6512            "unexpected vcvt_f64_f32 builtin");
6513     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
6514     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6515 
6516     return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
6517   }
6518   case NEON::BI__builtin_neon_vcvt_f32_f64: {
6519     assert(Type.getEltType() == NeonTypeFlags::Float32 &&
6520            "unexpected vcvt_f32_f64 builtin");
6521     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
6522     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6523 
6524     return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
6525   }
6526   case NEON::BI__builtin_neon_vcvt_s32_v:
6527   case NEON::BI__builtin_neon_vcvt_u32_v:
6528   case NEON::BI__builtin_neon_vcvt_s64_v:
6529   case NEON::BI__builtin_neon_vcvt_u64_v:
6530   case NEON::BI__builtin_neon_vcvtq_s32_v:
6531   case NEON::BI__builtin_neon_vcvtq_u32_v:
6532   case NEON::BI__builtin_neon_vcvtq_s64_v:
6533   case NEON::BI__builtin_neon_vcvtq_u64_v: {
6534     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
6535     if (usgn)
6536       return Builder.CreateFPToUI(Ops[0], Ty);
6537     return Builder.CreateFPToSI(Ops[0], Ty);
6538   }
6539   case NEON::BI__builtin_neon_vcvta_s32_v:
6540   case NEON::BI__builtin_neon_vcvtaq_s32_v:
6541   case NEON::BI__builtin_neon_vcvta_u32_v:
6542   case NEON::BI__builtin_neon_vcvtaq_u32_v:
6543   case NEON::BI__builtin_neon_vcvta_s64_v:
6544   case NEON::BI__builtin_neon_vcvtaq_s64_v:
6545   case NEON::BI__builtin_neon_vcvta_u64_v:
6546   case NEON::BI__builtin_neon_vcvtaq_u64_v: {
6547     Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6548     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6549     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
6550   }
6551   case NEON::BI__builtin_neon_vcvtm_s32_v:
6552   case NEON::BI__builtin_neon_vcvtmq_s32_v:
6553   case NEON::BI__builtin_neon_vcvtm_u32_v:
6554   case NEON::BI__builtin_neon_vcvtmq_u32_v:
6555   case NEON::BI__builtin_neon_vcvtm_s64_v:
6556   case NEON::BI__builtin_neon_vcvtmq_s64_v:
6557   case NEON::BI__builtin_neon_vcvtm_u64_v:
6558   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6559     Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6560     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6561     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
6562   }
6563   case NEON::BI__builtin_neon_vcvtn_s32_v:
6564   case NEON::BI__builtin_neon_vcvtnq_s32_v:
6565   case NEON::BI__builtin_neon_vcvtn_u32_v:
6566   case NEON::BI__builtin_neon_vcvtnq_u32_v:
6567   case NEON::BI__builtin_neon_vcvtn_s64_v:
6568   case NEON::BI__builtin_neon_vcvtnq_s64_v:
6569   case NEON::BI__builtin_neon_vcvtn_u64_v:
6570   case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6571     Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6572     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6573     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
6574   }
6575   case NEON::BI__builtin_neon_vcvtp_s32_v:
6576   case NEON::BI__builtin_neon_vcvtpq_s32_v:
6577   case NEON::BI__builtin_neon_vcvtp_u32_v:
6578   case NEON::BI__builtin_neon_vcvtpq_u32_v:
6579   case NEON::BI__builtin_neon_vcvtp_s64_v:
6580   case NEON::BI__builtin_neon_vcvtpq_s64_v:
6581   case NEON::BI__builtin_neon_vcvtp_u64_v:
6582   case NEON::BI__builtin_neon_vcvtpq_u64_v: {
6583     Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
6584     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6585     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
6586   }
6587   case NEON::BI__builtin_neon_vmulx_v:
6588   case NEON::BI__builtin_neon_vmulxq_v: {
6589     Int = Intrinsic::aarch64_neon_fmulx;
6590     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
6591   }
6592   case NEON::BI__builtin_neon_vmul_lane_v:
6593   case NEON::BI__builtin_neon_vmul_laneq_v: {
6594     // v1f64 vmul_lane should be mapped to Neon scalar mul lane
6595     bool Quad = false;
6596     if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
6597       Quad = true;
6598     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6599     llvm::Type *VTy = GetNeonType(this,
6600       NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
6601     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6602     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
6603     Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
6604     return Builder.CreateBitCast(Result, Ty);
6605   }
6606   case NEON::BI__builtin_neon_vnegd_s64:
6607     return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
6608   case NEON::BI__builtin_neon_vpmaxnm_v:
6609   case NEON::BI__builtin_neon_vpmaxnmq_v: {
6610     Int = Intrinsic::aarch64_neon_fmaxnmp;
6611     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
6612   }
6613   case NEON::BI__builtin_neon_vpminnm_v:
6614   case NEON::BI__builtin_neon_vpminnmq_v: {
6615     Int = Intrinsic::aarch64_neon_fminnmp;
6616     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
6617   }
6618   case NEON::BI__builtin_neon_vsqrt_v:
6619   case NEON::BI__builtin_neon_vsqrtq_v: {
6620     Int = Intrinsic::sqrt;
6621     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6622     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
6623   }
6624   case NEON::BI__builtin_neon_vrbit_v:
6625   case NEON::BI__builtin_neon_vrbitq_v: {
6626     Int = Intrinsic::aarch64_neon_rbit;
6627     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
6628   }
6629   case NEON::BI__builtin_neon_vaddv_u8:
6630     // FIXME: These are handled by the AArch64 scalar code.
6631     usgn = true;
6632     // FALLTHROUGH
6633   case NEON::BI__builtin_neon_vaddv_s8: {
6634     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6635     Ty = Int32Ty;
6636     VTy = llvm::VectorType::get(Int8Ty, 8);
6637     llvm::Type *Tys[2] = { Ty, VTy };
6638     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6639     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6640     return Builder.CreateTrunc(Ops[0], Int8Ty);
6641   }
6642   case NEON::BI__builtin_neon_vaddv_u16:
6643     usgn = true;
6644     // FALLTHROUGH
6645   case NEON::BI__builtin_neon_vaddv_s16: {
6646     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6647     Ty = Int32Ty;
6648     VTy = llvm::VectorType::get(Int16Ty, 4);
6649     llvm::Type *Tys[2] = { Ty, VTy };
6650     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6651     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6652     return Builder.CreateTrunc(Ops[0], Int16Ty);
6653   }
6654   case NEON::BI__builtin_neon_vaddvq_u8:
6655     usgn = true;
6656     // FALLTHROUGH
6657   case NEON::BI__builtin_neon_vaddvq_s8: {
6658     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6659     Ty = Int32Ty;
6660     VTy = llvm::VectorType::get(Int8Ty, 16);
6661     llvm::Type *Tys[2] = { Ty, VTy };
6662     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6663     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6664     return Builder.CreateTrunc(Ops[0], Int8Ty);
6665   }
6666   case NEON::BI__builtin_neon_vaddvq_u16:
6667     usgn = true;
6668     // FALLTHROUGH
6669   case NEON::BI__builtin_neon_vaddvq_s16: {
6670     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6671     Ty = Int32Ty;
6672     VTy = llvm::VectorType::get(Int16Ty, 8);
6673     llvm::Type *Tys[2] = { Ty, VTy };
6674     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6675     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6676     return Builder.CreateTrunc(Ops[0], Int16Ty);
6677   }
6678   case NEON::BI__builtin_neon_vmaxv_u8: {
6679     Int = Intrinsic::aarch64_neon_umaxv;
6680     Ty = Int32Ty;
6681     VTy = llvm::VectorType::get(Int8Ty, 8);
6682     llvm::Type *Tys[2] = { Ty, VTy };
6683     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6684     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6685     return Builder.CreateTrunc(Ops[0], Int8Ty);
6686   }
6687   case NEON::BI__builtin_neon_vmaxv_u16: {
6688     Int = Intrinsic::aarch64_neon_umaxv;
6689     Ty = Int32Ty;
6690     VTy = llvm::VectorType::get(Int16Ty, 4);
6691     llvm::Type *Tys[2] = { Ty, VTy };
6692     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6693     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6694     return Builder.CreateTrunc(Ops[0], Int16Ty);
6695   }
6696   case NEON::BI__builtin_neon_vmaxvq_u8: {
6697     Int = Intrinsic::aarch64_neon_umaxv;
6698     Ty = Int32Ty;
6699     VTy = llvm::VectorType::get(Int8Ty, 16);
6700     llvm::Type *Tys[2] = { Ty, VTy };
6701     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6702     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6703     return Builder.CreateTrunc(Ops[0], Int8Ty);
6704   }
6705   case NEON::BI__builtin_neon_vmaxvq_u16: {
6706     Int = Intrinsic::aarch64_neon_umaxv;
6707     Ty = Int32Ty;
6708     VTy = llvm::VectorType::get(Int16Ty, 8);
6709     llvm::Type *Tys[2] = { Ty, VTy };
6710     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6711     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6712     return Builder.CreateTrunc(Ops[0], Int16Ty);
6713   }
6714   case NEON::BI__builtin_neon_vmaxv_s8: {
6715     Int = Intrinsic::aarch64_neon_smaxv;
6716     Ty = Int32Ty;
6717     VTy = llvm::VectorType::get(Int8Ty, 8);
6718     llvm::Type *Tys[2] = { Ty, VTy };
6719     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6720     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6721     return Builder.CreateTrunc(Ops[0], Int8Ty);
6722   }
6723   case NEON::BI__builtin_neon_vmaxv_s16: {
6724     Int = Intrinsic::aarch64_neon_smaxv;
6725     Ty = Int32Ty;
6726     VTy = llvm::VectorType::get(Int16Ty, 4);
6727     llvm::Type *Tys[2] = { Ty, VTy };
6728     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6729     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6730     return Builder.CreateTrunc(Ops[0], Int16Ty);
6731   }
6732   case NEON::BI__builtin_neon_vmaxvq_s8: {
6733     Int = Intrinsic::aarch64_neon_smaxv;
6734     Ty = Int32Ty;
6735     VTy = llvm::VectorType::get(Int8Ty, 16);
6736     llvm::Type *Tys[2] = { Ty, VTy };
6737     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6738     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6739     return Builder.CreateTrunc(Ops[0], Int8Ty);
6740   }
6741   case NEON::BI__builtin_neon_vmaxvq_s16: {
6742     Int = Intrinsic::aarch64_neon_smaxv;
6743     Ty = Int32Ty;
6744     VTy = llvm::VectorType::get(Int16Ty, 8);
6745     llvm::Type *Tys[2] = { Ty, VTy };
6746     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6747     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6748     return Builder.CreateTrunc(Ops[0], Int16Ty);
6749   }
6750   case NEON::BI__builtin_neon_vminv_u8: {
6751     Int = Intrinsic::aarch64_neon_uminv;
6752     Ty = Int32Ty;
6753     VTy = llvm::VectorType::get(Int8Ty, 8);
6754     llvm::Type *Tys[2] = { Ty, VTy };
6755     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6756     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6757     return Builder.CreateTrunc(Ops[0], Int8Ty);
6758   }
6759   case NEON::BI__builtin_neon_vminv_u16: {
6760     Int = Intrinsic::aarch64_neon_uminv;
6761     Ty = Int32Ty;
6762     VTy = llvm::VectorType::get(Int16Ty, 4);
6763     llvm::Type *Tys[2] = { Ty, VTy };
6764     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6765     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6766     return Builder.CreateTrunc(Ops[0], Int16Ty);
6767   }
6768   case NEON::BI__builtin_neon_vminvq_u8: {
6769     Int = Intrinsic::aarch64_neon_uminv;
6770     Ty = Int32Ty;
6771     VTy = llvm::VectorType::get(Int8Ty, 16);
6772     llvm::Type *Tys[2] = { Ty, VTy };
6773     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6774     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6775     return Builder.CreateTrunc(Ops[0], Int8Ty);
6776   }
6777   case NEON::BI__builtin_neon_vminvq_u16: {
6778     Int = Intrinsic::aarch64_neon_uminv;
6779     Ty = Int32Ty;
6780     VTy = llvm::VectorType::get(Int16Ty, 8);
6781     llvm::Type *Tys[2] = { Ty, VTy };
6782     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6783     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6784     return Builder.CreateTrunc(Ops[0], Int16Ty);
6785   }
6786   case NEON::BI__builtin_neon_vminv_s8: {
6787     Int = Intrinsic::aarch64_neon_sminv;
6788     Ty = Int32Ty;
6789     VTy = llvm::VectorType::get(Int8Ty, 8);
6790     llvm::Type *Tys[2] = { Ty, VTy };
6791     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6792     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6793     return Builder.CreateTrunc(Ops[0], Int8Ty);
6794   }
6795   case NEON::BI__builtin_neon_vminv_s16: {
6796     Int = Intrinsic::aarch64_neon_sminv;
6797     Ty = Int32Ty;
6798     VTy = llvm::VectorType::get(Int16Ty, 4);
6799     llvm::Type *Tys[2] = { Ty, VTy };
6800     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6801     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6802     return Builder.CreateTrunc(Ops[0], Int16Ty);
6803   }
6804   case NEON::BI__builtin_neon_vminvq_s8: {
6805     Int = Intrinsic::aarch64_neon_sminv;
6806     Ty = Int32Ty;
6807     VTy = llvm::VectorType::get(Int8Ty, 16);
6808     llvm::Type *Tys[2] = { Ty, VTy };
6809     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6810     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6811     return Builder.CreateTrunc(Ops[0], Int8Ty);
6812   }
6813   case NEON::BI__builtin_neon_vminvq_s16: {
6814     Int = Intrinsic::aarch64_neon_sminv;
6815     Ty = Int32Ty;
6816     VTy = llvm::VectorType::get(Int16Ty, 8);
6817     llvm::Type *Tys[2] = { Ty, VTy };
6818     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6819     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6820     return Builder.CreateTrunc(Ops[0], Int16Ty);
6821   }
6822   case NEON::BI__builtin_neon_vmul_n_f64: {
6823     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6824     Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
6825     return Builder.CreateFMul(Ops[0], RHS);
6826   }
6827   case NEON::BI__builtin_neon_vaddlv_u8: {
6828     Int = Intrinsic::aarch64_neon_uaddlv;
6829     Ty = Int32Ty;
6830     VTy = llvm::VectorType::get(Int8Ty, 8);
6831     llvm::Type *Tys[2] = { Ty, VTy };
6832     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6833     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6834     return Builder.CreateTrunc(Ops[0], Int16Ty);
6835   }
6836   case NEON::BI__builtin_neon_vaddlv_u16: {
6837     Int = Intrinsic::aarch64_neon_uaddlv;
6838     Ty = Int32Ty;
6839     VTy = llvm::VectorType::get(Int16Ty, 4);
6840     llvm::Type *Tys[2] = { Ty, VTy };
6841     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6842     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6843   }
6844   case NEON::BI__builtin_neon_vaddlvq_u8: {
6845     Int = Intrinsic::aarch64_neon_uaddlv;
6846     Ty = Int32Ty;
6847     VTy = llvm::VectorType::get(Int8Ty, 16);
6848     llvm::Type *Tys[2] = { Ty, VTy };
6849     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6850     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6851     return Builder.CreateTrunc(Ops[0], Int16Ty);
6852   }
6853   case NEON::BI__builtin_neon_vaddlvq_u16: {
6854     Int = Intrinsic::aarch64_neon_uaddlv;
6855     Ty = Int32Ty;
6856     VTy = llvm::VectorType::get(Int16Ty, 8);
6857     llvm::Type *Tys[2] = { Ty, VTy };
6858     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6859     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6860   }
6861   case NEON::BI__builtin_neon_vaddlv_s8: {
6862     Int = Intrinsic::aarch64_neon_saddlv;
6863     Ty = Int32Ty;
6864     VTy = llvm::VectorType::get(Int8Ty, 8);
6865     llvm::Type *Tys[2] = { Ty, VTy };
6866     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6867     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6868     return Builder.CreateTrunc(Ops[0], Int16Ty);
6869   }
6870   case NEON::BI__builtin_neon_vaddlv_s16: {
6871     Int = Intrinsic::aarch64_neon_saddlv;
6872     Ty = Int32Ty;
6873     VTy = llvm::VectorType::get(Int16Ty, 4);
6874     llvm::Type *Tys[2] = { Ty, VTy };
6875     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6876     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6877   }
6878   case NEON::BI__builtin_neon_vaddlvq_s8: {
6879     Int = Intrinsic::aarch64_neon_saddlv;
6880     Ty = Int32Ty;
6881     VTy = llvm::VectorType::get(Int8Ty, 16);
6882     llvm::Type *Tys[2] = { Ty, VTy };
6883     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6884     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6885     return Builder.CreateTrunc(Ops[0], Int16Ty);
6886   }
6887   case NEON::BI__builtin_neon_vaddlvq_s16: {
6888     Int = Intrinsic::aarch64_neon_saddlv;
6889     Ty = Int32Ty;
6890     VTy = llvm::VectorType::get(Int16Ty, 8);
6891     llvm::Type *Tys[2] = { Ty, VTy };
6892     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6893     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6894   }
6895   case NEON::BI__builtin_neon_vsri_n_v:
6896   case NEON::BI__builtin_neon_vsriq_n_v: {
6897     Int = Intrinsic::aarch64_neon_vsri;
6898     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6899     return EmitNeonCall(Intrin, Ops, "vsri_n");
6900   }
6901   case NEON::BI__builtin_neon_vsli_n_v:
6902   case NEON::BI__builtin_neon_vsliq_n_v: {
6903     Int = Intrinsic::aarch64_neon_vsli;
6904     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6905     return EmitNeonCall(Intrin, Ops, "vsli_n");
6906   }
6907   case NEON::BI__builtin_neon_vsra_n_v:
6908   case NEON::BI__builtin_neon_vsraq_n_v:
6909     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6910     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
6911     return Builder.CreateAdd(Ops[0], Ops[1]);
6912   case NEON::BI__builtin_neon_vrsra_n_v:
6913   case NEON::BI__builtin_neon_vrsraq_n_v: {
6914     Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
6915     SmallVector<llvm::Value*,2> TmpOps;
6916     TmpOps.push_back(Ops[1]);
6917     TmpOps.push_back(Ops[2]);
6918     Function* F = CGM.getIntrinsic(Int, Ty);
6919     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
6920     Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
6921     return Builder.CreateAdd(Ops[0], tmp);
6922   }
6923     // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
6924     // of an Align parameter here.
6925   case NEON::BI__builtin_neon_vld1_x2_v:
6926   case NEON::BI__builtin_neon_vld1q_x2_v:
6927   case NEON::BI__builtin_neon_vld1_x3_v:
6928   case NEON::BI__builtin_neon_vld1q_x3_v:
6929   case NEON::BI__builtin_neon_vld1_x4_v:
6930   case NEON::BI__builtin_neon_vld1q_x4_v: {
6931     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6932     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6933     llvm::Type *Tys[2] = { VTy, PTy };
6934     unsigned Int;
6935     switch (BuiltinID) {
6936     case NEON::BI__builtin_neon_vld1_x2_v:
6937     case NEON::BI__builtin_neon_vld1q_x2_v:
6938       Int = Intrinsic::aarch64_neon_ld1x2;
6939       break;
6940     case NEON::BI__builtin_neon_vld1_x3_v:
6941     case NEON::BI__builtin_neon_vld1q_x3_v:
6942       Int = Intrinsic::aarch64_neon_ld1x3;
6943       break;
6944     case NEON::BI__builtin_neon_vld1_x4_v:
6945     case NEON::BI__builtin_neon_vld1q_x4_v:
6946       Int = Intrinsic::aarch64_neon_ld1x4;
6947       break;
6948     }
6949     Function *F = CGM.getIntrinsic(Int, Tys);
6950     Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
6951     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6952     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6953     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6954   }
6955   case NEON::BI__builtin_neon_vst1_x2_v:
6956   case NEON::BI__builtin_neon_vst1q_x2_v:
6957   case NEON::BI__builtin_neon_vst1_x3_v:
6958   case NEON::BI__builtin_neon_vst1q_x3_v:
6959   case NEON::BI__builtin_neon_vst1_x4_v:
6960   case NEON::BI__builtin_neon_vst1q_x4_v: {
6961     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6962     llvm::Type *Tys[2] = { VTy, PTy };
6963     unsigned Int;
6964     switch (BuiltinID) {
6965     case NEON::BI__builtin_neon_vst1_x2_v:
6966     case NEON::BI__builtin_neon_vst1q_x2_v:
6967       Int = Intrinsic::aarch64_neon_st1x2;
6968       break;
6969     case NEON::BI__builtin_neon_vst1_x3_v:
6970     case NEON::BI__builtin_neon_vst1q_x3_v:
6971       Int = Intrinsic::aarch64_neon_st1x3;
6972       break;
6973     case NEON::BI__builtin_neon_vst1_x4_v:
6974     case NEON::BI__builtin_neon_vst1q_x4_v:
6975       Int = Intrinsic::aarch64_neon_st1x4;
6976       break;
6977     }
6978     std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6979     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
6980   }
6981   case NEON::BI__builtin_neon_vld1_v:
6982   case NEON::BI__builtin_neon_vld1q_v: {
6983     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6984     auto Alignment = CharUnits::fromQuantity(
6985         BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16);
6986     return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment);
6987   }
6988   case NEON::BI__builtin_neon_vst1_v:
6989   case NEON::BI__builtin_neon_vst1q_v:
6990     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6991     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6992     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6993   case NEON::BI__builtin_neon_vld1_lane_v:
6994   case NEON::BI__builtin_neon_vld1q_lane_v: {
6995     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6996     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6997     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6998     auto Alignment = CharUnits::fromQuantity(
6999         BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16);
7000     Ops[0] =
7001         Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
7002     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
7003   }
7004   case NEON::BI__builtin_neon_vld1_dup_v:
7005   case NEON::BI__builtin_neon_vld1q_dup_v: {
7006     Value *V = UndefValue::get(Ty);
7007     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
7008     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7009     auto Alignment = CharUnits::fromQuantity(
7010         BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 8 : 16);
7011     Ops[0] =
7012         Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
7013     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
7014     Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
7015     return EmitNeonSplat(Ops[0], CI);
7016   }
7017   case NEON::BI__builtin_neon_vst1_lane_v:
7018   case NEON::BI__builtin_neon_vst1q_lane_v:
7019     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7020     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
7021     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
7022     return Builder.CreateDefaultAlignedStore(Ops[1],
7023                                              Builder.CreateBitCast(Ops[0], Ty));
7024   case NEON::BI__builtin_neon_vld2_v:
7025   case NEON::BI__builtin_neon_vld2q_v: {
7026     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
7027     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
7028     llvm::Type *Tys[2] = { VTy, PTy };
7029     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
7030     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
7031     Ops[0] = Builder.CreateBitCast(Ops[0],
7032                 llvm::PointerType::getUnqual(Ops[1]->getType()));
7033     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7034   }
7035   case NEON::BI__builtin_neon_vld3_v:
7036   case NEON::BI__builtin_neon_vld3q_v: {
7037     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
7038     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
7039     llvm::Type *Tys[2] = { VTy, PTy };
7040     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
7041     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
7042     Ops[0] = Builder.CreateBitCast(Ops[0],
7043                 llvm::PointerType::getUnqual(Ops[1]->getType()));
7044     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7045   }
7046   case NEON::BI__builtin_neon_vld4_v:
7047   case NEON::BI__builtin_neon_vld4q_v: {
7048     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
7049     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
7050     llvm::Type *Tys[2] = { VTy, PTy };
7051     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
7052     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
7053     Ops[0] = Builder.CreateBitCast(Ops[0],
7054                 llvm::PointerType::getUnqual(Ops[1]->getType()));
7055     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7056   }
7057   case NEON::BI__builtin_neon_vld2_dup_v:
7058   case NEON::BI__builtin_neon_vld2q_dup_v: {
7059     llvm::Type *PTy =
7060       llvm::PointerType::getUnqual(VTy->getElementType());
7061     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
7062     llvm::Type *Tys[2] = { VTy, PTy };
7063     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
7064     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
7065     Ops[0] = Builder.CreateBitCast(Ops[0],
7066                 llvm::PointerType::getUnqual(Ops[1]->getType()));
7067     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7068   }
7069   case NEON::BI__builtin_neon_vld3_dup_v:
7070   case NEON::BI__builtin_neon_vld3q_dup_v: {
7071     llvm::Type *PTy =
7072       llvm::PointerType::getUnqual(VTy->getElementType());
7073     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
7074     llvm::Type *Tys[2] = { VTy, PTy };
7075     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
7076     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
7077     Ops[0] = Builder.CreateBitCast(Ops[0],
7078                 llvm::PointerType::getUnqual(Ops[1]->getType()));
7079     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7080   }
7081   case NEON::BI__builtin_neon_vld4_dup_v:
7082   case NEON::BI__builtin_neon_vld4q_dup_v: {
7083     llvm::Type *PTy =
7084       llvm::PointerType::getUnqual(VTy->getElementType());
7085     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
7086     llvm::Type *Tys[2] = { VTy, PTy };
7087     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
7088     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
7089     Ops[0] = Builder.CreateBitCast(Ops[0],
7090                 llvm::PointerType::getUnqual(Ops[1]->getType()));
7091     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7092   }
7093   case NEON::BI__builtin_neon_vld2_lane_v:
7094   case NEON::BI__builtin_neon_vld2q_lane_v: {
7095     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7096     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
7097     Ops.push_back(Ops[1]);
7098     Ops.erase(Ops.begin()+1);
7099     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7100     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7101     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
7102     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
7103     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
7104     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7105     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7106   }
7107   case NEON::BI__builtin_neon_vld3_lane_v:
7108   case NEON::BI__builtin_neon_vld3q_lane_v: {
7109     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7110     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
7111     Ops.push_back(Ops[1]);
7112     Ops.erase(Ops.begin()+1);
7113     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7114     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7115     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
7116     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
7117     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
7118     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
7119     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7120     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7121   }
7122   case NEON::BI__builtin_neon_vld4_lane_v:
7123   case NEON::BI__builtin_neon_vld4q_lane_v: {
7124     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7125     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
7126     Ops.push_back(Ops[1]);
7127     Ops.erase(Ops.begin()+1);
7128     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7129     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7130     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
7131     Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
7132     Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
7133     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
7134     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
7135     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7136     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7137   }
7138   case NEON::BI__builtin_neon_vst2_v:
7139   case NEON::BI__builtin_neon_vst2q_v: {
7140     Ops.push_back(Ops[0]);
7141     Ops.erase(Ops.begin());
7142     llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
7143     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
7144                         Ops, "");
7145   }
7146   case NEON::BI__builtin_neon_vst2_lane_v:
7147   case NEON::BI__builtin_neon_vst2q_lane_v: {
7148     Ops.push_back(Ops[0]);
7149     Ops.erase(Ops.begin());
7150     Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
7151     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7152     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
7153                         Ops, "");
7154   }
7155   case NEON::BI__builtin_neon_vst3_v:
7156   case NEON::BI__builtin_neon_vst3q_v: {
7157     Ops.push_back(Ops[0]);
7158     Ops.erase(Ops.begin());
7159     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7160     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
7161                         Ops, "");
7162   }
7163   case NEON::BI__builtin_neon_vst3_lane_v:
7164   case NEON::BI__builtin_neon_vst3q_lane_v: {
7165     Ops.push_back(Ops[0]);
7166     Ops.erase(Ops.begin());
7167     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
7168     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7169     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
7170                         Ops, "");
7171   }
7172   case NEON::BI__builtin_neon_vst4_v:
7173   case NEON::BI__builtin_neon_vst4q_v: {
7174     Ops.push_back(Ops[0]);
7175     Ops.erase(Ops.begin());
7176     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7177     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
7178                         Ops, "");
7179   }
7180   case NEON::BI__builtin_neon_vst4_lane_v:
7181   case NEON::BI__builtin_neon_vst4q_lane_v: {
7182     Ops.push_back(Ops[0]);
7183     Ops.erase(Ops.begin());
7184     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
7185     llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
7186     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
7187                         Ops, "");
7188   }
7189   case NEON::BI__builtin_neon_vtrn_v:
7190   case NEON::BI__builtin_neon_vtrnq_v: {
7191     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
7192     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7193     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7194     Value *SV = nullptr;
7195 
7196     for (unsigned vi = 0; vi != 2; ++vi) {
7197       SmallVector<uint32_t, 16> Indices;
7198       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7199         Indices.push_back(i+vi);
7200         Indices.push_back(i+e+vi);
7201       }
7202       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7203       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
7204       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7205     }
7206     return SV;
7207   }
7208   case NEON::BI__builtin_neon_vuzp_v:
7209   case NEON::BI__builtin_neon_vuzpq_v: {
7210     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
7211     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7212     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7213     Value *SV = nullptr;
7214 
7215     for (unsigned vi = 0; vi != 2; ++vi) {
7216       SmallVector<uint32_t, 16> Indices;
7217       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7218         Indices.push_back(2*i+vi);
7219 
7220       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7221       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
7222       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7223     }
7224     return SV;
7225   }
7226   case NEON::BI__builtin_neon_vzip_v:
7227   case NEON::BI__builtin_neon_vzipq_v: {
7228     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
7229     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7230     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7231     Value *SV = nullptr;
7232 
7233     for (unsigned vi = 0; vi != 2; ++vi) {
7234       SmallVector<uint32_t, 16> Indices;
7235       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7236         Indices.push_back((i + vi*e) >> 1);
7237         Indices.push_back(((i + vi*e) >> 1)+e);
7238       }
7239       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7240       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
7241       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7242     }
7243     return SV;
7244   }
7245   case NEON::BI__builtin_neon_vqtbl1q_v: {
7246     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
7247                         Ops, "vtbl1");
7248   }
7249   case NEON::BI__builtin_neon_vqtbl2q_v: {
7250     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
7251                         Ops, "vtbl2");
7252   }
7253   case NEON::BI__builtin_neon_vqtbl3q_v: {
7254     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
7255                         Ops, "vtbl3");
7256   }
7257   case NEON::BI__builtin_neon_vqtbl4q_v: {
7258     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
7259                         Ops, "vtbl4");
7260   }
7261   case NEON::BI__builtin_neon_vqtbx1q_v: {
7262     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
7263                         Ops, "vtbx1");
7264   }
7265   case NEON::BI__builtin_neon_vqtbx2q_v: {
7266     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
7267                         Ops, "vtbx2");
7268   }
7269   case NEON::BI__builtin_neon_vqtbx3q_v: {
7270     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
7271                         Ops, "vtbx3");
7272   }
7273   case NEON::BI__builtin_neon_vqtbx4q_v: {
7274     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
7275                         Ops, "vtbx4");
7276   }
7277   case NEON::BI__builtin_neon_vsqadd_v:
7278   case NEON::BI__builtin_neon_vsqaddq_v: {
7279     Int = Intrinsic::aarch64_neon_usqadd;
7280     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
7281   }
7282   case NEON::BI__builtin_neon_vuqadd_v:
7283   case NEON::BI__builtin_neon_vuqaddq_v: {
7284     Int = Intrinsic::aarch64_neon_suqadd;
7285     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
7286   }
7287   }
7288 }
7289 
7290 llvm::Value *CodeGenFunction::
7291 BuildVector(ArrayRef<llvm::Value*> Ops) {
7292   assert((Ops.size() & (Ops.size() - 1)) == 0 &&
7293          "Not a power-of-two sized vector!");
7294   bool AllConstants = true;
7295   for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
7296     AllConstants &= isa<Constant>(Ops[i]);
7297 
7298   // If this is a constant vector, create a ConstantVector.
7299   if (AllConstants) {
7300     SmallVector<llvm::Constant*, 16> CstOps;
7301     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7302       CstOps.push_back(cast<Constant>(Ops[i]));
7303     return llvm::ConstantVector::get(CstOps);
7304   }
7305 
7306   // Otherwise, insertelement the values to build the vector.
7307   Value *Result =
7308     llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
7309 
7310   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7311     Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
7312 
7313   return Result;
7314 }
7315 
7316 // Convert the mask from an integer type to a vector of i1.
7317 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
7318                               unsigned NumElts) {
7319 
7320   llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
7321                          cast<IntegerType>(Mask->getType())->getBitWidth());
7322   Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
7323 
7324   // If we have less than 8 elements, then the starting mask was an i8 and
7325   // we need to extract down to the right number of elements.
7326   if (NumElts < 8) {
7327     uint32_t Indices[4];
7328     for (unsigned i = 0; i != NumElts; ++i)
7329       Indices[i] = i;
7330     MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
7331                                              makeArrayRef(Indices, NumElts),
7332                                              "extract");
7333   }
7334   return MaskVec;
7335 }
7336 
7337 static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
7338                                  SmallVectorImpl<Value *> &Ops,
7339                                  unsigned Align) {
7340   // Cast the pointer to right type.
7341   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
7342                                llvm::PointerType::getUnqual(Ops[1]->getType()));
7343 
7344   // If the mask is all ones just emit a regular store.
7345   if (const auto *C = dyn_cast<Constant>(Ops[2]))
7346     if (C->isAllOnesValue())
7347       return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align);
7348 
7349   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
7350                                    Ops[1]->getType()->getVectorNumElements());
7351 
7352   return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec);
7353 }
7354 
7355 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
7356                                 SmallVectorImpl<Value *> &Ops, unsigned Align) {
7357   // Cast the pointer to right type.
7358   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
7359                                llvm::PointerType::getUnqual(Ops[1]->getType()));
7360 
7361   // If the mask is all ones just emit a regular store.
7362   if (const auto *C = dyn_cast<Constant>(Ops[2]))
7363     if (C->isAllOnesValue())
7364       return CGF.Builder.CreateAlignedLoad(Ops[0], Align);
7365 
7366   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
7367                                    Ops[1]->getType()->getVectorNumElements());
7368 
7369   return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
7370 }
7371 
7372 static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF,
7373                                         SmallVectorImpl<Value *> &Ops,
7374                                         llvm::Type *DstTy,
7375                                         unsigned SrcSizeInBits,
7376                                         unsigned Align) {
7377   // Load the subvector.
7378   Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align);
7379 
7380   // Create broadcast mask.
7381   unsigned NumDstElts = DstTy->getVectorNumElements();
7382   unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits();
7383 
7384   SmallVector<uint32_t, 8> Mask;
7385   for (unsigned i = 0; i != NumDstElts; i += NumSrcElts)
7386     for (unsigned j = 0; j != NumSrcElts; ++j)
7387       Mask.push_back(j);
7388 
7389   return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst");
7390 }
7391 
7392 static Value *EmitX86Select(CodeGenFunction &CGF,
7393                             Value *Mask, Value *Op0, Value *Op1) {
7394 
7395   // If the mask is all ones just return first argument.
7396   if (const auto *C = dyn_cast<Constant>(Mask))
7397     if (C->isAllOnesValue())
7398       return Op0;
7399 
7400   Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());
7401 
7402   return CGF.Builder.CreateSelect(Mask, Op0, Op1);
7403 }
7404 
7405 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
7406                                    bool Signed, SmallVectorImpl<Value *> &Ops) {
7407   unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7408   Value *Cmp;
7409 
7410   if (CC == 3) {
7411     Cmp = Constant::getNullValue(
7412                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
7413   } else if (CC == 7) {
7414     Cmp = Constant::getAllOnesValue(
7415                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
7416   } else {
7417     ICmpInst::Predicate Pred;
7418     switch (CC) {
7419     default: llvm_unreachable("Unknown condition code");
7420     case 0: Pred = ICmpInst::ICMP_EQ;  break;
7421     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
7422     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
7423     case 4: Pred = ICmpInst::ICMP_NE;  break;
7424     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
7425     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
7426     }
7427     Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
7428   }
7429 
7430   const auto *C = dyn_cast<Constant>(Ops.back());
7431   if (!C || !C->isAllOnesValue())
7432     Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts));
7433 
7434   if (NumElts < 8) {
7435     uint32_t Indices[8];
7436     for (unsigned i = 0; i != NumElts; ++i)
7437       Indices[i] = i;
7438     for (unsigned i = NumElts; i != 8; ++i)
7439       Indices[i] = i % NumElts + NumElts;
7440     Cmp = CGF.Builder.CreateShuffleVector(
7441         Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
7442   }
7443   return CGF.Builder.CreateBitCast(Cmp,
7444                                    IntegerType::get(CGF.getLLVMContext(),
7445                                                     std::max(NumElts, 8U)));
7446 }
7447 
7448 static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) {
7449 
7450   llvm::Type *Ty = Ops[0]->getType();
7451   Value *Zero = llvm::Constant::getNullValue(Ty);
7452   Value *Sub = CGF.Builder.CreateSub(Zero, Ops[0]);
7453   Value *Cmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Zero);
7454   Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Sub);
7455   if (Ops.size() == 1)
7456     return Res;
7457   return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
7458 }
7459 
7460 static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
7461                             ArrayRef<Value *> Ops) {
7462   Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
7463   Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7464 
7465   if (Ops.size() == 2)
7466     return Res;
7467 
7468   assert(Ops.size() == 4);
7469   return EmitX86Select(CGF, Ops[3], Res, Ops[2]);
7470 }
7471 
7472 static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
7473                               llvm::Type *DstTy) {
7474   unsigned NumberOfElements = DstTy->getVectorNumElements();
7475   Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
7476   return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
7477 }
7478 
7479 Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
7480   const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
7481   StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
7482   return EmitX86CpuIs(CPUStr);
7483 }
7484 
7485 Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
7486 
7487   // This enum contains the vendor, type, and subtype enums from the
7488   // runtime library concatenated together. The _START labels mark
7489   // the start and are used to adjust the value into the correct
7490   // encoding space.
7491   enum X86CPUs {
7492     INTEL = 1,
7493     AMD,
7494     CPU_TYPE_START,
7495     INTEL_BONNELL,
7496     INTEL_CORE2,
7497     INTEL_COREI7,
7498     AMDFAM10H,
7499     AMDFAM15H,
7500     INTEL_SILVERMONT,
7501     INTEL_KNL,
7502     AMD_BTVER1,
7503     AMD_BTVER2,
7504     CPU_SUBTYPE_START,
7505     INTEL_COREI7_NEHALEM,
7506     INTEL_COREI7_WESTMERE,
7507     INTEL_COREI7_SANDYBRIDGE,
7508     AMDFAM10H_BARCELONA,
7509     AMDFAM10H_SHANGHAI,
7510     AMDFAM10H_ISTANBUL,
7511     AMDFAM15H_BDVER1,
7512     AMDFAM15H_BDVER2,
7513     AMDFAM15H_BDVER3,
7514     AMDFAM15H_BDVER4,
7515     AMDFAM17H_ZNVER1,
7516     INTEL_COREI7_IVYBRIDGE,
7517     INTEL_COREI7_HASWELL,
7518     INTEL_COREI7_BROADWELL,
7519     INTEL_COREI7_SKYLAKE,
7520     INTEL_COREI7_SKYLAKE_AVX512,
7521   };
7522 
7523   X86CPUs CPU =
7524     StringSwitch<X86CPUs>(CPUStr)
7525       .Case("amd", AMD)
7526       .Case("amdfam10h", AMDFAM10H)
7527       .Case("amdfam10", AMDFAM10H)
7528       .Case("amdfam15h", AMDFAM15H)
7529       .Case("amdfam15", AMDFAM15H)
7530       .Case("atom", INTEL_BONNELL)
7531       .Case("barcelona", AMDFAM10H_BARCELONA)
7532       .Case("bdver1", AMDFAM15H_BDVER1)
7533       .Case("bdver2", AMDFAM15H_BDVER2)
7534       .Case("bdver3", AMDFAM15H_BDVER3)
7535       .Case("bdver4", AMDFAM15H_BDVER4)
7536       .Case("bonnell", INTEL_BONNELL)
7537       .Case("broadwell", INTEL_COREI7_BROADWELL)
7538       .Case("btver1", AMD_BTVER1)
7539       .Case("btver2", AMD_BTVER2)
7540       .Case("core2", INTEL_CORE2)
7541       .Case("corei7", INTEL_COREI7)
7542       .Case("haswell", INTEL_COREI7_HASWELL)
7543       .Case("intel", INTEL)
7544       .Case("istanbul", AMDFAM10H_ISTANBUL)
7545       .Case("ivybridge", INTEL_COREI7_IVYBRIDGE)
7546       .Case("knl", INTEL_KNL)
7547       .Case("nehalem", INTEL_COREI7_NEHALEM)
7548       .Case("sandybridge", INTEL_COREI7_SANDYBRIDGE)
7549       .Case("shanghai", AMDFAM10H_SHANGHAI)
7550       .Case("silvermont", INTEL_SILVERMONT)
7551       .Case("skylake", INTEL_COREI7_SKYLAKE)
7552       .Case("skylake-avx512", INTEL_COREI7_SKYLAKE_AVX512)
7553       .Case("slm", INTEL_SILVERMONT)
7554       .Case("westmere", INTEL_COREI7_WESTMERE)
7555       .Case("znver1", AMDFAM17H_ZNVER1);
7556 
7557   llvm::Type *Int32Ty = Builder.getInt32Ty();
7558 
7559   // Matching the struct layout from the compiler-rt/libgcc structure that is
7560   // filled in:
7561   // unsigned int __cpu_vendor;
7562   // unsigned int __cpu_type;
7563   // unsigned int __cpu_subtype;
7564   // unsigned int __cpu_features[1];
7565   llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
7566                                           llvm::ArrayType::get(Int32Ty, 1));
7567 
7568   // Grab the global __cpu_model.
7569   llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
7570 
7571   // Calculate the index needed to access the correct field based on the
7572   // range. Also adjust the expected value.
7573   unsigned Index;
7574   unsigned Value;
7575   if (CPU > CPU_SUBTYPE_START) {
7576     Index = 2;
7577     Value = CPU - CPU_SUBTYPE_START;
7578   } else if (CPU > CPU_TYPE_START) {
7579     Index = 1;
7580     Value = CPU - CPU_TYPE_START;
7581   } else {
7582     Index = 0;
7583     Value = CPU;
7584   }
7585 
7586   // Grab the appropriate field from __cpu_model.
7587   llvm::Value *Idxs[] = {
7588     ConstantInt::get(Int32Ty, 0),
7589     ConstantInt::get(Int32Ty, Index)
7590   };
7591   llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs);
7592   CpuValue = Builder.CreateAlignedLoad(CpuValue, CharUnits::fromQuantity(4));
7593 
7594   // Check the value of the field against the requested value.
7595   return Builder.CreateICmpEQ(CpuValue,
7596                                   llvm::ConstantInt::get(Int32Ty, Value));
7597 }
7598 
7599 Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
7600   const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
7601   StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
7602   return EmitX86CpuSupports(FeatureStr);
7603 }
7604 
7605 Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
7606   // TODO: When/if this becomes more than x86 specific then use a TargetInfo
7607   // based mapping.
7608   // Processor features and mapping to processor feature value.
7609   enum X86Features {
7610     CMOV = 0,
7611     MMX,
7612     POPCNT,
7613     SSE,
7614     SSE2,
7615     SSE3,
7616     SSSE3,
7617     SSE4_1,
7618     SSE4_2,
7619     AVX,
7620     AVX2,
7621     SSE4_A,
7622     FMA4,
7623     XOP,
7624     FMA,
7625     AVX512F,
7626     BMI,
7627     BMI2,
7628     AES,
7629     PCLMUL,
7630     AVX512VL,
7631     AVX512BW,
7632     AVX512DQ,
7633     AVX512CD,
7634     AVX512ER,
7635     AVX512PF,
7636     AVX512VBMI,
7637     AVX512IFMA,
7638     AVX5124VNNIW,
7639     AVX5124FMAPS,
7640     AVX512VPOPCNTDQ,
7641     MAX
7642   };
7643 
7644   uint32_t FeaturesMask = 0;
7645 
7646   for (const StringRef &FeatureStr : FeatureStrs) {
7647     X86Features Feature =
7648         StringSwitch<X86Features>(FeatureStr)
7649             .Case("cmov", X86Features::CMOV)
7650             .Case("mmx", X86Features::MMX)
7651             .Case("popcnt", X86Features::POPCNT)
7652             .Case("sse", X86Features::SSE)
7653             .Case("sse2", X86Features::SSE2)
7654             .Case("sse3", X86Features::SSE3)
7655             .Case("ssse3", X86Features::SSSE3)
7656             .Case("sse4.1", X86Features::SSE4_1)
7657             .Case("sse4.2", X86Features::SSE4_2)
7658             .Case("avx", X86Features::AVX)
7659             .Case("avx2", X86Features::AVX2)
7660             .Case("sse4a", X86Features::SSE4_A)
7661             .Case("fma4", X86Features::FMA4)
7662             .Case("xop", X86Features::XOP)
7663             .Case("fma", X86Features::FMA)
7664             .Case("avx512f", X86Features::AVX512F)
7665             .Case("bmi", X86Features::BMI)
7666             .Case("bmi2", X86Features::BMI2)
7667             .Case("aes", X86Features::AES)
7668             .Case("pclmul", X86Features::PCLMUL)
7669             .Case("avx512vl", X86Features::AVX512VL)
7670             .Case("avx512bw", X86Features::AVX512BW)
7671             .Case("avx512dq", X86Features::AVX512DQ)
7672             .Case("avx512cd", X86Features::AVX512CD)
7673             .Case("avx512er", X86Features::AVX512ER)
7674             .Case("avx512pf", X86Features::AVX512PF)
7675             .Case("avx512vbmi", X86Features::AVX512VBMI)
7676             .Case("avx512ifma", X86Features::AVX512IFMA)
7677             .Case("avx5124vnniw", X86Features::AVX5124VNNIW)
7678             .Case("avx5124fmaps", X86Features::AVX5124FMAPS)
7679             .Case("avx512vpopcntdq", X86Features::AVX512VPOPCNTDQ)
7680             .Default(X86Features::MAX);
7681     assert(Feature != X86Features::MAX && "Invalid feature!");
7682     FeaturesMask |= (1U << Feature);
7683   }
7684 
7685   // Matching the struct layout from the compiler-rt/libgcc structure that is
7686   // filled in:
7687   // unsigned int __cpu_vendor;
7688   // unsigned int __cpu_type;
7689   // unsigned int __cpu_subtype;
7690   // unsigned int __cpu_features[1];
7691   llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
7692                                           llvm::ArrayType::get(Int32Ty, 1));
7693 
7694   // Grab the global __cpu_model.
7695   llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
7696 
7697   // Grab the first (0th) element from the field __cpu_features off of the
7698   // global in the struct STy.
7699   Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 3),
7700                    ConstantInt::get(Int32Ty, 0)};
7701   Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
7702   Value *Features =
7703       Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4));
7704 
7705   // Check the value of the bit corresponding to the feature requested.
7706   Value *Bitset = Builder.CreateAnd(
7707       Features, llvm::ConstantInt::get(Int32Ty, FeaturesMask));
7708   return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
7709 }
7710 
7711 Value *CodeGenFunction::EmitX86CpuInit() {
7712   llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
7713                                                     /*Variadic*/ false);
7714   llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
7715   return Builder.CreateCall(Func);
7716 }
7717 
7718 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
7719                                            const CallExpr *E) {
7720   if (BuiltinID == X86::BI__builtin_cpu_is)
7721     return EmitX86CpuIs(E);
7722   if (BuiltinID == X86::BI__builtin_cpu_supports)
7723     return EmitX86CpuSupports(E);
7724   if (BuiltinID == X86::BI__builtin_cpu_init)
7725     return EmitX86CpuInit();
7726 
7727   SmallVector<Value*, 4> Ops;
7728 
7729   // Find out if any arguments are required to be integer constant expressions.
7730   unsigned ICEArguments = 0;
7731   ASTContext::GetBuiltinTypeError Error;
7732   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
7733   assert(Error == ASTContext::GE_None && "Should not codegen an error");
7734 
7735   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
7736     // If this is a normal argument, just emit it as a scalar.
7737     if ((ICEArguments & (1 << i)) == 0) {
7738       Ops.push_back(EmitScalarExpr(E->getArg(i)));
7739       continue;
7740     }
7741 
7742     // If this is required to be a constant, constant fold it so that we know
7743     // that the generated intrinsic gets a ConstantInt.
7744     llvm::APSInt Result;
7745     bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
7746     assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
7747     Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
7748   }
7749 
7750   // These exist so that the builtin that takes an immediate can be bounds
7751   // checked by clang to avoid passing bad immediates to the backend. Since
7752   // AVX has a larger immediate than SSE we would need separate builtins to
7753   // do the different bounds checking. Rather than create a clang specific
7754   // SSE only builtin, this implements eight separate builtins to match gcc
7755   // implementation.
7756   auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
7757     Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
7758     llvm::Function *F = CGM.getIntrinsic(ID);
7759     return Builder.CreateCall(F, Ops);
7760   };
7761 
7762   // For the vector forms of FP comparisons, translate the builtins directly to
7763   // IR.
7764   // TODO: The builtins could be removed if the SSE header files used vector
7765   // extension comparisons directly (vector ordered/unordered may need
7766   // additional support via __builtin_isnan()).
7767   auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
7768     Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
7769     llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
7770     llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
7771     Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
7772     return Builder.CreateBitCast(Sext, FPVecTy);
7773   };
7774 
7775   switch (BuiltinID) {
7776   default: return nullptr;
7777   case X86::BI_mm_prefetch: {
7778     Value *Address = Ops[0];
7779     Value *RW = ConstantInt::get(Int32Ty, 0);
7780     Value *Locality = Ops[1];
7781     Value *Data = ConstantInt::get(Int32Ty, 1);
7782     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
7783     return Builder.CreateCall(F, {Address, RW, Locality, Data});
7784   }
7785   case X86::BI_mm_clflush: {
7786     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
7787                               Ops[0]);
7788   }
7789   case X86::BI_mm_lfence: {
7790     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
7791   }
7792   case X86::BI_mm_mfence: {
7793     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
7794   }
7795   case X86::BI_mm_sfence: {
7796     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
7797   }
7798   case X86::BI_mm_pause: {
7799     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
7800   }
7801   case X86::BI__rdtsc: {
7802     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
7803   }
7804   case X86::BI__builtin_ia32_undef128:
7805   case X86::BI__builtin_ia32_undef256:
7806   case X86::BI__builtin_ia32_undef512:
7807     // The x86 definition of "undef" is not the same as the LLVM definition
7808     // (PR32176). We leave optimizing away an unnecessary zero constant to the
7809     // IR optimizer and backend.
7810     // TODO: If we had a "freeze" IR instruction to generate a fixed undef
7811     // value, we should use that here instead of a zero.
7812     return llvm::Constant::getNullValue(ConvertType(E->getType()));
7813   case X86::BI__builtin_ia32_vec_init_v8qi:
7814   case X86::BI__builtin_ia32_vec_init_v4hi:
7815   case X86::BI__builtin_ia32_vec_init_v2si:
7816     return Builder.CreateBitCast(BuildVector(Ops),
7817                                  llvm::Type::getX86_MMXTy(getLLVMContext()));
7818   case X86::BI__builtin_ia32_vec_ext_v2si:
7819     return Builder.CreateExtractElement(Ops[0],
7820                                   llvm::ConstantInt::get(Ops[1]->getType(), 0));
7821   case X86::BI_mm_setcsr:
7822   case X86::BI__builtin_ia32_ldmxcsr: {
7823     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
7824     Builder.CreateStore(Ops[0], Tmp);
7825     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
7826                           Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
7827   }
7828   case X86::BI_mm_getcsr:
7829   case X86::BI__builtin_ia32_stmxcsr: {
7830     Address Tmp = CreateMemTemp(E->getType());
7831     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
7832                        Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
7833     return Builder.CreateLoad(Tmp, "stmxcsr");
7834   }
7835   case X86::BI__builtin_ia32_xsave:
7836   case X86::BI__builtin_ia32_xsave64:
7837   case X86::BI__builtin_ia32_xrstor:
7838   case X86::BI__builtin_ia32_xrstor64:
7839   case X86::BI__builtin_ia32_xsaveopt:
7840   case X86::BI__builtin_ia32_xsaveopt64:
7841   case X86::BI__builtin_ia32_xrstors:
7842   case X86::BI__builtin_ia32_xrstors64:
7843   case X86::BI__builtin_ia32_xsavec:
7844   case X86::BI__builtin_ia32_xsavec64:
7845   case X86::BI__builtin_ia32_xsaves:
7846   case X86::BI__builtin_ia32_xsaves64: {
7847     Intrinsic::ID ID;
7848 #define INTRINSIC_X86_XSAVE_ID(NAME) \
7849     case X86::BI__builtin_ia32_##NAME: \
7850       ID = Intrinsic::x86_##NAME; \
7851       break
7852     switch (BuiltinID) {
7853     default: llvm_unreachable("Unsupported intrinsic!");
7854     INTRINSIC_X86_XSAVE_ID(xsave);
7855     INTRINSIC_X86_XSAVE_ID(xsave64);
7856     INTRINSIC_X86_XSAVE_ID(xrstor);
7857     INTRINSIC_X86_XSAVE_ID(xrstor64);
7858     INTRINSIC_X86_XSAVE_ID(xsaveopt);
7859     INTRINSIC_X86_XSAVE_ID(xsaveopt64);
7860     INTRINSIC_X86_XSAVE_ID(xrstors);
7861     INTRINSIC_X86_XSAVE_ID(xrstors64);
7862     INTRINSIC_X86_XSAVE_ID(xsavec);
7863     INTRINSIC_X86_XSAVE_ID(xsavec64);
7864     INTRINSIC_X86_XSAVE_ID(xsaves);
7865     INTRINSIC_X86_XSAVE_ID(xsaves64);
7866     }
7867 #undef INTRINSIC_X86_XSAVE_ID
7868     Value *Mhi = Builder.CreateTrunc(
7869       Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
7870     Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
7871     Ops[1] = Mhi;
7872     Ops.push_back(Mlo);
7873     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
7874   }
7875   case X86::BI__builtin_ia32_storedqudi128_mask:
7876   case X86::BI__builtin_ia32_storedqusi128_mask:
7877   case X86::BI__builtin_ia32_storedquhi128_mask:
7878   case X86::BI__builtin_ia32_storedquqi128_mask:
7879   case X86::BI__builtin_ia32_storeupd128_mask:
7880   case X86::BI__builtin_ia32_storeups128_mask:
7881   case X86::BI__builtin_ia32_storedqudi256_mask:
7882   case X86::BI__builtin_ia32_storedqusi256_mask:
7883   case X86::BI__builtin_ia32_storedquhi256_mask:
7884   case X86::BI__builtin_ia32_storedquqi256_mask:
7885   case X86::BI__builtin_ia32_storeupd256_mask:
7886   case X86::BI__builtin_ia32_storeups256_mask:
7887   case X86::BI__builtin_ia32_storedqudi512_mask:
7888   case X86::BI__builtin_ia32_storedqusi512_mask:
7889   case X86::BI__builtin_ia32_storedquhi512_mask:
7890   case X86::BI__builtin_ia32_storedquqi512_mask:
7891   case X86::BI__builtin_ia32_storeupd512_mask:
7892   case X86::BI__builtin_ia32_storeups512_mask:
7893     return EmitX86MaskedStore(*this, Ops, 1);
7894 
7895   case X86::BI__builtin_ia32_storess128_mask:
7896   case X86::BI__builtin_ia32_storesd128_mask: {
7897     return EmitX86MaskedStore(*this, Ops, 16);
7898   }
7899   case X86::BI__builtin_ia32_vpopcntd_512:
7900   case X86::BI__builtin_ia32_vpopcntq_512: {
7901     llvm::Type *ResultType = ConvertType(E->getType());
7902     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
7903     return Builder.CreateCall(F, Ops);
7904   }
7905   case X86::BI__builtin_ia32_cvtmask2b128:
7906   case X86::BI__builtin_ia32_cvtmask2b256:
7907   case X86::BI__builtin_ia32_cvtmask2b512:
7908   case X86::BI__builtin_ia32_cvtmask2w128:
7909   case X86::BI__builtin_ia32_cvtmask2w256:
7910   case X86::BI__builtin_ia32_cvtmask2w512:
7911   case X86::BI__builtin_ia32_cvtmask2d128:
7912   case X86::BI__builtin_ia32_cvtmask2d256:
7913   case X86::BI__builtin_ia32_cvtmask2d512:
7914   case X86::BI__builtin_ia32_cvtmask2q128:
7915   case X86::BI__builtin_ia32_cvtmask2q256:
7916   case X86::BI__builtin_ia32_cvtmask2q512:
7917     return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
7918 
7919   case X86::BI__builtin_ia32_movdqa32store128_mask:
7920   case X86::BI__builtin_ia32_movdqa64store128_mask:
7921   case X86::BI__builtin_ia32_storeaps128_mask:
7922   case X86::BI__builtin_ia32_storeapd128_mask:
7923   case X86::BI__builtin_ia32_movdqa32store256_mask:
7924   case X86::BI__builtin_ia32_movdqa64store256_mask:
7925   case X86::BI__builtin_ia32_storeaps256_mask:
7926   case X86::BI__builtin_ia32_storeapd256_mask:
7927   case X86::BI__builtin_ia32_movdqa32store512_mask:
7928   case X86::BI__builtin_ia32_movdqa64store512_mask:
7929   case X86::BI__builtin_ia32_storeaps512_mask:
7930   case X86::BI__builtin_ia32_storeapd512_mask: {
7931     unsigned Align =
7932       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
7933     return EmitX86MaskedStore(*this, Ops, Align);
7934   }
7935   case X86::BI__builtin_ia32_loadups128_mask:
7936   case X86::BI__builtin_ia32_loadups256_mask:
7937   case X86::BI__builtin_ia32_loadups512_mask:
7938   case X86::BI__builtin_ia32_loadupd128_mask:
7939   case X86::BI__builtin_ia32_loadupd256_mask:
7940   case X86::BI__builtin_ia32_loadupd512_mask:
7941   case X86::BI__builtin_ia32_loaddquqi128_mask:
7942   case X86::BI__builtin_ia32_loaddquqi256_mask:
7943   case X86::BI__builtin_ia32_loaddquqi512_mask:
7944   case X86::BI__builtin_ia32_loaddquhi128_mask:
7945   case X86::BI__builtin_ia32_loaddquhi256_mask:
7946   case X86::BI__builtin_ia32_loaddquhi512_mask:
7947   case X86::BI__builtin_ia32_loaddqusi128_mask:
7948   case X86::BI__builtin_ia32_loaddqusi256_mask:
7949   case X86::BI__builtin_ia32_loaddqusi512_mask:
7950   case X86::BI__builtin_ia32_loaddqudi128_mask:
7951   case X86::BI__builtin_ia32_loaddqudi256_mask:
7952   case X86::BI__builtin_ia32_loaddqudi512_mask:
7953     return EmitX86MaskedLoad(*this, Ops, 1);
7954 
7955   case X86::BI__builtin_ia32_loadss128_mask:
7956   case X86::BI__builtin_ia32_loadsd128_mask:
7957     return EmitX86MaskedLoad(*this, Ops, 16);
7958 
7959   case X86::BI__builtin_ia32_loadaps128_mask:
7960   case X86::BI__builtin_ia32_loadaps256_mask:
7961   case X86::BI__builtin_ia32_loadaps512_mask:
7962   case X86::BI__builtin_ia32_loadapd128_mask:
7963   case X86::BI__builtin_ia32_loadapd256_mask:
7964   case X86::BI__builtin_ia32_loadapd512_mask:
7965   case X86::BI__builtin_ia32_movdqa32load128_mask:
7966   case X86::BI__builtin_ia32_movdqa32load256_mask:
7967   case X86::BI__builtin_ia32_movdqa32load512_mask:
7968   case X86::BI__builtin_ia32_movdqa64load128_mask:
7969   case X86::BI__builtin_ia32_movdqa64load256_mask:
7970   case X86::BI__builtin_ia32_movdqa64load512_mask: {
7971     unsigned Align =
7972       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
7973     return EmitX86MaskedLoad(*this, Ops, Align);
7974   }
7975 
7976   case X86::BI__builtin_ia32_vbroadcastf128_pd256:
7977   case X86::BI__builtin_ia32_vbroadcastf128_ps256: {
7978     llvm::Type *DstTy = ConvertType(E->getType());
7979     return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1);
7980   }
7981 
7982   case X86::BI__builtin_ia32_storehps:
7983   case X86::BI__builtin_ia32_storelps: {
7984     llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
7985     llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
7986 
7987     // cast val v2i64
7988     Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
7989 
7990     // extract (0, 1)
7991     unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
7992     llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
7993     Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
7994 
7995     // cast pointer to i64 & store
7996     Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
7997     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7998   }
7999   case X86::BI__builtin_ia32_palignr128:
8000   case X86::BI__builtin_ia32_palignr256:
8001   case X86::BI__builtin_ia32_palignr512_mask: {
8002     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
8003 
8004     unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
8005     assert(NumElts % 16 == 0);
8006 
8007     // If palignr is shifting the pair of vectors more than the size of two
8008     // lanes, emit zero.
8009     if (ShiftVal >= 32)
8010       return llvm::Constant::getNullValue(ConvertType(E->getType()));
8011 
8012     // If palignr is shifting the pair of input vectors more than one lane,
8013     // but less than two lanes, convert to shifting in zeroes.
8014     if (ShiftVal > 16) {
8015       ShiftVal -= 16;
8016       Ops[1] = Ops[0];
8017       Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
8018     }
8019 
8020     uint32_t Indices[64];
8021     // 256-bit palignr operates on 128-bit lanes so we need to handle that
8022     for (unsigned l = 0; l != NumElts; l += 16) {
8023       for (unsigned i = 0; i != 16; ++i) {
8024         unsigned Idx = ShiftVal + i;
8025         if (Idx >= 16)
8026           Idx += NumElts - 16; // End of lane, switch operand.
8027         Indices[l + i] = Idx + l;
8028       }
8029     }
8030 
8031     Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
8032                                                makeArrayRef(Indices, NumElts),
8033                                                "palignr");
8034 
8035     // If this isn't a masked builtin, just return the align operation.
8036     if (Ops.size() == 3)
8037       return Align;
8038 
8039     return EmitX86Select(*this, Ops[4], Align, Ops[3]);
8040   }
8041 
8042   case X86::BI__builtin_ia32_vperm2f128_pd256:
8043   case X86::BI__builtin_ia32_vperm2f128_ps256:
8044   case X86::BI__builtin_ia32_vperm2f128_si256:
8045   case X86::BI__builtin_ia32_permti256: {
8046     unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
8047     unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
8048 
8049     // This takes a very simple approach since there are two lanes and a
8050     // shuffle can have 2 inputs. So we reserve the first input for the first
8051     // lane and the second input for the second lane. This may result in
8052     // duplicate sources, but this can be dealt with in the backend.
8053 
8054     Value *OutOps[2];
8055     uint32_t Indices[8];
8056     for (unsigned l = 0; l != 2; ++l) {
8057       // Determine the source for this lane.
8058       if (Imm & (1 << ((l * 4) + 3)))
8059         OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
8060       else if (Imm & (1 << ((l * 4) + 1)))
8061         OutOps[l] = Ops[1];
8062       else
8063         OutOps[l] = Ops[0];
8064 
8065       for (unsigned i = 0; i != NumElts/2; ++i) {
8066         // Start with ith element of the source for this lane.
8067         unsigned Idx = (l * NumElts) + i;
8068         // If bit 0 of the immediate half is set, switch to the high half of
8069         // the source.
8070         if (Imm & (1 << (l * 4)))
8071           Idx += NumElts/2;
8072         Indices[(l * (NumElts/2)) + i] = Idx;
8073       }
8074     }
8075 
8076     return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
8077                                        makeArrayRef(Indices, NumElts),
8078                                        "vperm");
8079   }
8080 
8081   case X86::BI__builtin_ia32_movnti:
8082   case X86::BI__builtin_ia32_movnti64:
8083   case X86::BI__builtin_ia32_movntsd:
8084   case X86::BI__builtin_ia32_movntss: {
8085     llvm::MDNode *Node = llvm::MDNode::get(
8086         getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
8087 
8088     Value *Ptr = Ops[0];
8089     Value *Src = Ops[1];
8090 
8091     // Extract the 0'th element of the source vector.
8092     if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
8093         BuiltinID == X86::BI__builtin_ia32_movntss)
8094       Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
8095 
8096     // Convert the type of the pointer to a pointer to the stored type.
8097     Value *BC = Builder.CreateBitCast(
8098         Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");
8099 
8100     // Unaligned nontemporal store of the scalar value.
8101     StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
8102     SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
8103     SI->setAlignment(1);
8104     return SI;
8105   }
8106 
8107   case X86::BI__builtin_ia32_selectb_128:
8108   case X86::BI__builtin_ia32_selectb_256:
8109   case X86::BI__builtin_ia32_selectb_512:
8110   case X86::BI__builtin_ia32_selectw_128:
8111   case X86::BI__builtin_ia32_selectw_256:
8112   case X86::BI__builtin_ia32_selectw_512:
8113   case X86::BI__builtin_ia32_selectd_128:
8114   case X86::BI__builtin_ia32_selectd_256:
8115   case X86::BI__builtin_ia32_selectd_512:
8116   case X86::BI__builtin_ia32_selectq_128:
8117   case X86::BI__builtin_ia32_selectq_256:
8118   case X86::BI__builtin_ia32_selectq_512:
8119   case X86::BI__builtin_ia32_selectps_128:
8120   case X86::BI__builtin_ia32_selectps_256:
8121   case X86::BI__builtin_ia32_selectps_512:
8122   case X86::BI__builtin_ia32_selectpd_128:
8123   case X86::BI__builtin_ia32_selectpd_256:
8124   case X86::BI__builtin_ia32_selectpd_512:
8125     return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
8126   case X86::BI__builtin_ia32_pcmpeqb128_mask:
8127   case X86::BI__builtin_ia32_pcmpeqb256_mask:
8128   case X86::BI__builtin_ia32_pcmpeqb512_mask:
8129   case X86::BI__builtin_ia32_pcmpeqw128_mask:
8130   case X86::BI__builtin_ia32_pcmpeqw256_mask:
8131   case X86::BI__builtin_ia32_pcmpeqw512_mask:
8132   case X86::BI__builtin_ia32_pcmpeqd128_mask:
8133   case X86::BI__builtin_ia32_pcmpeqd256_mask:
8134   case X86::BI__builtin_ia32_pcmpeqd512_mask:
8135   case X86::BI__builtin_ia32_pcmpeqq128_mask:
8136   case X86::BI__builtin_ia32_pcmpeqq256_mask:
8137   case X86::BI__builtin_ia32_pcmpeqq512_mask:
8138     return EmitX86MaskedCompare(*this, 0, false, Ops);
8139   case X86::BI__builtin_ia32_pcmpgtb128_mask:
8140   case X86::BI__builtin_ia32_pcmpgtb256_mask:
8141   case X86::BI__builtin_ia32_pcmpgtb512_mask:
8142   case X86::BI__builtin_ia32_pcmpgtw128_mask:
8143   case X86::BI__builtin_ia32_pcmpgtw256_mask:
8144   case X86::BI__builtin_ia32_pcmpgtw512_mask:
8145   case X86::BI__builtin_ia32_pcmpgtd128_mask:
8146   case X86::BI__builtin_ia32_pcmpgtd256_mask:
8147   case X86::BI__builtin_ia32_pcmpgtd512_mask:
8148   case X86::BI__builtin_ia32_pcmpgtq128_mask:
8149   case X86::BI__builtin_ia32_pcmpgtq256_mask:
8150   case X86::BI__builtin_ia32_pcmpgtq512_mask:
8151     return EmitX86MaskedCompare(*this, 6, true, Ops);
8152   case X86::BI__builtin_ia32_cmpb128_mask:
8153   case X86::BI__builtin_ia32_cmpb256_mask:
8154   case X86::BI__builtin_ia32_cmpb512_mask:
8155   case X86::BI__builtin_ia32_cmpw128_mask:
8156   case X86::BI__builtin_ia32_cmpw256_mask:
8157   case X86::BI__builtin_ia32_cmpw512_mask:
8158   case X86::BI__builtin_ia32_cmpd128_mask:
8159   case X86::BI__builtin_ia32_cmpd256_mask:
8160   case X86::BI__builtin_ia32_cmpd512_mask:
8161   case X86::BI__builtin_ia32_cmpq128_mask:
8162   case X86::BI__builtin_ia32_cmpq256_mask:
8163   case X86::BI__builtin_ia32_cmpq512_mask: {
8164     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
8165     return EmitX86MaskedCompare(*this, CC, true, Ops);
8166   }
8167   case X86::BI__builtin_ia32_ucmpb128_mask:
8168   case X86::BI__builtin_ia32_ucmpb256_mask:
8169   case X86::BI__builtin_ia32_ucmpb512_mask:
8170   case X86::BI__builtin_ia32_ucmpw128_mask:
8171   case X86::BI__builtin_ia32_ucmpw256_mask:
8172   case X86::BI__builtin_ia32_ucmpw512_mask:
8173   case X86::BI__builtin_ia32_ucmpd128_mask:
8174   case X86::BI__builtin_ia32_ucmpd256_mask:
8175   case X86::BI__builtin_ia32_ucmpd512_mask:
8176   case X86::BI__builtin_ia32_ucmpq128_mask:
8177   case X86::BI__builtin_ia32_ucmpq256_mask:
8178   case X86::BI__builtin_ia32_ucmpq512_mask: {
8179     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
8180     return EmitX86MaskedCompare(*this, CC, false, Ops);
8181   }
8182 
8183   case X86::BI__builtin_ia32_vplzcntd_128_mask:
8184   case X86::BI__builtin_ia32_vplzcntd_256_mask:
8185   case X86::BI__builtin_ia32_vplzcntd_512_mask:
8186   case X86::BI__builtin_ia32_vplzcntq_128_mask:
8187   case X86::BI__builtin_ia32_vplzcntq_256_mask:
8188   case X86::BI__builtin_ia32_vplzcntq_512_mask: {
8189     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
8190     return EmitX86Select(*this, Ops[2],
8191                          Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}),
8192                          Ops[1]);
8193   }
8194 
8195   case X86::BI__builtin_ia32_pabsb128:
8196   case X86::BI__builtin_ia32_pabsw128:
8197   case X86::BI__builtin_ia32_pabsd128:
8198   case X86::BI__builtin_ia32_pabsb256:
8199   case X86::BI__builtin_ia32_pabsw256:
8200   case X86::BI__builtin_ia32_pabsd256:
8201   case X86::BI__builtin_ia32_pabsq128_mask:
8202   case X86::BI__builtin_ia32_pabsq256_mask:
8203   case X86::BI__builtin_ia32_pabsb512_mask:
8204   case X86::BI__builtin_ia32_pabsw512_mask:
8205   case X86::BI__builtin_ia32_pabsd512_mask:
8206   case X86::BI__builtin_ia32_pabsq512_mask:
8207     return EmitX86Abs(*this, Ops);
8208 
8209   case X86::BI__builtin_ia32_pmaxsb128:
8210   case X86::BI__builtin_ia32_pmaxsw128:
8211   case X86::BI__builtin_ia32_pmaxsd128:
8212   case X86::BI__builtin_ia32_pmaxsq128_mask:
8213   case X86::BI__builtin_ia32_pmaxsb256:
8214   case X86::BI__builtin_ia32_pmaxsw256:
8215   case X86::BI__builtin_ia32_pmaxsd256:
8216   case X86::BI__builtin_ia32_pmaxsq256_mask:
8217   case X86::BI__builtin_ia32_pmaxsb512_mask:
8218   case X86::BI__builtin_ia32_pmaxsw512_mask:
8219   case X86::BI__builtin_ia32_pmaxsd512_mask:
8220   case X86::BI__builtin_ia32_pmaxsq512_mask:
8221     return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops);
8222   case X86::BI__builtin_ia32_pmaxub128:
8223   case X86::BI__builtin_ia32_pmaxuw128:
8224   case X86::BI__builtin_ia32_pmaxud128:
8225   case X86::BI__builtin_ia32_pmaxuq128_mask:
8226   case X86::BI__builtin_ia32_pmaxub256:
8227   case X86::BI__builtin_ia32_pmaxuw256:
8228   case X86::BI__builtin_ia32_pmaxud256:
8229   case X86::BI__builtin_ia32_pmaxuq256_mask:
8230   case X86::BI__builtin_ia32_pmaxub512_mask:
8231   case X86::BI__builtin_ia32_pmaxuw512_mask:
8232   case X86::BI__builtin_ia32_pmaxud512_mask:
8233   case X86::BI__builtin_ia32_pmaxuq512_mask:
8234     return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops);
8235   case X86::BI__builtin_ia32_pminsb128:
8236   case X86::BI__builtin_ia32_pminsw128:
8237   case X86::BI__builtin_ia32_pminsd128:
8238   case X86::BI__builtin_ia32_pminsq128_mask:
8239   case X86::BI__builtin_ia32_pminsb256:
8240   case X86::BI__builtin_ia32_pminsw256:
8241   case X86::BI__builtin_ia32_pminsd256:
8242   case X86::BI__builtin_ia32_pminsq256_mask:
8243   case X86::BI__builtin_ia32_pminsb512_mask:
8244   case X86::BI__builtin_ia32_pminsw512_mask:
8245   case X86::BI__builtin_ia32_pminsd512_mask:
8246   case X86::BI__builtin_ia32_pminsq512_mask:
8247     return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops);
8248   case X86::BI__builtin_ia32_pminub128:
8249   case X86::BI__builtin_ia32_pminuw128:
8250   case X86::BI__builtin_ia32_pminud128:
8251   case X86::BI__builtin_ia32_pminuq128_mask:
8252   case X86::BI__builtin_ia32_pminub256:
8253   case X86::BI__builtin_ia32_pminuw256:
8254   case X86::BI__builtin_ia32_pminud256:
8255   case X86::BI__builtin_ia32_pminuq256_mask:
8256   case X86::BI__builtin_ia32_pminub512_mask:
8257   case X86::BI__builtin_ia32_pminuw512_mask:
8258   case X86::BI__builtin_ia32_pminud512_mask:
8259   case X86::BI__builtin_ia32_pminuq512_mask:
8260     return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops);
8261 
8262   // 3DNow!
8263   case X86::BI__builtin_ia32_pswapdsf:
8264   case X86::BI__builtin_ia32_pswapdsi: {
8265     llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
8266     Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
8267     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
8268     return Builder.CreateCall(F, Ops, "pswapd");
8269   }
8270   case X86::BI__builtin_ia32_rdrand16_step:
8271   case X86::BI__builtin_ia32_rdrand32_step:
8272   case X86::BI__builtin_ia32_rdrand64_step:
8273   case X86::BI__builtin_ia32_rdseed16_step:
8274   case X86::BI__builtin_ia32_rdseed32_step:
8275   case X86::BI__builtin_ia32_rdseed64_step: {
8276     Intrinsic::ID ID;
8277     switch (BuiltinID) {
8278     default: llvm_unreachable("Unsupported intrinsic!");
8279     case X86::BI__builtin_ia32_rdrand16_step:
8280       ID = Intrinsic::x86_rdrand_16;
8281       break;
8282     case X86::BI__builtin_ia32_rdrand32_step:
8283       ID = Intrinsic::x86_rdrand_32;
8284       break;
8285     case X86::BI__builtin_ia32_rdrand64_step:
8286       ID = Intrinsic::x86_rdrand_64;
8287       break;
8288     case X86::BI__builtin_ia32_rdseed16_step:
8289       ID = Intrinsic::x86_rdseed_16;
8290       break;
8291     case X86::BI__builtin_ia32_rdseed32_step:
8292       ID = Intrinsic::x86_rdseed_32;
8293       break;
8294     case X86::BI__builtin_ia32_rdseed64_step:
8295       ID = Intrinsic::x86_rdseed_64;
8296       break;
8297     }
8298 
8299     Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
8300     Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
8301                                       Ops[0]);
8302     return Builder.CreateExtractValue(Call, 1);
8303   }
8304 
8305   // SSE packed comparison intrinsics
8306   case X86::BI__builtin_ia32_cmpeqps:
8307   case X86::BI__builtin_ia32_cmpeqpd:
8308     return getVectorFCmpIR(CmpInst::FCMP_OEQ);
8309   case X86::BI__builtin_ia32_cmpltps:
8310   case X86::BI__builtin_ia32_cmpltpd:
8311     return getVectorFCmpIR(CmpInst::FCMP_OLT);
8312   case X86::BI__builtin_ia32_cmpleps:
8313   case X86::BI__builtin_ia32_cmplepd:
8314     return getVectorFCmpIR(CmpInst::FCMP_OLE);
8315   case X86::BI__builtin_ia32_cmpunordps:
8316   case X86::BI__builtin_ia32_cmpunordpd:
8317     return getVectorFCmpIR(CmpInst::FCMP_UNO);
8318   case X86::BI__builtin_ia32_cmpneqps:
8319   case X86::BI__builtin_ia32_cmpneqpd:
8320     return getVectorFCmpIR(CmpInst::FCMP_UNE);
8321   case X86::BI__builtin_ia32_cmpnltps:
8322   case X86::BI__builtin_ia32_cmpnltpd:
8323     return getVectorFCmpIR(CmpInst::FCMP_UGE);
8324   case X86::BI__builtin_ia32_cmpnleps:
8325   case X86::BI__builtin_ia32_cmpnlepd:
8326     return getVectorFCmpIR(CmpInst::FCMP_UGT);
8327   case X86::BI__builtin_ia32_cmpordps:
8328   case X86::BI__builtin_ia32_cmpordpd:
8329     return getVectorFCmpIR(CmpInst::FCMP_ORD);
8330   case X86::BI__builtin_ia32_cmpps:
8331   case X86::BI__builtin_ia32_cmpps256:
8332   case X86::BI__builtin_ia32_cmppd:
8333   case X86::BI__builtin_ia32_cmppd256: {
8334     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
8335     // If this is one of the SSE immediates, we can use native IR.
8336     if (CC < 8) {
8337       FCmpInst::Predicate Pred;
8338       switch (CC) {
8339       case 0: Pred = FCmpInst::FCMP_OEQ; break;
8340       case 1: Pred = FCmpInst::FCMP_OLT; break;
8341       case 2: Pred = FCmpInst::FCMP_OLE; break;
8342       case 3: Pred = FCmpInst::FCMP_UNO; break;
8343       case 4: Pred = FCmpInst::FCMP_UNE; break;
8344       case 5: Pred = FCmpInst::FCMP_UGE; break;
8345       case 6: Pred = FCmpInst::FCMP_UGT; break;
8346       case 7: Pred = FCmpInst::FCMP_ORD; break;
8347       }
8348       return getVectorFCmpIR(Pred);
8349     }
8350 
8351     // We can't handle immediates 8-31 with native IR, so use the intrinsic,
8352     // except for predicates whose result is a constant.
8353     Intrinsic::ID ID;
8354     switch (BuiltinID) {
8355     default: llvm_unreachable("Unsupported intrinsic!");
8356     case X86::BI__builtin_ia32_cmpps:
8357       ID = Intrinsic::x86_sse_cmp_ps;
8358       break;
8359     case X86::BI__builtin_ia32_cmpps256:
8360       // _CMP_TRUE_UQ and _CMP_TRUE_US produce an all-ones (-1, -1, ...) vector
8361       // on any input; _CMP_FALSE_OQ and _CMP_FALSE_OS produce all zeros.
8362       if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
8363          Value *Constant = (CC == 0xf || CC == 0x1f) ?
8364                 llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) :
8365                 llvm::Constant::getNullValue(Builder.getInt32Ty());
8366          Value *Vec = Builder.CreateVectorSplat(
8367                         Ops[0]->getType()->getVectorNumElements(), Constant);
8368          return Builder.CreateBitCast(Vec, Ops[0]->getType());
8369       }
8370       ID = Intrinsic::x86_avx_cmp_ps_256;
8371       break;
8372     case X86::BI__builtin_ia32_cmppd:
8373       ID = Intrinsic::x86_sse2_cmp_pd;
8374       break;
8375     case X86::BI__builtin_ia32_cmppd256:
8376       // _CMP_TRUE_UQ and _CMP_TRUE_US produce an all-ones (-1, -1, ...) vector
8377       // on any input; _CMP_FALSE_OQ and _CMP_FALSE_OS produce all zeros.
8378       if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
8379          Value *Constant = (CC == 0xf || CC == 0x1f) ?
8380                 llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) :
8381                 llvm::Constant::getNullValue(Builder.getInt64Ty());
8382          Value *Vec = Builder.CreateVectorSplat(
8383                         Ops[0]->getType()->getVectorNumElements(), Constant);
8384          return Builder.CreateBitCast(Vec, Ops[0]->getType());
8385       }
8386       ID = Intrinsic::x86_avx_cmp_pd_256;
8387       break;
8388     }
8389 
8390     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
8391   }
8392 
8393   // SSE scalar comparison intrinsics
8394   case X86::BI__builtin_ia32_cmpeqss:
8395     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
8396   case X86::BI__builtin_ia32_cmpltss:
8397     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
8398   case X86::BI__builtin_ia32_cmpless:
8399     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
8400   case X86::BI__builtin_ia32_cmpunordss:
8401     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
8402   case X86::BI__builtin_ia32_cmpneqss:
8403     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
8404   case X86::BI__builtin_ia32_cmpnltss:
8405     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
8406   case X86::BI__builtin_ia32_cmpnless:
8407     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
8408   case X86::BI__builtin_ia32_cmpordss:
8409     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
8410   case X86::BI__builtin_ia32_cmpeqsd:
8411     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
8412   case X86::BI__builtin_ia32_cmpltsd:
8413     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
8414   case X86::BI__builtin_ia32_cmplesd:
8415     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
8416   case X86::BI__builtin_ia32_cmpunordsd:
8417     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
8418   case X86::BI__builtin_ia32_cmpneqsd:
8419     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
8420   case X86::BI__builtin_ia32_cmpnltsd:
8421     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
8422   case X86::BI__builtin_ia32_cmpnlesd:
8423     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
8424   case X86::BI__builtin_ia32_cmpordsd:
8425     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
8426 
8427   case X86::BI__emul:
8428   case X86::BI__emulu: {
8429     llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
8430     bool isSigned = (BuiltinID == X86::BI__emul);
8431     Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
8432     Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
8433     return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
8434   }
8435   case X86::BI__mulh:
8436   case X86::BI__umulh:
8437   case X86::BI_mul128:
8438   case X86::BI_umul128: {
8439     llvm::Type *ResType = ConvertType(E->getType());
8440     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
8441 
8442     bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
8443     Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
8444     Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
8445 
8446     Value *MulResult, *HigherBits;
8447     if (IsSigned) {
8448       MulResult = Builder.CreateNSWMul(LHS, RHS);
8449       HigherBits = Builder.CreateAShr(MulResult, 64);
8450     } else {
8451       MulResult = Builder.CreateNUWMul(LHS, RHS);
8452       HigherBits = Builder.CreateLShr(MulResult, 64);
8453     }
8454     HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
8455 
8456     if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
8457       return HigherBits;
8458 
8459     Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
8460     Builder.CreateStore(HigherBits, HighBitsAddress);
8461     return Builder.CreateIntCast(MulResult, ResType, IsSigned);
8462   }
8463 
8464   case X86::BI__faststorefence: {
8465     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
8466                                llvm::SyncScope::System);
8467   }
8468   case X86::BI_ReadWriteBarrier:
8469   case X86::BI_ReadBarrier:
8470   case X86::BI_WriteBarrier: {
8471     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
8472                                llvm::SyncScope::SingleThread);
8473   }
8474   case X86::BI_BitScanForward:
8475   case X86::BI_BitScanForward64:
8476     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
8477   case X86::BI_BitScanReverse:
8478   case X86::BI_BitScanReverse64:
8479     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
8480 
8481   case X86::BI_InterlockedAnd64:
8482     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
8483   case X86::BI_InterlockedExchange64:
8484     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
8485   case X86::BI_InterlockedExchangeAdd64:
8486     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
8487   case X86::BI_InterlockedExchangeSub64:
8488     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
8489   case X86::BI_InterlockedOr64:
8490     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
8491   case X86::BI_InterlockedXor64:
8492     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
8493   case X86::BI_InterlockedDecrement64:
8494     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
8495   case X86::BI_InterlockedIncrement64:
8496     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
8497 
8498   case X86::BI_AddressOfReturnAddress: {
8499     Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
8500     return Builder.CreateCall(F);
8501   }
8502   case X86::BI__stosb: {
8503     // We treat __stosb as a volatile memset - it may not generate "rep stosb"
8504     // instruction, but it will create a memset that won't be optimized away.
8505     return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true);
8506   }
8507   case X86::BI__ud2:
8508     // llvm.trap makes a ud2a instruction on x86.
8509     return EmitTrapCall(Intrinsic::trap);
8510   case X86::BI__int2c: {
8511     // This syscall signals a driver assertion failure in x86 NT kernels.
8512     llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
8513     llvm::InlineAsm *IA =
8514         llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true);
8515     llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
8516         getLLVMContext(), llvm::AttributeList::FunctionIndex,
8517         llvm::Attribute::NoReturn);
8518     CallSite CS = Builder.CreateCall(IA);
8519     CS.setAttributes(NoReturnAttr);
8520     return CS.getInstruction();
8521   }
8522   case X86::BI__readfsbyte:
8523   case X86::BI__readfsword:
8524   case X86::BI__readfsdword:
8525   case X86::BI__readfsqword: {
8526     llvm::Type *IntTy = ConvertType(E->getType());
8527     Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
8528                                         llvm::PointerType::get(IntTy, 257));
8529     LoadInst *Load = Builder.CreateAlignedLoad(
8530         IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
8531     Load->setVolatile(true);
8532     return Load;
8533   }
8534   case X86::BI__readgsbyte:
8535   case X86::BI__readgsword:
8536   case X86::BI__readgsdword:
8537   case X86::BI__readgsqword: {
8538     llvm::Type *IntTy = ConvertType(E->getType());
8539     Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
8540                                         llvm::PointerType::get(IntTy, 256));
8541     LoadInst *Load = Builder.CreateAlignedLoad(
8542         IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
8543     Load->setVolatile(true);
8544     return Load;
8545   }
8546   }
8547 }
8548 
8549 
8550 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
8551                                            const CallExpr *E) {
8552   SmallVector<Value*, 4> Ops;
8553 
8554   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
8555     Ops.push_back(EmitScalarExpr(E->getArg(i)));
8556 
8557   Intrinsic::ID ID = Intrinsic::not_intrinsic;
8558 
8559   switch (BuiltinID) {
8560   default: return nullptr;
8561 
8562   // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
8563   // call __builtin_readcyclecounter.
8564   case PPC::BI__builtin_ppc_get_timebase:
8565     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
8566 
8567   // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
8568   case PPC::BI__builtin_altivec_lvx:
8569   case PPC::BI__builtin_altivec_lvxl:
8570   case PPC::BI__builtin_altivec_lvebx:
8571   case PPC::BI__builtin_altivec_lvehx:
8572   case PPC::BI__builtin_altivec_lvewx:
8573   case PPC::BI__builtin_altivec_lvsl:
8574   case PPC::BI__builtin_altivec_lvsr:
8575   case PPC::BI__builtin_vsx_lxvd2x:
8576   case PPC::BI__builtin_vsx_lxvw4x:
8577   case PPC::BI__builtin_vsx_lxvd2x_be:
8578   case PPC::BI__builtin_vsx_lxvw4x_be:
8579   case PPC::BI__builtin_vsx_lxvl:
8580   case PPC::BI__builtin_vsx_lxvll:
8581   {
8582     if(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
8583        BuiltinID == PPC::BI__builtin_vsx_lxvll){
8584       Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
8585     }else {
8586       Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
8587       Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
8588       Ops.pop_back();
8589     }
8590 
8591     switch (BuiltinID) {
8592     default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
8593     case PPC::BI__builtin_altivec_lvx:
8594       ID = Intrinsic::ppc_altivec_lvx;
8595       break;
8596     case PPC::BI__builtin_altivec_lvxl:
8597       ID = Intrinsic::ppc_altivec_lvxl;
8598       break;
8599     case PPC::BI__builtin_altivec_lvebx:
8600       ID = Intrinsic::ppc_altivec_lvebx;
8601       break;
8602     case PPC::BI__builtin_altivec_lvehx:
8603       ID = Intrinsic::ppc_altivec_lvehx;
8604       break;
8605     case PPC::BI__builtin_altivec_lvewx:
8606       ID = Intrinsic::ppc_altivec_lvewx;
8607       break;
8608     case PPC::BI__builtin_altivec_lvsl:
8609       ID = Intrinsic::ppc_altivec_lvsl;
8610       break;
8611     case PPC::BI__builtin_altivec_lvsr:
8612       ID = Intrinsic::ppc_altivec_lvsr;
8613       break;
8614     case PPC::BI__builtin_vsx_lxvd2x:
8615       ID = Intrinsic::ppc_vsx_lxvd2x;
8616       break;
8617     case PPC::BI__builtin_vsx_lxvw4x:
8618       ID = Intrinsic::ppc_vsx_lxvw4x;
8619       break;
8620     case PPC::BI__builtin_vsx_lxvd2x_be:
8621       ID = Intrinsic::ppc_vsx_lxvd2x_be;
8622       break;
8623     case PPC::BI__builtin_vsx_lxvw4x_be:
8624       ID = Intrinsic::ppc_vsx_lxvw4x_be;
8625       break;
8626     case PPC::BI__builtin_vsx_lxvl:
8627       ID = Intrinsic::ppc_vsx_lxvl;
8628       break;
8629     case PPC::BI__builtin_vsx_lxvll:
8630       ID = Intrinsic::ppc_vsx_lxvll;
8631       break;
8632     }
8633     llvm::Function *F = CGM.getIntrinsic(ID);
8634     return Builder.CreateCall(F, Ops, "");
8635   }
8636 
8637   // vec_st, vec_xst_be
8638   case PPC::BI__builtin_altivec_stvx:
8639   case PPC::BI__builtin_altivec_stvxl:
8640   case PPC::BI__builtin_altivec_stvebx:
8641   case PPC::BI__builtin_altivec_stvehx:
8642   case PPC::BI__builtin_altivec_stvewx:
8643   case PPC::BI__builtin_vsx_stxvd2x:
8644   case PPC::BI__builtin_vsx_stxvw4x:
8645   case PPC::BI__builtin_vsx_stxvd2x_be:
8646   case PPC::BI__builtin_vsx_stxvw4x_be:
8647   case PPC::BI__builtin_vsx_stxvl:
8648   case PPC::BI__builtin_vsx_stxvll:
8649   {
8650     if(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
8651       BuiltinID == PPC::BI__builtin_vsx_stxvll ){
8652       Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
8653     }else {
8654       Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
8655       Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
8656       Ops.pop_back();
8657     }
8658 
8659     switch (BuiltinID) {
8660     default: llvm_unreachable("Unsupported st intrinsic!");
8661     case PPC::BI__builtin_altivec_stvx:
8662       ID = Intrinsic::ppc_altivec_stvx;
8663       break;
8664     case PPC::BI__builtin_altivec_stvxl:
8665       ID = Intrinsic::ppc_altivec_stvxl;
8666       break;
8667     case PPC::BI__builtin_altivec_stvebx:
8668       ID = Intrinsic::ppc_altivec_stvebx;
8669       break;
8670     case PPC::BI__builtin_altivec_stvehx:
8671       ID = Intrinsic::ppc_altivec_stvehx;
8672       break;
8673     case PPC::BI__builtin_altivec_stvewx:
8674       ID = Intrinsic::ppc_altivec_stvewx;
8675       break;
8676     case PPC::BI__builtin_vsx_stxvd2x:
8677       ID = Intrinsic::ppc_vsx_stxvd2x;
8678       break;
8679     case PPC::BI__builtin_vsx_stxvw4x:
8680       ID = Intrinsic::ppc_vsx_stxvw4x;
8681       break;
8682     case PPC::BI__builtin_vsx_stxvd2x_be:
8683       ID = Intrinsic::ppc_vsx_stxvd2x_be;
8684       break;
8685     case PPC::BI__builtin_vsx_stxvw4x_be:
8686       ID = Intrinsic::ppc_vsx_stxvw4x_be;
8687       break;
8688     case PPC::BI__builtin_vsx_stxvl:
8689       ID = Intrinsic::ppc_vsx_stxvl;
8690       break;
8691     case PPC::BI__builtin_vsx_stxvll:
8692       ID = Intrinsic::ppc_vsx_stxvll;
8693       break;
8694     }
8695     llvm::Function *F = CGM.getIntrinsic(ID);
8696     return Builder.CreateCall(F, Ops, "");
8697   }
8698   // Square root
8699   case PPC::BI__builtin_vsx_xvsqrtsp:
8700   case PPC::BI__builtin_vsx_xvsqrtdp: {
8701     llvm::Type *ResultType = ConvertType(E->getType());
8702     Value *X = EmitScalarExpr(E->getArg(0));
8703     ID = Intrinsic::sqrt;
8704     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8705     return Builder.CreateCall(F, X);
8706   }
8707   // Count leading zeros
8708   case PPC::BI__builtin_altivec_vclzb:
8709   case PPC::BI__builtin_altivec_vclzh:
8710   case PPC::BI__builtin_altivec_vclzw:
8711   case PPC::BI__builtin_altivec_vclzd: {
8712     llvm::Type *ResultType = ConvertType(E->getType());
8713     Value *X = EmitScalarExpr(E->getArg(0));
8714     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8715     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
8716     return Builder.CreateCall(F, {X, Undef});
8717   }
8718   case PPC::BI__builtin_altivec_vctzb:
8719   case PPC::BI__builtin_altivec_vctzh:
8720   case PPC::BI__builtin_altivec_vctzw:
8721   case PPC::BI__builtin_altivec_vctzd: {
8722     llvm::Type *ResultType = ConvertType(E->getType());
8723     Value *X = EmitScalarExpr(E->getArg(0));
8724     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8725     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
8726     return Builder.CreateCall(F, {X, Undef});
8727   }
8728   case PPC::BI__builtin_altivec_vpopcntb:
8729   case PPC::BI__builtin_altivec_vpopcnth:
8730   case PPC::BI__builtin_altivec_vpopcntw:
8731   case PPC::BI__builtin_altivec_vpopcntd: {
8732     llvm::Type *ResultType = ConvertType(E->getType());
8733     Value *X = EmitScalarExpr(E->getArg(0));
8734     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
8735     return Builder.CreateCall(F, X);
8736   }
8737   // Copy sign
8738   case PPC::BI__builtin_vsx_xvcpsgnsp:
8739   case PPC::BI__builtin_vsx_xvcpsgndp: {
8740     llvm::Type *ResultType = ConvertType(E->getType());
8741     Value *X = EmitScalarExpr(E->getArg(0));
8742     Value *Y = EmitScalarExpr(E->getArg(1));
8743     ID = Intrinsic::copysign;
8744     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8745     return Builder.CreateCall(F, {X, Y});
8746   }
8747   // Rounding/truncation
8748   case PPC::BI__builtin_vsx_xvrspip:
8749   case PPC::BI__builtin_vsx_xvrdpip:
8750   case PPC::BI__builtin_vsx_xvrdpim:
8751   case PPC::BI__builtin_vsx_xvrspim:
8752   case PPC::BI__builtin_vsx_xvrdpi:
8753   case PPC::BI__builtin_vsx_xvrspi:
8754   case PPC::BI__builtin_vsx_xvrdpic:
8755   case PPC::BI__builtin_vsx_xvrspic:
8756   case PPC::BI__builtin_vsx_xvrdpiz:
8757   case PPC::BI__builtin_vsx_xvrspiz: {
8758     llvm::Type *ResultType = ConvertType(E->getType());
8759     Value *X = EmitScalarExpr(E->getArg(0));
8760     if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
8761         BuiltinID == PPC::BI__builtin_vsx_xvrspim)
8762       ID = Intrinsic::floor;
8763     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
8764              BuiltinID == PPC::BI__builtin_vsx_xvrspi)
8765       ID = Intrinsic::round;
8766     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
8767              BuiltinID == PPC::BI__builtin_vsx_xvrspic)
8768       ID = Intrinsic::nearbyint;
8769     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
8770              BuiltinID == PPC::BI__builtin_vsx_xvrspip)
8771       ID = Intrinsic::ceil;
8772     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
8773              BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
8774       ID = Intrinsic::trunc;
8775     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8776     return Builder.CreateCall(F, X);
8777   }
8778 
8779   // Absolute value
8780   case PPC::BI__builtin_vsx_xvabsdp:
8781   case PPC::BI__builtin_vsx_xvabssp: {
8782     llvm::Type *ResultType = ConvertType(E->getType());
8783     Value *X = EmitScalarExpr(E->getArg(0));
8784     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8785     return Builder.CreateCall(F, X);
8786   }
8787 
8788   // FMA variations
8789   case PPC::BI__builtin_vsx_xvmaddadp:
8790   case PPC::BI__builtin_vsx_xvmaddasp:
8791   case PPC::BI__builtin_vsx_xvnmaddadp:
8792   case PPC::BI__builtin_vsx_xvnmaddasp:
8793   case PPC::BI__builtin_vsx_xvmsubadp:
8794   case PPC::BI__builtin_vsx_xvmsubasp:
8795   case PPC::BI__builtin_vsx_xvnmsubadp:
8796   case PPC::BI__builtin_vsx_xvnmsubasp: {
8797     llvm::Type *ResultType = ConvertType(E->getType());
8798     Value *X = EmitScalarExpr(E->getArg(0));
8799     Value *Y = EmitScalarExpr(E->getArg(1));
8800     Value *Z = EmitScalarExpr(E->getArg(2));
8801     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8802     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8803     switch (BuiltinID) {
8804       case PPC::BI__builtin_vsx_xvmaddadp:
8805       case PPC::BI__builtin_vsx_xvmaddasp:
8806         return Builder.CreateCall(F, {X, Y, Z});
8807       case PPC::BI__builtin_vsx_xvnmaddadp:
8808       case PPC::BI__builtin_vsx_xvnmaddasp:
8809         return Builder.CreateFSub(Zero,
8810                                   Builder.CreateCall(F, {X, Y, Z}), "sub");
8811       case PPC::BI__builtin_vsx_xvmsubadp:
8812       case PPC::BI__builtin_vsx_xvmsubasp:
8813         return Builder.CreateCall(F,
8814                                   {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8815       case PPC::BI__builtin_vsx_xvnmsubadp:
8816       case PPC::BI__builtin_vsx_xvnmsubasp:
8817         Value *FsubRes =
8818           Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8819         return Builder.CreateFSub(Zero, FsubRes, "sub");
8820     }
8821     llvm_unreachable("Unknown FMA operation");
8822     return nullptr; // Suppress no-return warning
8823   }
8824 
8825   case PPC::BI__builtin_vsx_insertword: {
8826     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
8827 
8828     // Third argument is a compile time constant int. It must be clamped to
8829     // the range [0, 12].
8830     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8831     assert(ArgCI &&
8832            "Third arg to xxinsertw intrinsic must be constant integer");
8833     const int64_t MaxIndex = 12;
8834     int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
8835 
8836     // The builtin semantics don't exactly match the xxinsertw instruction's
8837     // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
8838     // word from the first argument, and inserts it in the second argument. The
8839     // instruction extracts the word from its second input register and inserts
8840     // it into its first input register, so swap the first and second arguments.
8841     std::swap(Ops[0], Ops[1]);
8842 
8843     // Need to cast the second argument from a vector of unsigned int to a
8844     // vector of long long.
8845     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
8846 
8847     if (getTarget().isLittleEndian()) {
8848       // Create a shuffle mask of (1, 0)
8849       Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
8850                                    ConstantInt::get(Int32Ty, 0)
8851                                  };
8852       Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8853 
8854       // Reverse the double words in the vector we will extract from.
8855       Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8856       Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask);
8857 
8858       // Reverse the index.
8859       Index = MaxIndex - Index;
8860     }
8861 
8862     // Intrinsic expects the first arg to be a vector of int.
8863     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
8864     Ops[2] = ConstantInt::getSigned(Int32Ty, Index);
8865     return Builder.CreateCall(F, Ops);
8866   }
8867 
8868   case PPC::BI__builtin_vsx_extractuword: {
8869     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
8870 
8871     // Intrinsic expects the first argument to be a vector of doublewords.
8872     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8873 
8874     // The second argument is a compile time constant int that needs to
8875     // be clamped to the range [0, 12].
8876     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]);
8877     assert(ArgCI &&
8878            "Second Arg to xxextractuw intrinsic must be a constant integer!");
8879     const int64_t MaxIndex = 12;
8880     int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
8881 
8882     if (getTarget().isLittleEndian()) {
8883       // Reverse the index.
8884       Index = MaxIndex - Index;
8885       Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
8886 
8887       // Emit the call, then reverse the double words of the results vector.
8888       Value *Call = Builder.CreateCall(F, Ops);
8889 
8890       // Create a shuffle mask of (1, 0)
8891       Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
8892                                    ConstantInt::get(Int32Ty, 0)
8893                                  };
8894       Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8895 
8896       Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask);
8897       return ShuffleCall;
8898     } else {
8899       Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
8900       return Builder.CreateCall(F, Ops);
8901     }
8902   }
8903 
8904   case PPC::BI__builtin_vsx_xxpermdi: {
8905     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8906     assert(ArgCI && "Third arg must be constant integer!");
8907 
8908     unsigned Index = ArgCI->getZExtValue();
8909     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8910     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
8911 
8912     // Element zero comes from the first input vector and element one comes from
8913     // the second. The element indices within each vector are numbered in big
8914     // endian order so the shuffle mask must be adjusted for this on little
8915     // endian platforms (i.e. index is complemented and source vector reversed).
8916     unsigned ElemIdx0;
8917     unsigned ElemIdx1;
8918     if (getTarget().isLittleEndian()) {
8919       ElemIdx0 = (~Index & 1) + 2;
8920       ElemIdx1 = (~Index & 2) >> 1;
8921     } else { // BigEndian
8922       ElemIdx0 = (Index & 2) >> 1;
8923       ElemIdx1 = 2 + (Index & 1);
8924     }
8925 
8926     Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0),
8927                                 ConstantInt::get(Int32Ty, ElemIdx1)};
8928     Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8929 
8930     Value *ShuffleCall =
8931         Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
8932     QualType BIRetType = E->getType();
8933     auto RetTy = ConvertType(BIRetType);
8934     return Builder.CreateBitCast(ShuffleCall, RetTy);
8935   }
8936 
8937   case PPC::BI__builtin_vsx_xxsldwi: {
8938     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8939     assert(ArgCI && "Third argument must be a compile time constant");
8940     unsigned Index = ArgCI->getZExtValue() & 0x3;
8941     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
8942     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4));
8943 
8944     // Create a shuffle mask
8945     unsigned ElemIdx0;
8946     unsigned ElemIdx1;
8947     unsigned ElemIdx2;
8948     unsigned ElemIdx3;
8949     if (getTarget().isLittleEndian()) {
8950       // Little endian element N comes from element 8+N-Index of the
8951       // concatenated wide vector (of course, using modulo arithmetic on
8952       // the total number of elements).
8953       ElemIdx0 = (8 - Index) % 8;
8954       ElemIdx1 = (9 - Index) % 8;
8955       ElemIdx2 = (10 - Index) % 8;
8956       ElemIdx3 = (11 - Index) % 8;
8957     } else {
8958       // Big endian ElemIdx<N> = Index + N
8959       ElemIdx0 = Index;
8960       ElemIdx1 = Index + 1;
8961       ElemIdx2 = Index + 2;
8962       ElemIdx3 = Index + 3;
8963     }
8964 
8965     Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0),
8966                                 ConstantInt::get(Int32Ty, ElemIdx1),
8967                                 ConstantInt::get(Int32Ty, ElemIdx2),
8968                                 ConstantInt::get(Int32Ty, ElemIdx3)};
8969 
8970     Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8971     Value *ShuffleCall =
8972         Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
8973     QualType BIRetType = E->getType();
8974     auto RetTy = ConvertType(BIRetType);
8975     return Builder.CreateBitCast(ShuffleCall, RetTy);
8976   }
8977   }
8978 }
8979 
/// Emit LLVM IR for a call to an AMDGPU target builtin (the amdgcn and r600
/// builtins listed in the switch below).
///
/// Most builtins are forwarded to an LLVM intrinsic through the
/// emitUnaryBuiltin / emitBinaryBuiltin / emitTernaryBuiltin / emitFPIntBuiltin
/// / emitRangedBuiltin helpers; the remaining cases need their arguments or
/// result adapted and are expanded inline.
///
/// \param BuiltinID  Identifier of the builtin being called.
/// \param E          The call expression supplying the arguments.
/// \returns The emitted result value, or nullptr if BuiltinID is not handled
///          by this function.
8980 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
8981                                               const CallExpr *E) {
8982   switch (BuiltinID) {
8983   case AMDGPU::BI__builtin_amdgcn_div_scale:
8984   case AMDGPU::BI__builtin_amdgcn_div_scalef: {
8985     // Translate from the intrinsic's struct return to the builtin's out
8986     // argument.
8987 
         // The out-pointer (fourth argument) is evaluated before the three value
         // operands.
8988     Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
8989 
8990     llvm::Value *X = EmitScalarExpr(E->getArg(0));
8991     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
8992     llvm::Value *Z = EmitScalarExpr(E->getArg(2));
8993 
         // The intrinsic is overloaded on the type of the first operand.
8994     llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
8995                                            X->getType());
8996 
8997     llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
8998 
         // Element 0 of the returned aggregate is the result, element 1 the flag.
8999     llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
9000     llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
9001 
         // The flag is widened (zero-extended) to the pointee type of the
         // out-pointer before it is stored.
9002     llvm::Type *RealFlagType
9003       = FlagOutPtr.getPointer()->getType()->getPointerElementType();
9004 
9005     llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
9006     Builder.CreateStore(FlagExt, FlagOutPtr);
9007     return Result;
9008   }
9009   case AMDGPU::BI__builtin_amdgcn_div_fmas:
9010   case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
9011     llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
9012     llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
9013     llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
9014     llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
9015 
9016     llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
9017                                       Src0->getType());
         // The fourth operand is turned into a boolean with a "not null" test
         // before being handed to the intrinsic.
9018     llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
9019     return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
9020   }
9021 
9022   case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
9023     return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
9024   case AMDGPU::BI__builtin_amdgcn_mov_dpp: {
         // All five call arguments are forwarded unchanged, in order; the
         // intrinsic is overloaded on the type of the first one.
9025     llvm::SmallVector<llvm::Value *, 5> Args;
9026     for (unsigned I = 0; I != 5; ++I)
9027       Args.push_back(EmitScalarExpr(E->getArg(I)));
9028     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp,
9029                                     Args[0]->getType());
9030     return Builder.CreateCall(F, Args);
9031   }
9032   case AMDGPU::BI__builtin_amdgcn_div_fixup:
9033   case AMDGPU::BI__builtin_amdgcn_div_fixupf:
9034   case AMDGPU::BI__builtin_amdgcn_div_fixuph:
9035     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
9036   case AMDGPU::BI__builtin_amdgcn_trig_preop:
9037   case AMDGPU::BI__builtin_amdgcn_trig_preopf:
9038     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
9039   case AMDGPU::BI__builtin_amdgcn_rcp:
9040   case AMDGPU::BI__builtin_amdgcn_rcpf:
9041   case AMDGPU::BI__builtin_amdgcn_rcph:
9042     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
9043   case AMDGPU::BI__builtin_amdgcn_rsq:
9044   case AMDGPU::BI__builtin_amdgcn_rsqf:
9045   case AMDGPU::BI__builtin_amdgcn_rsqh:
9046     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
9047   case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
9048   case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
9049     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
9050   case AMDGPU::BI__builtin_amdgcn_sinf:
9051   case AMDGPU::BI__builtin_amdgcn_sinh:
9052     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
9053   case AMDGPU::BI__builtin_amdgcn_cosf:
9054   case AMDGPU::BI__builtin_amdgcn_cosh:
9055     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
9056   case AMDGPU::BI__builtin_amdgcn_log_clampf:
9057     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
9058   case AMDGPU::BI__builtin_amdgcn_ldexp:
9059   case AMDGPU::BI__builtin_amdgcn_ldexpf:
9060   case AMDGPU::BI__builtin_amdgcn_ldexph:
9061     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
9062   case AMDGPU::BI__builtin_amdgcn_frexp_mant:
9063   case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
9064   case AMDGPU::BI__builtin_amdgcn_frexp_manth:
9065     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
9066   case AMDGPU::BI__builtin_amdgcn_frexp_exp:
9067   case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
         // amdgcn_frexp_exp is overloaded on both the integer result type and
         // the source type; these two variants request a 32-bit result.
9068     Value *Src0 = EmitScalarExpr(E->getArg(0));
9069     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
9070                                 { Builder.getInt32Ty(), Src0->getType() });
9071     return Builder.CreateCall(F, Src0);
9072   }
9073   case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
         // Same intrinsic as above, but requested with a 16-bit integer result.
9074     Value *Src0 = EmitScalarExpr(E->getArg(0));
9075     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
9076                                 { Builder.getInt16Ty(), Src0->getType() });
9077     return Builder.CreateCall(F, Src0);
9078   }
9079   case AMDGPU::BI__builtin_amdgcn_fract:
9080   case AMDGPU::BI__builtin_amdgcn_fractf:
9081   case AMDGPU::BI__builtin_amdgcn_fracth:
9082     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
9083   case AMDGPU::BI__builtin_amdgcn_lerp:
9084     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
       // The uicmp/uicmpl/sicmp/sicmpl builtins all share the single amdgcn_icmp
       // intrinsic.
9085   case AMDGPU::BI__builtin_amdgcn_uicmp:
9086   case AMDGPU::BI__builtin_amdgcn_uicmpl:
9087   case AMDGPU::BI__builtin_amdgcn_sicmp:
9088   case AMDGPU::BI__builtin_amdgcn_sicmpl:
9089     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp);
9090   case AMDGPU::BI__builtin_amdgcn_fcmp:
9091   case AMDGPU::BI__builtin_amdgcn_fcmpf:
9092     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp);
9093   case AMDGPU::BI__builtin_amdgcn_class:
9094   case AMDGPU::BI__builtin_amdgcn_classf:
9095   case AMDGPU::BI__builtin_amdgcn_classh:
9096     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
9097   case AMDGPU::BI__builtin_amdgcn_fmed3f:
9098   case AMDGPU::BI__builtin_amdgcn_fmed3h:
9099     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
9100   case AMDGPU::BI__builtin_amdgcn_read_exec: {
         // Emitted through EmitSpecialRegisterBuiltin for the register named
         // "exec", with Int64Ty passed for both type parameters.  The helper's
         // result must be a call instruction (cast<> asserts this); that call is
         // then marked convergent.
9101     CallInst *CI = cast<CallInst>(
9102       EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
9103     CI->setConvergent();
9104     return CI;
9105   }
9106 
9107   // amdgcn workitem
       // Each ID query passes the bounds 0 and 1024 to emitRangedBuiltin
       // (presumably recorded as range information on the emitted call; the
       // helper is defined elsewhere in this file).
9108   case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
9109     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
9110   case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
9111     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
9112   case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
9113     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
9114 
9115   // r600 intrinsics
9116   case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
9117   case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
9118     return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
9119   case AMDGPU::BI__builtin_r600_read_tidig_x:
9120     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
9121   case AMDGPU::BI__builtin_r600_read_tidig_y:
9122     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
9123   case AMDGPU::BI__builtin_r600_read_tidig_z:
9124     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
       // Anything else is not handled here; the caller receives nullptr.
9125   default:
9126     return nullptr;
9127   }
9128 }
9129 
9130 /// Handle a SystemZ function in which the final argument is a pointer
9131 /// to an int that receives the post-instruction CC value.  At the LLVM level
9132 /// this is represented as a function that returns a {result, cc} pair.
     ///
     /// \param CGF          Function being emitted; supplies the IR builder.
     /// \param IntrinsicID  LLVM intrinsic to call (requested without overload
     ///                     types).
     /// \param E            The builtin call; its last argument is the CC
     ///                     out-pointer, all preceding ones are intrinsic operands.
     /// \returns Element 0 of the intrinsic's result; element 1 is stored through
     ///          the CC pointer.
9133 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
9134                                          unsigned IntrinsicID,
9135                                          const CallExpr *E) {
       // Every argument except the last one is forwarded to the intrinsic.
9136   unsigned NumArgs = E->getNumArgs() - 1;
9137   SmallVector<Value *, 8> Args(NumArgs);
9138   for (unsigned I = 0; I < NumArgs; ++I)
9139     Args[I] = CGF.EmitScalarExpr(E->getArg(I));
       // The last argument (index NumArgs) is the pointer that receives the CC;
       // it is evaluated after the value operands.
9140   Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
9141   Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
9142   Value *Call = CGF.Builder.CreateCall(F, Args);
       // Split the returned pair: store the CC (element 1) first, then extract
       // and return the result (element 0).
9143   Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
9144   CGF.Builder.CreateStore(CC, CCPtr);
9145   return CGF.Builder.CreateExtractValue(Call, 0);
9146 }
9147 
/// Emit LLVM IR for a call to a SystemZ target builtin.
///
/// Three groups of builtins are handled here:
///  - the tbegin / tbegin_nofloat / tbeginc / tabort / non_tx_store builtins,
///    which need fixed control operands or operand adaptation;
///  - vector builtins that are emitted with target-independent LLVM intrinsics
///    (ctpop, ctlz, cttz, sqrt, fma, fabs, rounding, maxnum/minnum) whenever
///    their constant mask arguments allow it;
///  - vector builtins that also report a condition code through a trailing
///    pointer argument (the INTRINSIC_WITH_CC list).
///
/// \param BuiltinID  Identifier of the builtin being called.
/// \param E          The call expression supplying the arguments.
/// \returns The emitted result value, or nullptr if BuiltinID is not handled
///          by this function.
9148 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
9149                                                const CallExpr *E) {
9150   switch (BuiltinID) {
       // tbegin and tbegin_nofloat both pass the constant 0xff0c as the control
       // operand; they differ only in the intrinsic that is called.
9151   case SystemZ::BI__builtin_tbegin: {
9152     Value *TDB = EmitScalarExpr(E->getArg(0));
9153     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
9154     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
9155     return Builder.CreateCall(F, {TDB, Control});
9156   }
9157   case SystemZ::BI__builtin_tbegin_nofloat: {
9158     Value *TDB = EmitScalarExpr(E->getArg(0));
9159     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
9160     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
9161     return Builder.CreateCall(F, {TDB, Control});
9162   }
9163   case SystemZ::BI__builtin_tbeginc: {
         // No argument of the call is read here: a null i8* is passed as the TDB
         // operand, together with the control constant 0xff08.
9164     Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
9165     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
9166     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
9167     return Builder.CreateCall(F, {TDB, Control});
9168   }
9169   case SystemZ::BI__builtin_tabort: {
         // The argument is sign-extended to 64 bits before the call.
9170     Value *Data = EmitScalarExpr(E->getArg(0));
9171     Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
9172     return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
9173   }
9174   case SystemZ::BI__builtin_non_tx_store: {
         // Note the operand order: the builtin takes (address, data) but the
         // intrinsic is called with (data, address).
9175     Value *Address = EmitScalarExpr(E->getArg(0));
9176     Value *Data = EmitScalarExpr(E->getArg(1));
9177     Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
9178     return Builder.CreateCall(F, {Data, Address});
9179   }
9180 
9181   // Vector builtins.  Note that most vector builtins are mapped automatically
9182   // to target-specific LLVM intrinsics.  The ones handled specially here can
9183   // be represented via standard LLVM IR, which is preferable to enable common
9184   // LLVM optimizations.
9185 
       // vpopct*: emitted as llvm.ctpop on the builtin's result type.
9186   case SystemZ::BI__builtin_s390_vpopctb:
9187   case SystemZ::BI__builtin_s390_vpopcth:
9188   case SystemZ::BI__builtin_s390_vpopctf:
9189   case SystemZ::BI__builtin_s390_vpopctg: {
9190     llvm::Type *ResultType = ConvertType(E->getType());
9191     Value *X = EmitScalarExpr(E->getArg(0));
9192     Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
9193     return Builder.CreateCall(F, X);
9194   }
9195 
       // vclz*: emitted as llvm.ctlz on the builtin's result type.
9196   case SystemZ::BI__builtin_s390_vclzb:
9197   case SystemZ::BI__builtin_s390_vclzh:
9198   case SystemZ::BI__builtin_s390_vclzf:
9199   case SystemZ::BI__builtin_s390_vclzg: {
9200     llvm::Type *ResultType = ConvertType(E->getType());
9201     Value *X = EmitScalarExpr(E->getArg(0));
         // Despite its name, this is the i1 constant false, not an undef value.
         // It is passed as the second operand of llvm.ctlz (per the LLVM
         // Language Reference, the flag saying whether a zero input gives an
         // undefined result).
9202     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
9203     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
9204     return Builder.CreateCall(F, {X, Undef});
9205   }
9206 
       // vctz*: emitted as llvm.cttz on the builtin's result type.
9207   case SystemZ::BI__builtin_s390_vctzb:
9208   case SystemZ::BI__builtin_s390_vctzh:
9209   case SystemZ::BI__builtin_s390_vctzf:
9210   case SystemZ::BI__builtin_s390_vctzg: {
9211     llvm::Type *ResultType = ConvertType(E->getType());
9212     Value *X = EmitScalarExpr(E->getArg(0));
         // As for ctlz above: the i1 constant false, passed as the second operand.
9213     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
9214     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
9215     return Builder.CreateCall(F, {X, Undef});
9216   }
9217 
       // vfsq*: emitted as llvm.sqrt.
9218   case SystemZ::BI__builtin_s390_vfsqsb:
9219   case SystemZ::BI__builtin_s390_vfsqdb: {
9220     llvm::Type *ResultType = ConvertType(E->getType());
9221     Value *X = EmitScalarExpr(E->getArg(0));
9222     Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
9223     return Builder.CreateCall(F, X);
9224   }
       // vfma*: emitted as fma(X, Y, Z).
9225   case SystemZ::BI__builtin_s390_vfmasb:
9226   case SystemZ::BI__builtin_s390_vfmadb: {
9227     llvm::Type *ResultType = ConvertType(E->getType());
9228     Value *X = EmitScalarExpr(E->getArg(0));
9229     Value *Y = EmitScalarExpr(E->getArg(1));
9230     Value *Z = EmitScalarExpr(E->getArg(2));
9231     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
9232     return Builder.CreateCall(F, {X, Y, Z});
9233   }
       // vfms*: emitted as fma(X, Y, -Z).  Negation is written as an fsub from
       // the constant returned by getZeroValueForNegation.
9234   case SystemZ::BI__builtin_s390_vfmssb:
9235   case SystemZ::BI__builtin_s390_vfmsdb: {
9236     llvm::Type *ResultType = ConvertType(E->getType());
9237     Value *X = EmitScalarExpr(E->getArg(0));
9238     Value *Y = EmitScalarExpr(E->getArg(1));
9239     Value *Z = EmitScalarExpr(E->getArg(2));
9240     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
9241     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
9242     return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
9243   }
       // vfnma*: emitted as -(fma(X, Y, Z)).
9244   case SystemZ::BI__builtin_s390_vfnmasb:
9245   case SystemZ::BI__builtin_s390_vfnmadb: {
9246     llvm::Type *ResultType = ConvertType(E->getType());
9247     Value *X = EmitScalarExpr(E->getArg(0));
9248     Value *Y = EmitScalarExpr(E->getArg(1));
9249     Value *Z = EmitScalarExpr(E->getArg(2));
9250     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
9251     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
9252     return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, Z}), "sub");
9253   }
       // vfnms*: emitted as -(fma(X, Y, -Z)).
9254   case SystemZ::BI__builtin_s390_vfnmssb:
9255   case SystemZ::BI__builtin_s390_vfnmsdb: {
9256     llvm::Type *ResultType = ConvertType(E->getType());
9257     Value *X = EmitScalarExpr(E->getArg(0));
9258     Value *Y = EmitScalarExpr(E->getArg(1));
9259     Value *Z = EmitScalarExpr(E->getArg(2));
9260     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
9261     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
9262     Value *NegZ = Builder.CreateFSub(Zero, Z, "sub");
         // Unlike the other negations in this function, the outer fsub is
         // created without the "sub" value name.
9263     return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, NegZ}));
9264   }
       // vflp*: emitted as fabs(X).
9265   case SystemZ::BI__builtin_s390_vflpsb:
9266   case SystemZ::BI__builtin_s390_vflpdb: {
9267     llvm::Type *ResultType = ConvertType(E->getType());
9268     Value *X = EmitScalarExpr(E->getArg(0));
9269     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
9270     return Builder.CreateCall(F, X);
9271   }
       // vfln*: emitted as -(fabs(X)).
9272   case SystemZ::BI__builtin_s390_vflnsb:
9273   case SystemZ::BI__builtin_s390_vflndb: {
9274     llvm::Type *ResultType = ConvertType(E->getType());
9275     Value *X = EmitScalarExpr(E->getArg(0));
9276     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
9277     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
9278     return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
9279   }
       // vfi*: the two constant mask arguments (M4, M5) select either a generic
       // rounding intrinsic or, failing that, the target intrinsic.
9280   case SystemZ::BI__builtin_s390_vfisb:
9281   case SystemZ::BI__builtin_s390_vfidb: {
9282     llvm::Type *ResultType = ConvertType(E->getType());
9283     Value *X = EmitScalarExpr(E->getArg(0));
9284     // Constant-fold the M4 and M5 mask arguments.
9285     llvm::APSInt M4, M5;
9286     bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
9287     bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
9288     assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
         // The flags are only read by the assert above; the void casts keep them
         // referenced when the assert is compiled out.
9289     (void)IsConstM4; (void)IsConstM5;
9290     // Check whether this instance can be represented via a LLVM standard
9291     // intrinsic.  We only support some combinations of M4 and M5.
         //   M4 == 0: M5 == 0 -> rint
         //   M4 == 4: M5 == 0 -> nearbyint, 1 -> round, 5 -> trunc,
         //            6 -> ceil, 7 -> floor
         // Every other combination leaves ID as not_intrinsic.
9292     Intrinsic::ID ID = Intrinsic::not_intrinsic;
9293     switch (M4.getZExtValue()) {
9294     default: break;
9295     case 0:  // IEEE-inexact exception allowed
9296       switch (M5.getZExtValue()) {
9297       default: break;
9298       case 0: ID = Intrinsic::rint; break;
9299       }
9300       break;
9301     case 4:  // IEEE-inexact exception suppressed
9302       switch (M5.getZExtValue()) {
9303       default: break;
9304       case 0: ID = Intrinsic::nearbyint; break;
9305       case 1: ID = Intrinsic::round; break;
9306       case 5: ID = Intrinsic::trunc; break;
9307       case 6: ID = Intrinsic::ceil; break;
9308       case 7: ID = Intrinsic::floor; break;
9309       }
9310       break;
9311     }
         // A generic intrinsic was found: call it with X only (the masks are
         // implied by the intrinsic chosen).
9312     if (ID != Intrinsic::not_intrinsic) {
9313       Function *F = CGM.getIntrinsic(ID, ResultType);
9314       return Builder.CreateCall(F, X);
9315     }
         // Otherwise call the target intrinsic matching the builtin and pass
         // both masks through as constants.
9316     switch (BuiltinID) {
9317       case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
9318       case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
9319       default: llvm_unreachable("Unknown BuiltinID");
9320     }
9321     Function *F = CGM.getIntrinsic(ID);
9322     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
9323     Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
9324     return Builder.CreateCall(F, {X, M4Value, M5Value});
9325   }
       // vfmax*: M4 == 4 is emitted as llvm.maxnum; any other M4 value falls
       // back to the target intrinsic with M4 passed as a constant.
9326   case SystemZ::BI__builtin_s390_vfmaxsb:
9327   case SystemZ::BI__builtin_s390_vfmaxdb: {
9328     llvm::Type *ResultType = ConvertType(E->getType());
9329     Value *X = EmitScalarExpr(E->getArg(0));
9330     Value *Y = EmitScalarExpr(E->getArg(1));
9331     // Constant-fold the M4 mask argument.
9332     llvm::APSInt M4;
9333     bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
9334     assert(IsConstM4 && "Constant arg isn't actually constant?");
9335     (void)IsConstM4;
9336     // Check whether this instance can be represented via a LLVM standard
9337     // intrinsic.  We only support some values of M4.
9338     Intrinsic::ID ID = Intrinsic::not_intrinsic;
9339     switch (M4.getZExtValue()) {
9340     default: break;
9341     case 4: ID = Intrinsic::maxnum; break;
9342     }
9343     if (ID != Intrinsic::not_intrinsic) {
9344       Function *F = CGM.getIntrinsic(ID, ResultType);
9345       return Builder.CreateCall(F, {X, Y});
9346     }
         // No generic equivalent for this M4: use the target intrinsic.
9347     switch (BuiltinID) {
9348       case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
9349       case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
9350       default: llvm_unreachable("Unknown BuiltinID");
9351     }
9352     Function *F = CGM.getIntrinsic(ID);
9353     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
9354     return Builder.CreateCall(F, {X, Y, M4Value});
9355   }
       // vfmin*: mirrors the vfmax* case, with llvm.minnum for M4 == 4.
9356   case SystemZ::BI__builtin_s390_vfminsb:
9357   case SystemZ::BI__builtin_s390_vfmindb: {
9358     llvm::Type *ResultType = ConvertType(E->getType());
9359     Value *X = EmitScalarExpr(E->getArg(0));
9360     Value *Y = EmitScalarExpr(E->getArg(1));
9361     // Constant-fold the M4 mask argument.
9362     llvm::APSInt M4;
9363     bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
9364     assert(IsConstM4 && "Constant arg isn't actually constant?");
9365     (void)IsConstM4;
9366     // Check whether this instance can be represented via a LLVM standard
9367     // intrinsic.  We only support some values of M4.
9368     Intrinsic::ID ID = Intrinsic::not_intrinsic;
9369     switch (M4.getZExtValue()) {
9370     default: break;
9371     case 4: ID = Intrinsic::minnum; break;
9372     }
9373     if (ID != Intrinsic::not_intrinsic) {
9374       Function *F = CGM.getIntrinsic(ID, ResultType);
9375       return Builder.CreateCall(F, {X, Y});
9376     }
         // No generic equivalent for this M4: use the target intrinsic.
9377     switch (BuiltinID) {
9378       case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
9379       case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
9380       default: llvm_unreachable("Unknown BuiltinID");
9381     }
9382     Function *F = CGM.getIntrinsic(ID);
9383     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
9384     return Builder.CreateCall(F, {X, Y, M4Value});
9385   }
9386 
9387   // Vector intrinsics that output the post-instruction CC value.
9388 
       // INTRINSIC_WITH_CC(NAME) expands to a case label for
       // SystemZ::BI__builtin_NAME that forwards the call to
       // EmitSystemZIntrinsicWithCC with the identically named Intrinsic::NAME.
       // The macro is undefined again right after the list.
9389 #define INTRINSIC_WITH_CC(NAME) \
9390     case SystemZ::BI__builtin_##NAME: \
9391       return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
9392 
9393   INTRINSIC_WITH_CC(s390_vpkshs);
9394   INTRINSIC_WITH_CC(s390_vpksfs);
9395   INTRINSIC_WITH_CC(s390_vpksgs);
9396 
9397   INTRINSIC_WITH_CC(s390_vpklshs);
9398   INTRINSIC_WITH_CC(s390_vpklsfs);
9399   INTRINSIC_WITH_CC(s390_vpklsgs);
9400 
9401   INTRINSIC_WITH_CC(s390_vceqbs);
9402   INTRINSIC_WITH_CC(s390_vceqhs);
9403   INTRINSIC_WITH_CC(s390_vceqfs);
9404   INTRINSIC_WITH_CC(s390_vceqgs);
9405 
9406   INTRINSIC_WITH_CC(s390_vchbs);
9407   INTRINSIC_WITH_CC(s390_vchhs);
9408   INTRINSIC_WITH_CC(s390_vchfs);
9409   INTRINSIC_WITH_CC(s390_vchgs);
9410 
9411   INTRINSIC_WITH_CC(s390_vchlbs);
9412   INTRINSIC_WITH_CC(s390_vchlhs);
9413   INTRINSIC_WITH_CC(s390_vchlfs);
9414   INTRINSIC_WITH_CC(s390_vchlgs);
9415 
9416   INTRINSIC_WITH_CC(s390_vfaebs);
9417   INTRINSIC_WITH_CC(s390_vfaehs);
9418   INTRINSIC_WITH_CC(s390_vfaefs);
9419 
9420   INTRINSIC_WITH_CC(s390_vfaezbs);
9421   INTRINSIC_WITH_CC(s390_vfaezhs);
9422   INTRINSIC_WITH_CC(s390_vfaezfs);
9423 
9424   INTRINSIC_WITH_CC(s390_vfeebs);
9425   INTRINSIC_WITH_CC(s390_vfeehs);
9426   INTRINSIC_WITH_CC(s390_vfeefs);
9427 
9428   INTRINSIC_WITH_CC(s390_vfeezbs);
9429   INTRINSIC_WITH_CC(s390_vfeezhs);
9430   INTRINSIC_WITH_CC(s390_vfeezfs);
9431 
9432   INTRINSIC_WITH_CC(s390_vfenebs);
9433   INTRINSIC_WITH_CC(s390_vfenehs);
9434   INTRINSIC_WITH_CC(s390_vfenefs);
9435 
9436   INTRINSIC_WITH_CC(s390_vfenezbs);
9437   INTRINSIC_WITH_CC(s390_vfenezhs);
9438   INTRINSIC_WITH_CC(s390_vfenezfs);
9439 
9440   INTRINSIC_WITH_CC(s390_vistrbs);
9441   INTRINSIC_WITH_CC(s390_vistrhs);
9442   INTRINSIC_WITH_CC(s390_vistrfs);
9443 
9444   INTRINSIC_WITH_CC(s390_vstrcbs);
9445   INTRINSIC_WITH_CC(s390_vstrchs);
9446   INTRINSIC_WITH_CC(s390_vstrcfs);
9447 
9448   INTRINSIC_WITH_CC(s390_vstrczbs);
9449   INTRINSIC_WITH_CC(s390_vstrczhs);
9450   INTRINSIC_WITH_CC(s390_vstrczfs);
9451 
9452   INTRINSIC_WITH_CC(s390_vfcesbs);
9453   INTRINSIC_WITH_CC(s390_vfcedbs);
9454   INTRINSIC_WITH_CC(s390_vfchsbs);
9455   INTRINSIC_WITH_CC(s390_vfchdbs);
9456   INTRINSIC_WITH_CC(s390_vfchesbs);
9457   INTRINSIC_WITH_CC(s390_vfchedbs);
9458 
9459   INTRINSIC_WITH_CC(s390_vftcisb);
9460   INTRINSIC_WITH_CC(s390_vftcidb);
9461 
9462 #undef INTRINSIC_WITH_CC
9463 
       // Anything else is not handled here; the caller receives nullptr.
9464   default:
9465     return nullptr;
9466   }
9467 }
9468 
9469 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
9470                                              const CallExpr *E) {
9471   auto MakeLdg = [&](unsigned IntrinsicID) {
9472     Value *Ptr = EmitScalarExpr(E->getArg(0));
9473     clang::CharUnits Align =
9474         getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
9475     return Builder.CreateCall(
9476         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
9477                                        Ptr->getType()}),
9478         {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
9479   };
9480   auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
9481     Value *Ptr = EmitScalarExpr(E->getArg(0));
9482     return Builder.CreateCall(
9483         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
9484                                        Ptr->getType()}),
9485         {Ptr, EmitScalarExpr(E->getArg(1))});
9486   };
9487   switch (BuiltinID) {
9488   case NVPTX::BI__nvvm_atom_add_gen_i:
9489   case NVPTX::BI__nvvm_atom_add_gen_l:
9490   case NVPTX::BI__nvvm_atom_add_gen_ll:
9491     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
9492 
9493   case NVPTX::BI__nvvm_atom_sub_gen_i:
9494   case NVPTX::BI__nvvm_atom_sub_gen_l:
9495   case NVPTX::BI__nvvm_atom_sub_gen_ll:
9496     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
9497 
9498   case NVPTX::BI__nvvm_atom_and_gen_i:
9499   case NVPTX::BI__nvvm_atom_and_gen_l:
9500   case NVPTX::BI__nvvm_atom_and_gen_ll:
9501     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
9502 
9503   case NVPTX::BI__nvvm_atom_or_gen_i:
9504   case NVPTX::BI__nvvm_atom_or_gen_l:
9505   case NVPTX::BI__nvvm_atom_or_gen_ll:
9506     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
9507 
9508   case NVPTX::BI__nvvm_atom_xor_gen_i:
9509   case NVPTX::BI__nvvm_atom_xor_gen_l:
9510   case NVPTX::BI__nvvm_atom_xor_gen_ll:
9511     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
9512 
9513   case NVPTX::BI__nvvm_atom_xchg_gen_i:
9514   case NVPTX::BI__nvvm_atom_xchg_gen_l:
9515   case NVPTX::BI__nvvm_atom_xchg_gen_ll:
9516     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
9517 
9518   case NVPTX::BI__nvvm_atom_max_gen_i:
9519   case NVPTX::BI__nvvm_atom_max_gen_l:
9520   case NVPTX::BI__nvvm_atom_max_gen_ll:
9521     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
9522 
9523   case NVPTX::BI__nvvm_atom_max_gen_ui:
9524   case NVPTX::BI__nvvm_atom_max_gen_ul:
9525   case NVPTX::BI__nvvm_atom_max_gen_ull:
9526     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
9527 
9528   case NVPTX::BI__nvvm_atom_min_gen_i:
9529   case NVPTX::BI__nvvm_atom_min_gen_l:
9530   case NVPTX::BI__nvvm_atom_min_gen_ll:
9531     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
9532 
9533   case NVPTX::BI__nvvm_atom_min_gen_ui:
9534   case NVPTX::BI__nvvm_atom_min_gen_ul:
9535   case NVPTX::BI__nvvm_atom_min_gen_ull:
9536     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
9537 
9538   case NVPTX::BI__nvvm_atom_cas_gen_i:
9539   case NVPTX::BI__nvvm_atom_cas_gen_l:
9540   case NVPTX::BI__nvvm_atom_cas_gen_ll:
9541     // __nvvm_atom_cas_gen_* should return the old value rather than the
9542     // success flag.
9543     return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
9544 
9545   case NVPTX::BI__nvvm_atom_add_gen_f: {
9546     Value *Ptr = EmitScalarExpr(E->getArg(0));
9547     Value *Val = EmitScalarExpr(E->getArg(1));
9548     // atomicrmw only deals with integer arguments so we need to use
9549     // LLVM's nvvm_atomic_load_add_f32 intrinsic for that.
9550     Value *FnALAF32 =
9551         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
9552     return Builder.CreateCall(FnALAF32, {Ptr, Val});
9553   }
9554 
9555   case NVPTX::BI__nvvm_atom_inc_gen_ui: {
9556     Value *Ptr = EmitScalarExpr(E->getArg(0));
9557     Value *Val = EmitScalarExpr(E->getArg(1));
9558     Value *FnALI32 =
9559         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
9560     return Builder.CreateCall(FnALI32, {Ptr, Val});
9561   }
9562 
9563   case NVPTX::BI__nvvm_atom_dec_gen_ui: {
9564     Value *Ptr = EmitScalarExpr(E->getArg(0));
9565     Value *Val = EmitScalarExpr(E->getArg(1));
9566     Value *FnALD32 =
9567         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
9568     return Builder.CreateCall(FnALD32, {Ptr, Val});
9569   }
9570 
9571   case NVPTX::BI__nvvm_ldg_c:
9572   case NVPTX::BI__nvvm_ldg_c2:
9573   case NVPTX::BI__nvvm_ldg_c4:
9574   case NVPTX::BI__nvvm_ldg_s:
9575   case NVPTX::BI__nvvm_ldg_s2:
9576   case NVPTX::BI__nvvm_ldg_s4:
9577   case NVPTX::BI__nvvm_ldg_i:
9578   case NVPTX::BI__nvvm_ldg_i2:
9579   case NVPTX::BI__nvvm_ldg_i4:
9580   case NVPTX::BI__nvvm_ldg_l:
9581   case NVPTX::BI__nvvm_ldg_ll:
9582   case NVPTX::BI__nvvm_ldg_ll2:
9583   case NVPTX::BI__nvvm_ldg_uc:
9584   case NVPTX::BI__nvvm_ldg_uc2:
9585   case NVPTX::BI__nvvm_ldg_uc4:
9586   case NVPTX::BI__nvvm_ldg_us:
9587   case NVPTX::BI__nvvm_ldg_us2:
9588   case NVPTX::BI__nvvm_ldg_us4:
9589   case NVPTX::BI__nvvm_ldg_ui:
9590   case NVPTX::BI__nvvm_ldg_ui2:
9591   case NVPTX::BI__nvvm_ldg_ui4:
9592   case NVPTX::BI__nvvm_ldg_ul:
9593   case NVPTX::BI__nvvm_ldg_ull:
9594   case NVPTX::BI__nvvm_ldg_ull2:
9595     // PTX Interoperability section 2.2: "For a vector with an even number of
9596     // elements, its alignment is set to number of elements times the alignment
9597     // of its member: n*alignof(t)."
9598     return MakeLdg(Intrinsic::nvvm_ldg_global_i);
9599   case NVPTX::BI__nvvm_ldg_f:
9600   case NVPTX::BI__nvvm_ldg_f2:
9601   case NVPTX::BI__nvvm_ldg_f4:
9602   case NVPTX::BI__nvvm_ldg_d:
9603   case NVPTX::BI__nvvm_ldg_d2:
9604     return MakeLdg(Intrinsic::nvvm_ldg_global_f);
9605 
9606   case NVPTX::BI__nvvm_atom_cta_add_gen_i:
9607   case NVPTX::BI__nvvm_atom_cta_add_gen_l:
9608   case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
9609     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta);
9610   case NVPTX::BI__nvvm_atom_sys_add_gen_i:
9611   case NVPTX::BI__nvvm_atom_sys_add_gen_l:
9612   case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
9613     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys);
9614   case NVPTX::BI__nvvm_atom_cta_add_gen_f:
9615   case NVPTX::BI__nvvm_atom_cta_add_gen_d:
9616     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta);
9617   case NVPTX::BI__nvvm_atom_sys_add_gen_f:
9618   case NVPTX::BI__nvvm_atom_sys_add_gen_d:
9619     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys);
9620   case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
9621   case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
9622   case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
9623     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta);
9624   case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
9625   case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
9626   case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
9627     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys);
9628   case NVPTX::BI__nvvm_atom_cta_max_gen_i:
9629   case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
9630   case NVPTX::BI__nvvm_atom_cta_max_gen_l:
9631   case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
9632   case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
9633   case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
9634     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta);
9635   case NVPTX::BI__nvvm_atom_sys_max_gen_i:
9636   case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
9637   case NVPTX::BI__nvvm_atom_sys_max_gen_l:
9638   case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
9639   case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
9640   case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
9641     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys);
9642   case NVPTX::BI__nvvm_atom_cta_min_gen_i:
9643   case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
9644   case NVPTX::BI__nvvm_atom_cta_min_gen_l:
9645   case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
9646   case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
9647   case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
9648     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta);
9649   case NVPTX::BI__nvvm_atom_sys_min_gen_i:
9650   case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
9651   case NVPTX::BI__nvvm_atom_sys_min_gen_l:
9652   case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
9653   case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
9654   case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
9655     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys);
9656   case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
9657     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta);
9658   case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
9659     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta);
9660   case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
9661     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys);
9662   case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
9663     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys);
9664   case NVPTX::BI__nvvm_atom_cta_and_gen_i:
9665   case NVPTX::BI__nvvm_atom_cta_and_gen_l:
9666   case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
9667     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta);
9668   case NVPTX::BI__nvvm_atom_sys_and_gen_i:
9669   case NVPTX::BI__nvvm_atom_sys_and_gen_l:
9670   case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
9671     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys);
9672   case NVPTX::BI__nvvm_atom_cta_or_gen_i:
9673   case NVPTX::BI__nvvm_atom_cta_or_gen_l:
9674   case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
9675     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta);
9676   case NVPTX::BI__nvvm_atom_sys_or_gen_i:
9677   case NVPTX::BI__nvvm_atom_sys_or_gen_l:
9678   case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
9679     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys);
9680   case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
9681   case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
9682   case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
9683     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta);
9684   case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
9685   case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
9686   case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
9687     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys);
9688   case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
9689   case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
9690   case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
9691     Value *Ptr = EmitScalarExpr(E->getArg(0));
9692     return Builder.CreateCall(
9693         CGM.getIntrinsic(
9694             Intrinsic::nvvm_atomic_cas_gen_i_cta,
9695             {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
9696         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
9697   }
9698   case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
9699   case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
9700   case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
9701     Value *Ptr = EmitScalarExpr(E->getArg(0));
9702     return Builder.CreateCall(
9703         CGM.getIntrinsic(
9704             Intrinsic::nvvm_atomic_cas_gen_i_sys,
9705             {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
9706         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
9707   }
9708   case NVPTX::BI__nvvm_match_all_sync_i32p:
9709   case NVPTX::BI__nvvm_match_all_sync_i64p: {
9710     Value *Mask = EmitScalarExpr(E->getArg(0));
9711     Value *Val = EmitScalarExpr(E->getArg(1));
9712     Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
9713     Value *ResultPair = Builder.CreateCall(
9714         CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
9715                              ? Intrinsic::nvvm_match_all_sync_i32p
9716                              : Intrinsic::nvvm_match_all_sync_i64p),
9717         {Mask, Val});
9718     Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
9719                                      PredOutPtr.getElementType());
9720     Builder.CreateStore(Pred, PredOutPtr);
9721     return Builder.CreateExtractValue(ResultPair, 0);
9722   }
9723   default:
9724     return nullptr;
9725   }
9726 }
9727 
9728 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
9729                                                    const CallExpr *E) {
9730   switch (BuiltinID) {
9731   case WebAssembly::BI__builtin_wasm_current_memory: {
9732     llvm::Type *ResultType = ConvertType(E->getType());
9733     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
9734     return Builder.CreateCall(Callee);
9735   }
9736   case WebAssembly::BI__builtin_wasm_grow_memory: {
9737     Value *X = EmitScalarExpr(E->getArg(0));
9738     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
9739     return Builder.CreateCall(Callee, X);
9740   }
9741   case WebAssembly::BI__builtin_wasm_throw: {
9742     Value *Tag = EmitScalarExpr(E->getArg(0));
9743     Value *Obj = EmitScalarExpr(E->getArg(1));
9744     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
9745     return Builder.CreateCall(Callee, {Tag, Obj});
9746   }
9747   case WebAssembly::BI__builtin_wasm_rethrow: {
9748     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
9749     return Builder.CreateCall(Callee);
9750   }
9751 
9752   default:
9753     return nullptr;
9754   }
9755 }
9756