1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGObjCRuntime.h"
16 #include "CGOpenCLRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "CodeGenModule.h"
19 #include "TargetInfo.h"
20 #include "clang/AST/ASTContext.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/Analysis/Analyses/OSLog.h"
23 #include "clang/Basic/TargetBuiltins.h"
24 #include "clang/Basic/TargetInfo.h"
25 #include "clang/CodeGen/CGFunctionInfo.h"
26 #include "llvm/ADT/StringExtras.h"
27 #include "llvm/IR/CallSite.h"
28 #include "llvm/IR/DataLayout.h"
29 #include "llvm/IR/InlineAsm.h"
30 #include "llvm/IR/Intrinsics.h"
31 #include "llvm/IR/MDBuilder.h"
32 #include <sstream>
33 
34 using namespace clang;
35 using namespace CodeGen;
36 using namespace llvm;
37 
/// Restrict \p Value to the range bounded by \p Low and \p High.
///
/// The lower bound is applied first and the upper bound last, so if the
/// bounds are inverted (\p Low greater than \p High) the result is \p High.
static int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
  // Raise the value to the lower bound when it falls below it.
  const int64_t Raised = Low < Value ? Value : Low;
  // Then cap the raised value at the upper bound.
  return Raised < High ? Raised : High;
}
42 
43 /// getBuiltinLibFunction - Given a builtin id for a function like
44 /// "__builtin_fabsf", return a Function* for "fabsf".
45 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
46                                                      unsigned BuiltinID) {
47   assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
48 
49   // Get the name, skip over the __builtin_ prefix (if necessary).
50   StringRef Name;
51   GlobalDecl D(FD);
52 
53   // If the builtin has been declared explicitly with an assembler label,
54   // use the mangled name. This differs from the plain label on platforms
55   // that prefix labels.
56   if (FD->hasAttr<AsmLabelAttr>())
57     Name = getMangledName(D);
58   else
59     Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
60 
61   llvm::FunctionType *Ty =
62     cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
63 
64   return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
65 }
66 
67 /// Emit the conversions required to turn the given value into an
68 /// integer of the given size.
69 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
70                         QualType T, llvm::IntegerType *IntType) {
71   V = CGF.EmitToMemory(V, T);
72 
73   if (V->getType()->isPointerTy())
74     return CGF.Builder.CreatePtrToInt(V, IntType);
75 
76   assert(V->getType() == IntType);
77   return V;
78 }
79 
80 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
81                           QualType T, llvm::Type *ResultType) {
82   V = CGF.EmitFromMemory(V, T);
83 
84   if (ResultType->isPointerTy())
85     return CGF.Builder.CreateIntToPtr(V, ResultType);
86 
87   assert(V->getType() == ResultType);
88   return V;
89 }
90 
91 /// Utility to insert an atomic instruction based on Instrinsic::ID
92 /// and the expression node.
93 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
94                                     llvm::AtomicRMWInst::BinOp Kind,
95                                     const CallExpr *E) {
96   QualType T = E->getType();
97   assert(E->getArg(0)->getType()->isPointerType());
98   assert(CGF.getContext().hasSameUnqualifiedType(T,
99                                   E->getArg(0)->getType()->getPointeeType()));
100   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
101 
102   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
103   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
104 
105   llvm::IntegerType *IntType =
106     llvm::IntegerType::get(CGF.getLLVMContext(),
107                            CGF.getContext().getTypeSize(T));
108   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
109 
110   llvm::Value *Args[2];
111   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
112   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
113   llvm::Type *ValueType = Args[1]->getType();
114   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
115 
116   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
117       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
118   return EmitFromInt(CGF, Result, T, ValueType);
119 }
120 
121 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
122   Value *Val = CGF.EmitScalarExpr(E->getArg(0));
123   Value *Address = CGF.EmitScalarExpr(E->getArg(1));
124 
125   // Convert the type of the pointer to a pointer to the stored type.
126   Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
127   Value *BC = CGF.Builder.CreateBitCast(
128       Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
129   LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
130   LV.setNontemporal(true);
131   CGF.EmitStoreOfScalar(Val, LV, false);
132   return nullptr;
133 }
134 
135 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
136   Value *Address = CGF.EmitScalarExpr(E->getArg(0));
137 
138   LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
139   LV.setNontemporal(true);
140   return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
141 }
142 
143 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
144                                llvm::AtomicRMWInst::BinOp Kind,
145                                const CallExpr *E) {
146   return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
147 }
148 
149 /// Utility to insert an atomic instruction based Instrinsic::ID and
150 /// the expression node, where the return value is the result of the
151 /// operation.
152 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
153                                    llvm::AtomicRMWInst::BinOp Kind,
154                                    const CallExpr *E,
155                                    Instruction::BinaryOps Op,
156                                    bool Invert = false) {
157   QualType T = E->getType();
158   assert(E->getArg(0)->getType()->isPointerType());
159   assert(CGF.getContext().hasSameUnqualifiedType(T,
160                                   E->getArg(0)->getType()->getPointeeType()));
161   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
162 
163   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
164   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
165 
166   llvm::IntegerType *IntType =
167     llvm::IntegerType::get(CGF.getLLVMContext(),
168                            CGF.getContext().getTypeSize(T));
169   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
170 
171   llvm::Value *Args[2];
172   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
173   llvm::Type *ValueType = Args[1]->getType();
174   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
175   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
176 
177   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
178       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
179   Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
180   if (Invert)
181     Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
182                                      llvm::ConstantInt::get(IntType, -1));
183   Result = EmitFromInt(CGF, Result, T, ValueType);
184   return RValue::get(Result);
185 }
186 
187 /// @brief Utility to insert an atomic cmpxchg instruction.
188 ///
189 /// @param CGF The current codegen function.
190 /// @param E   Builtin call expression to convert to cmpxchg.
191 ///            arg0 - address to operate on
192 ///            arg1 - value to compare with
193 ///            arg2 - new value
194 /// @param ReturnBool Specifies whether to return success flag of
195 ///                   cmpxchg result or the old value.
196 ///
197 /// @returns result of cmpxchg, according to ReturnBool
198 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
199                                      bool ReturnBool) {
200   QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
201   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
202   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
203 
204   llvm::IntegerType *IntType = llvm::IntegerType::get(
205       CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
206   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
207 
208   Value *Args[3];
209   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
210   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
211   llvm::Type *ValueType = Args[1]->getType();
212   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
213   Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
214 
215   Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
216       Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
217       llvm::AtomicOrdering::SequentiallyConsistent);
218   if (ReturnBool)
219     // Extract boolean success flag and zext it to int.
220     return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
221                                   CGF.ConvertType(E->getType()));
222   else
223     // Extract old value and emit it using the same type as compare value.
224     return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
225                        ValueType);
226 }
227 
228 // Emit a simple mangled intrinsic that has 1 argument and a return type
229 // matching the argument type.
230 static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
231                                const CallExpr *E,
232                                unsigned IntrinsicID) {
233   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
234 
235   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
236   return CGF.Builder.CreateCall(F, Src0);
237 }
238 
239 // Emit an intrinsic that has 2 operands of the same type as its result.
240 static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
241                                 const CallExpr *E,
242                                 unsigned IntrinsicID) {
243   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
244   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
245 
246   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
247   return CGF.Builder.CreateCall(F, { Src0, Src1 });
248 }
249 
250 // Emit an intrinsic that has 3 operands of the same type as its result.
251 static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
252                                  const CallExpr *E,
253                                  unsigned IntrinsicID) {
254   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
255   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
256   llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
257 
258   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
259   return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
260 }
261 
262 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
263 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
264                                const CallExpr *E,
265                                unsigned IntrinsicID) {
266   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
267   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
268 
269   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
270   return CGF.Builder.CreateCall(F, {Src0, Src1});
271 }
272 
273 /// EmitFAbs - Emit a call to @llvm.fabs().
274 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
275   Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
276   llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
277   Call->setDoesNotAccessMemory();
278   return Call;
279 }
280 
281 /// Emit the computation of the sign bit for a floating point value. Returns
282 /// the i1 sign bit value.
283 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
284   LLVMContext &C = CGF.CGM.getLLVMContext();
285 
286   llvm::Type *Ty = V->getType();
287   int Width = Ty->getPrimitiveSizeInBits();
288   llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
289   V = CGF.Builder.CreateBitCast(V, IntTy);
290   if (Ty->isPPC_FP128Ty()) {
291     // We want the sign bit of the higher-order double. The bitcast we just
292     // did works as if the double-double was stored to memory and then
293     // read as an i128. The "store" will put the higher-order double in the
294     // lower address in both little- and big-Endian modes, but the "load"
295     // will treat those bits as a different part of the i128: the low bits in
296     // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
297     // we need to shift the high bits down to the low before truncating.
298     Width >>= 1;
299     if (CGF.getTarget().isBigEndian()) {
300       Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
301       V = CGF.Builder.CreateLShr(V, ShiftCst);
302     }
303     // We are truncating value in order to extract the higher-order
304     // double, which we will be using to extract the sign from.
305     IntTy = llvm::IntegerType::get(C, Width);
306     V = CGF.Builder.CreateTrunc(V, IntTy);
307   }
308   Value *Zero = llvm::Constant::getNullValue(IntTy);
309   return CGF.Builder.CreateICmpSLT(V, Zero);
310 }
311 
312 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
313                               const CallExpr *E, llvm::Constant *calleeValue) {
314   CGCallee callee = CGCallee::forDirect(calleeValue, FD);
315   return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
316 }
317 
318 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
319 /// depending on IntrinsicID.
320 ///
321 /// \arg CGF The current codegen function.
322 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
323 /// \arg X The first argument to the llvm.*.with.overflow.*.
324 /// \arg Y The second argument to the llvm.*.with.overflow.*.
325 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
326 /// \returns The result (i.e. sum/product) returned by the intrinsic.
327 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
328                                           const llvm::Intrinsic::ID IntrinsicID,
329                                           llvm::Value *X, llvm::Value *Y,
330                                           llvm::Value *&Carry) {
331   // Make sure we have integers of the same width.
332   assert(X->getType() == Y->getType() &&
333          "Arguments must be the same type. (Did you forget to make sure both "
334          "arguments have the same integer width?)");
335 
336   llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
337   llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
338   Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
339   return CGF.Builder.CreateExtractValue(Tmp, 0);
340 }
341 
342 static Value *emitRangedBuiltin(CodeGenFunction &CGF,
343                                 unsigned IntrinsicID,
344                                 int low, int high) {
345     llvm::MDBuilder MDHelper(CGF.getLLVMContext());
346     llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
347     Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
348     llvm::Instruction *Call = CGF.Builder.CreateCall(F);
349     Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
350     return Call;
351 }
352 
namespace {
/// Describes an integer type by its bit width and its signedness.
/// Kept as a plain aggregate so it can be brace-initialized as
/// {Width, Signed}.
struct WidthAndSignedness {
  /// Width of the integer type in bits.
  unsigned Width;
  /// True when the integer type is signed.
  bool Signed;
};
} // end anonymous namespace
359 
360 static WidthAndSignedness
361 getIntegerWidthAndSignedness(const clang::ASTContext &context,
362                              const clang::QualType Type) {
363   assert(Type->isIntegerType() && "Given type is not an integer.");
364   unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
365   bool Signed = Type->isSignedIntegerType();
366   return {Width, Signed};
367 }
368 
369 // Given one or more integer types, this function produces an integer type that
370 // encompasses them: any value in one of the given types could be expressed in
371 // the encompassing type.
372 static struct WidthAndSignedness
373 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
374   assert(Types.size() > 0 && "Empty list of types.");
375 
376   // If any of the given types is signed, we must return a signed type.
377   bool Signed = false;
378   for (const auto &Type : Types) {
379     Signed |= Type.Signed;
380   }
381 
382   // The encompassing type must have a width greater than or equal to the width
383   // of the specified types.  Aditionally, if the encompassing type is signed,
384   // its width must be strictly greater than the width of any unsigned types
385   // given.
386   unsigned Width = 0;
387   for (const auto &Type : Types) {
388     unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
389     if (Width < MinWidth) {
390       Width = MinWidth;
391     }
392   }
393 
394   return {Width, Signed};
395 }
396 
397 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
398   llvm::Type *DestType = Int8PtrTy;
399   if (ArgValue->getType() != DestType)
400     ArgValue =
401         Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
402 
403   Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
404   return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
405 }
406 
/// Checks if using the result of __builtin_object_size(p, @p From) in place of
/// __builtin_object_size(p, @p To) is correct
static bool areBOSTypesCompatible(int From, int To) {
  // Note: Our __builtin_object_size implementation currently treats Type=0 and
  // Type=2 identically. Encoding this implementation detail here may make
  // improving __builtin_object_size difficult in the future, so it's omitted.

  // Identical types are always interchangeable.
  if (From == To)
    return true;

  // Besides that, only two substitutions are accepted: 0 for 1, and 3 for 2.
  switch (From) {
  case 0:
    return To == 1;
  case 3:
    return To == 2;
  default:
    return false;
  }
}
415 
416 static llvm::Value *
417 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
418   return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
419 }
420 
421 llvm::Value *
422 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
423                                                  llvm::IntegerType *ResType,
424                                                  llvm::Value *EmittedE) {
425   uint64_t ObjectSize;
426   if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
427     return emitBuiltinObjectSize(E, Type, ResType, EmittedE);
428   return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
429 }
430 
431 /// Returns a Value corresponding to the size of the given expression.
432 /// This Value may be either of the following:
433 ///   - A llvm::Argument (if E is a param with the pass_object_size attribute on
434 ///     it)
435 ///   - A call to the @llvm.objectsize intrinsic
436 ///
437 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
438 /// and we wouldn't otherwise try to reference a pass_object_size parameter,
439 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
440 llvm::Value *
441 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
442                                        llvm::IntegerType *ResType,
443                                        llvm::Value *EmittedE) {
444   // We need to reference an argument if the pointer is a parameter with the
445   // pass_object_size attribute.
446   if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
447     auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
448     auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
449     if (Param != nullptr && PS != nullptr &&
450         areBOSTypesCompatible(PS->getType(), Type)) {
451       auto Iter = SizeArguments.find(Param);
452       assert(Iter != SizeArguments.end());
453 
454       const ImplicitParamDecl *D = Iter->second;
455       auto DIter = LocalDeclMap.find(D);
456       assert(DIter != LocalDeclMap.end());
457 
458       return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
459                               getContext().getSizeType(), E->getLocStart());
460     }
461   }
462 
463   // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
464   // evaluate E for side-effects. In either case, we shouldn't lower to
465   // @llvm.objectsize.
466   if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
467     return getDefaultBuiltinObjectSizeResult(Type, ResType);
468 
469   Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
470   assert(Ptr->getType()->isPointerTy() &&
471          "Non-pointer passed to __builtin_object_size?");
472 
473   Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
474 
475   // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
476   Value *Min = Builder.getInt1((Type & 2) != 0);
477   // For GCC compatability, __builtin_object_size treat NULL as unknown size.
478   Value *NullIsUnknown = Builder.getTrue();
479   return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
480 }
481 
// Many of MSVC builtins are on both x64 and ARM; to avoid repeating code, we
// handle them here.
//
// Each enumerator names one MSVC intrinsic; EmitMSVCBuiltinExpr switches on
// this enum to emit the corresponding code.
enum class CodeGenFunction::MSVCIntrin {
  _BitScanForward,
  _BitScanReverse,
  _InterlockedAnd,
  _InterlockedDecrement,
  _InterlockedExchange,
  _InterlockedExchangeAdd,
  _InterlockedExchangeSub,
  _InterlockedIncrement,
  _InterlockedOr,
  _InterlockedXor,
  _interlockedbittestandset,
  __fastfail,
};
498 
499 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
500                                             const CallExpr *E) {
501   switch (BuiltinID) {
502   case MSVCIntrin::_BitScanForward:
503   case MSVCIntrin::_BitScanReverse: {
504     Value *ArgValue = EmitScalarExpr(E->getArg(1));
505 
506     llvm::Type *ArgType = ArgValue->getType();
507     llvm::Type *IndexType =
508       EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType();
509     llvm::Type *ResultType = ConvertType(E->getType());
510 
511     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
512     Value *ResZero = llvm::Constant::getNullValue(ResultType);
513     Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
514 
515     BasicBlock *Begin = Builder.GetInsertBlock();
516     BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
517     Builder.SetInsertPoint(End);
518     PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
519 
520     Builder.SetInsertPoint(Begin);
521     Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
522     BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
523     Builder.CreateCondBr(IsZero, End, NotZero);
524     Result->addIncoming(ResZero, Begin);
525 
526     Builder.SetInsertPoint(NotZero);
527     Address IndexAddress = EmitPointerWithAlignment(E->getArg(0));
528 
529     if (BuiltinID == MSVCIntrin::_BitScanForward) {
530       Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
531       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
532       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
533       Builder.CreateStore(ZeroCount, IndexAddress, false);
534     } else {
535       unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
536       Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
537 
538       Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
539       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
540       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
541       Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
542       Builder.CreateStore(Index, IndexAddress, false);
543     }
544     Builder.CreateBr(End);
545     Result->addIncoming(ResOne, NotZero);
546 
547     Builder.SetInsertPoint(End);
548     return Result;
549   }
550   case MSVCIntrin::_InterlockedAnd:
551     return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
552   case MSVCIntrin::_InterlockedExchange:
553     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
554   case MSVCIntrin::_InterlockedExchangeAdd:
555     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
556   case MSVCIntrin::_InterlockedExchangeSub:
557     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
558   case MSVCIntrin::_InterlockedOr:
559     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
560   case MSVCIntrin::_InterlockedXor:
561     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
562 
563   case MSVCIntrin::_interlockedbittestandset: {
564     llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
565     llvm::Value *Bit = EmitScalarExpr(E->getArg(1));
566     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
567         AtomicRMWInst::Or, Addr,
568         Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit),
569         llvm::AtomicOrdering::SequentiallyConsistent);
570     // Shift the relevant bit to the least significant position, truncate to
571     // the result type, and test the low bit.
572     llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit);
573     llvm::Value *Truncated =
574         Builder.CreateTrunc(Shifted, ConvertType(E->getType()));
575     return Builder.CreateAnd(Truncated,
576                              ConstantInt::get(Truncated->getType(), 1));
577   }
578 
579   case MSVCIntrin::_InterlockedDecrement: {
580     llvm::Type *IntTy = ConvertType(E->getType());
581     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
582       AtomicRMWInst::Sub,
583       EmitScalarExpr(E->getArg(0)),
584       ConstantInt::get(IntTy, 1),
585       llvm::AtomicOrdering::SequentiallyConsistent);
586     return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1));
587   }
588   case MSVCIntrin::_InterlockedIncrement: {
589     llvm::Type *IntTy = ConvertType(E->getType());
590     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
591       AtomicRMWInst::Add,
592       EmitScalarExpr(E->getArg(0)),
593       ConstantInt::get(IntTy, 1),
594       llvm::AtomicOrdering::SequentiallyConsistent);
595     return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1));
596   }
597 
598   case MSVCIntrin::__fastfail: {
599     // Request immediate process termination from the kernel. The instruction
600     // sequences to do this are documented on MSDN:
601     // https://msdn.microsoft.com/en-us/library/dn774154.aspx
602     llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
603     StringRef Asm, Constraints;
604     switch (ISA) {
605     default:
606       ErrorUnsupported(E, "__fastfail call for this architecture");
607       break;
608     case llvm::Triple::x86:
609     case llvm::Triple::x86_64:
610       Asm = "int $$0x29";
611       Constraints = "{cx}";
612       break;
613     case llvm::Triple::thumb:
614       Asm = "udf #251";
615       Constraints = "{r0}";
616       break;
617     }
618     llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
619     llvm::InlineAsm *IA =
620         llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
621     llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
622         getLLVMContext(), llvm::AttributeList::FunctionIndex,
623         llvm::Attribute::NoReturn);
624     CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
625     CS.setAttributes(NoReturnAttr);
626     return CS.getInstruction();
627   }
628   }
629   llvm_unreachable("Incorrect MSVC intrinsic!");
630 }
631 
632 namespace {
633 // ARC cleanup for __builtin_os_log_format
634 struct CallObjCArcUse final : EHScopeStack::Cleanup {
635   CallObjCArcUse(llvm::Value *object) : object(object) {}
636   llvm::Value *object;
637 
638   void Emit(CodeGenFunction &CGF, Flags flags) override {
639     CGF.EmitARCIntrinsicUse(object);
640   }
641 };
642 }
643 
644 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
645                                         unsigned BuiltinID, const CallExpr *E,
646                                         ReturnValueSlot ReturnValue) {
647   // See if we can constant fold this builtin.  If so, don't emit it at all.
648   Expr::EvalResult Result;
649   if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
650       !Result.hasSideEffects()) {
651     if (Result.Val.isInt())
652       return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
653                                                 Result.Val.getInt()));
654     if (Result.Val.isFloat())
655       return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
656                                                Result.Val.getFloat()));
657   }
658 
659   switch (BuiltinID) {
660   default: break;  // Handle intrinsics and libm functions below.
661   case Builtin::BI__builtin___CFStringMakeConstantString:
662   case Builtin::BI__builtin___NSStringMakeConstantString:
663     return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr));
664   case Builtin::BI__builtin_stdarg_start:
665   case Builtin::BI__builtin_va_start:
666   case Builtin::BI__va_start:
667   case Builtin::BI__builtin_va_end:
668     return RValue::get(
669         EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
670                            ? EmitScalarExpr(E->getArg(0))
671                            : EmitVAListRef(E->getArg(0)).getPointer(),
672                        BuiltinID != Builtin::BI__builtin_va_end));
673   case Builtin::BI__builtin_va_copy: {
674     Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
675     Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
676 
677     llvm::Type *Type = Int8PtrTy;
678 
679     DstPtr = Builder.CreateBitCast(DstPtr, Type);
680     SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
681     return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
682                                           {DstPtr, SrcPtr}));
683   }
684   case Builtin::BI__builtin_abs:
685   case Builtin::BI__builtin_labs:
686   case Builtin::BI__builtin_llabs: {
687     Value *ArgValue = EmitScalarExpr(E->getArg(0));
688 
689     Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
690     Value *CmpResult =
691     Builder.CreateICmpSGE(ArgValue,
692                           llvm::Constant::getNullValue(ArgValue->getType()),
693                                                             "abscond");
694     Value *Result =
695       Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
696 
697     return RValue::get(Result);
698   }
699   case Builtin::BI__builtin_fabs:
700   case Builtin::BI__builtin_fabsf:
701   case Builtin::BI__builtin_fabsl: {
702     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
703   }
704   case Builtin::BI__builtin_fmod:
705   case Builtin::BI__builtin_fmodf:
706   case Builtin::BI__builtin_fmodl: {
707     Value *Arg1 = EmitScalarExpr(E->getArg(0));
708     Value *Arg2 = EmitScalarExpr(E->getArg(1));
709     Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
710     return RValue::get(Result);
711   }
712   case Builtin::BI__builtin_copysign:
713   case Builtin::BI__builtin_copysignf:
714   case Builtin::BI__builtin_copysignl: {
715     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
716   }
717   case Builtin::BI__builtin_ceil:
718   case Builtin::BI__builtin_ceilf:
719   case Builtin::BI__builtin_ceill: {
720     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
721   }
722   case Builtin::BI__builtin_floor:
723   case Builtin::BI__builtin_floorf:
724   case Builtin::BI__builtin_floorl: {
725     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
726   }
727   case Builtin::BI__builtin_trunc:
728   case Builtin::BI__builtin_truncf:
729   case Builtin::BI__builtin_truncl: {
730     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
731   }
732   case Builtin::BI__builtin_rint:
733   case Builtin::BI__builtin_rintf:
734   case Builtin::BI__builtin_rintl: {
735     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
736   }
737   case Builtin::BI__builtin_nearbyint:
738   case Builtin::BI__builtin_nearbyintf:
739   case Builtin::BI__builtin_nearbyintl: {
740     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
741   }
742   case Builtin::BI__builtin_round:
743   case Builtin::BI__builtin_roundf:
744   case Builtin::BI__builtin_roundl: {
745     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
746   }
747   case Builtin::BI__builtin_fmin:
748   case Builtin::BI__builtin_fminf:
749   case Builtin::BI__builtin_fminl: {
750     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
751   }
752   case Builtin::BI__builtin_fmax:
753   case Builtin::BI__builtin_fmaxf:
754   case Builtin::BI__builtin_fmaxl: {
755     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
756   }
757   case Builtin::BI__builtin_conj:
758   case Builtin::BI__builtin_conjf:
759   case Builtin::BI__builtin_conjl: {
760     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
761     Value *Real = ComplexVal.first;
762     Value *Imag = ComplexVal.second;
763     Value *Zero =
764       Imag->getType()->isFPOrFPVectorTy()
765         ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
766         : llvm::Constant::getNullValue(Imag->getType());
767 
768     Imag = Builder.CreateFSub(Zero, Imag, "sub");
769     return RValue::getComplex(std::make_pair(Real, Imag));
770   }
771   case Builtin::BI__builtin_creal:
772   case Builtin::BI__builtin_crealf:
773   case Builtin::BI__builtin_creall:
774   case Builtin::BIcreal:
775   case Builtin::BIcrealf:
776   case Builtin::BIcreall: {
777     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
778     return RValue::get(ComplexVal.first);
779   }
780 
781   case Builtin::BI__builtin_cimag:
782   case Builtin::BI__builtin_cimagf:
783   case Builtin::BI__builtin_cimagl:
784   case Builtin::BIcimag:
785   case Builtin::BIcimagf:
786   case Builtin::BIcimagl: {
787     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
788     return RValue::get(ComplexVal.second);
789   }
790 
791   case Builtin::BI__builtin_ctzs:
792   case Builtin::BI__builtin_ctz:
793   case Builtin::BI__builtin_ctzl:
794   case Builtin::BI__builtin_ctzll: {
795     Value *ArgValue = EmitScalarExpr(E->getArg(0));
796 
797     llvm::Type *ArgType = ArgValue->getType();
798     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
799 
800     llvm::Type *ResultType = ConvertType(E->getType());
801     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
802     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
803     if (Result->getType() != ResultType)
804       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
805                                      "cast");
806     return RValue::get(Result);
807   }
808   case Builtin::BI__builtin_clzs:
809   case Builtin::BI__builtin_clz:
810   case Builtin::BI__builtin_clzl:
811   case Builtin::BI__builtin_clzll: {
812     Value *ArgValue = EmitScalarExpr(E->getArg(0));
813 
814     llvm::Type *ArgType = ArgValue->getType();
815     Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
816 
817     llvm::Type *ResultType = ConvertType(E->getType());
818     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
819     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
820     if (Result->getType() != ResultType)
821       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
822                                      "cast");
823     return RValue::get(Result);
824   }
825   case Builtin::BI__builtin_ffs:
826   case Builtin::BI__builtin_ffsl:
827   case Builtin::BI__builtin_ffsll: {
828     // ffs(x) -> x ? cttz(x) + 1 : 0
829     Value *ArgValue = EmitScalarExpr(E->getArg(0));
830 
831     llvm::Type *ArgType = ArgValue->getType();
832     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
833 
834     llvm::Type *ResultType = ConvertType(E->getType());
835     Value *Tmp =
836         Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
837                           llvm::ConstantInt::get(ArgType, 1));
838     Value *Zero = llvm::Constant::getNullValue(ArgType);
839     Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
840     Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
841     if (Result->getType() != ResultType)
842       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
843                                      "cast");
844     return RValue::get(Result);
845   }
846   case Builtin::BI__builtin_parity:
847   case Builtin::BI__builtin_parityl:
848   case Builtin::BI__builtin_parityll: {
849     // parity(x) -> ctpop(x) & 1
850     Value *ArgValue = EmitScalarExpr(E->getArg(0));
851 
852     llvm::Type *ArgType = ArgValue->getType();
853     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
854 
855     llvm::Type *ResultType = ConvertType(E->getType());
856     Value *Tmp = Builder.CreateCall(F, ArgValue);
857     Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
858     if (Result->getType() != ResultType)
859       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
860                                      "cast");
861     return RValue::get(Result);
862   }
863   case Builtin::BI__popcnt16:
864   case Builtin::BI__popcnt:
865   case Builtin::BI__popcnt64:
866   case Builtin::BI__builtin_popcount:
867   case Builtin::BI__builtin_popcountl:
868   case Builtin::BI__builtin_popcountll: {
869     Value *ArgValue = EmitScalarExpr(E->getArg(0));
870 
871     llvm::Type *ArgType = ArgValue->getType();
872     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
873 
874     llvm::Type *ResultType = ConvertType(E->getType());
875     Value *Result = Builder.CreateCall(F, ArgValue);
876     if (Result->getType() != ResultType)
877       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
878                                      "cast");
879     return RValue::get(Result);
880   }
  case Builtin::BI_rotr8:
  case Builtin::BI_rotr16:
  case Builtin::BI_rotr:
  case Builtin::BI_lrotr:
  case Builtin::BI_rotr64: {
    // Rotate right:
    //   rotr(Val, Shift) -> (Val >> S) | (Val << (Width - S)),
    // where S = Shift & (Width - 1) and Width is the bit width of Val.
    Value *Val = EmitScalarExpr(E->getArg(0));
    Value *Shift = EmitScalarExpr(E->getArg(1));

    // Bring the shift amount to the value's integer type (as unsigned).
    llvm::Type *ArgType = Val->getType();
    Shift = Builder.CreateIntCast(Shift, ArgType, false);
    unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
    Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
    Value *ArgZero = llvm::Constant::getNullValue(ArgType);

    // Masking with Width - 1 keeps the shift amount in [0, Width - 1].
    Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
    Shift = Builder.CreateAnd(Shift, Mask);
    Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift);

    Value *RightShifted = Builder.CreateLShr(Val, Shift);
    Value *LeftShifted = Builder.CreateShl(Val, LeftShift);
    Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);

    // When the masked shift is zero, LeftShift equals the full bit width;
    // the select returns Val unchanged in that case instead of Rotated.
    Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
    Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
    return RValue::get(Result);
  }
  case Builtin::BI_rotl8:
  case Builtin::BI_rotl16:
  case Builtin::BI_rotl:
  case Builtin::BI_lrotl:
  case Builtin::BI_rotl64: {
    // Rotate left:
    //   rotl(Val, Shift) -> (Val << S) | (Val >> (Width - S)),
    // where S = Shift & (Width - 1) and Width is the bit width of Val.
    Value *Val = EmitScalarExpr(E->getArg(0));
    Value *Shift = EmitScalarExpr(E->getArg(1));

    // Bring the shift amount to the value's integer type (as unsigned).
    llvm::Type *ArgType = Val->getType();
    Shift = Builder.CreateIntCast(Shift, ArgType, false);
    unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
    Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
    Value *ArgZero = llvm::Constant::getNullValue(ArgType);

    // Masking with Width - 1 keeps the shift amount in [0, Width - 1].
    Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
    Shift = Builder.CreateAnd(Shift, Mask);
    Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift);

    Value *LeftShifted = Builder.CreateShl(Val, Shift);
    Value *RightShifted = Builder.CreateLShr(Val, RightShift);
    Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);

    // When the masked shift is zero, RightShift equals the full bit width;
    // the select returns Val unchanged in that case instead of Rotated.
    Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
    Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
    return RValue::get(Result);
  }
933   case Builtin::BI__builtin_unpredictable: {
934     // Always return the argument of __builtin_unpredictable. LLVM does not
935     // handle this builtin. Metadata for this builtin should be added directly
936     // to instructions such as branches or switches that use it.
937     return RValue::get(EmitScalarExpr(E->getArg(0)));
938   }
939   case Builtin::BI__builtin_expect: {
940     Value *ArgValue = EmitScalarExpr(E->getArg(0));
941     llvm::Type *ArgType = ArgValue->getType();
942 
943     Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
944     // Don't generate llvm.expect on -O0 as the backend won't use it for
945     // anything.
946     // Note, we still IRGen ExpectedValue because it could have side-effects.
947     if (CGM.getCodeGenOpts().OptimizationLevel == 0)
948       return RValue::get(ArgValue);
949 
950     Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
951     Value *Result =
952         Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
953     return RValue::get(Result);
954   }
955   case Builtin::BI__builtin_assume_aligned: {
956     Value *PtrValue = EmitScalarExpr(E->getArg(0));
957     Value *OffsetValue =
958       (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
959 
960     Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
961     ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
962     unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
963 
964     EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
965     return RValue::get(PtrValue);
966   }
967   case Builtin::BI__assume:
968   case Builtin::BI__builtin_assume: {
969     if (E->getArg(0)->HasSideEffects(getContext()))
970       return RValue::get(nullptr);
971 
972     Value *ArgValue = EmitScalarExpr(E->getArg(0));
973     Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
974     return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
975   }
976   case Builtin::BI__builtin_bswap16:
977   case Builtin::BI__builtin_bswap32:
978   case Builtin::BI__builtin_bswap64: {
979     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
980   }
981   case Builtin::BI__builtin_bitreverse8:
982   case Builtin::BI__builtin_bitreverse16:
983   case Builtin::BI__builtin_bitreverse32:
984   case Builtin::BI__builtin_bitreverse64: {
985     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
986   }
987   case Builtin::BI__builtin_object_size: {
988     unsigned Type =
989         E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
990     auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
991 
992     // We pass this builtin onto the optimizer so that it can figure out the
993     // object size in more complex cases.
994     return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
995                                              /*EmittedE=*/nullptr));
996   }
997   case Builtin::BI__builtin_prefetch: {
998     Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
999     // FIXME: Technically these constants should of type 'int', yes?
1000     RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
1001       llvm::ConstantInt::get(Int32Ty, 0);
1002     Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
1003       llvm::ConstantInt::get(Int32Ty, 3);
1004     Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
1005     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
1006     return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
1007   }
1008   case Builtin::BI__builtin_readcyclecounter: {
1009     Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
1010     return RValue::get(Builder.CreateCall(F));
1011   }
1012   case Builtin::BI__builtin___clear_cache: {
1013     Value *Begin = EmitScalarExpr(E->getArg(0));
1014     Value *End = EmitScalarExpr(E->getArg(1));
1015     Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
1016     return RValue::get(Builder.CreateCall(F, {Begin, End}));
1017   }
1018   case Builtin::BI__builtin_trap:
1019     return RValue::get(EmitTrapCall(Intrinsic::trap));
1020   case Builtin::BI__debugbreak:
1021     return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
1022   case Builtin::BI__builtin_unreachable: {
1023     if (SanOpts.has(SanitizerKind::Unreachable)) {
1024       SanitizerScope SanScope(this);
1025       EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
1026                                SanitizerKind::Unreachable),
1027                 SanitizerHandler::BuiltinUnreachable,
1028                 EmitCheckSourceLocation(E->getExprLoc()), None);
1029     } else
1030       Builder.CreateUnreachable();
1031 
1032     // We do need to preserve an insertion point.
1033     EmitBlock(createBasicBlock("unreachable.cont"));
1034 
1035     return RValue::get(nullptr);
1036   }
1037 
1038   case Builtin::BI__builtin_powi:
1039   case Builtin::BI__builtin_powif:
1040   case Builtin::BI__builtin_powil: {
1041     Value *Base = EmitScalarExpr(E->getArg(0));
1042     Value *Exponent = EmitScalarExpr(E->getArg(1));
1043     llvm::Type *ArgType = Base->getType();
1044     Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
1045     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1046   }
1047 
1048   case Builtin::BI__builtin_isgreater:
1049   case Builtin::BI__builtin_isgreaterequal:
1050   case Builtin::BI__builtin_isless:
1051   case Builtin::BI__builtin_islessequal:
1052   case Builtin::BI__builtin_islessgreater:
1053   case Builtin::BI__builtin_isunordered: {
1054     // Ordered comparisons: we know the arguments to these are matching scalar
1055     // floating point values.
1056     Value *LHS = EmitScalarExpr(E->getArg(0));
1057     Value *RHS = EmitScalarExpr(E->getArg(1));
1058 
1059     switch (BuiltinID) {
1060     default: llvm_unreachable("Unknown ordered comparison");
1061     case Builtin::BI__builtin_isgreater:
1062       LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
1063       break;
1064     case Builtin::BI__builtin_isgreaterequal:
1065       LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
1066       break;
1067     case Builtin::BI__builtin_isless:
1068       LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
1069       break;
1070     case Builtin::BI__builtin_islessequal:
1071       LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
1072       break;
1073     case Builtin::BI__builtin_islessgreater:
1074       LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
1075       break;
1076     case Builtin::BI__builtin_isunordered:
1077       LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
1078       break;
1079     }
1080     // ZExt bool to int type.
1081     return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
1082   }
1083   case Builtin::BI__builtin_isnan: {
1084     Value *V = EmitScalarExpr(E->getArg(0));
1085     V = Builder.CreateFCmpUNO(V, V, "cmp");
1086     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1087   }
1088 
1089   case Builtin::BIfinite:
1090   case Builtin::BI__finite:
1091   case Builtin::BIfinitef:
1092   case Builtin::BI__finitef:
1093   case Builtin::BIfinitel:
1094   case Builtin::BI__finitel:
1095   case Builtin::BI__builtin_isinf:
1096   case Builtin::BI__builtin_isfinite: {
1097     // isinf(x)    --> fabs(x) == infinity
1098     // isfinite(x) --> fabs(x) != infinity
1099     // x != NaN via the ordered compare in either case.
1100     Value *V = EmitScalarExpr(E->getArg(0));
1101     Value *Fabs = EmitFAbs(*this, V);
1102     Constant *Infinity = ConstantFP::getInfinity(V->getType());
1103     CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
1104                                   ? CmpInst::FCMP_OEQ
1105                                   : CmpInst::FCMP_ONE;
1106     Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
1107     return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
1108   }
1109 
1110   case Builtin::BI__builtin_isinf_sign: {
1111     // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
1112     Value *Arg = EmitScalarExpr(E->getArg(0));
1113     Value *AbsArg = EmitFAbs(*this, Arg);
1114     Value *IsInf = Builder.CreateFCmpOEQ(
1115         AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
1116     Value *IsNeg = EmitSignBit(*this, Arg);
1117 
1118     llvm::Type *IntTy = ConvertType(E->getType());
1119     Value *Zero = Constant::getNullValue(IntTy);
1120     Value *One = ConstantInt::get(IntTy, 1);
1121     Value *NegativeOne = ConstantInt::get(IntTy, -1);
1122     Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
1123     Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
1124     return RValue::get(Result);
1125   }
1126 
1127   case Builtin::BI__builtin_isnormal: {
1128     // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
1129     Value *V = EmitScalarExpr(E->getArg(0));
1130     Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
1131 
1132     Value *Abs = EmitFAbs(*this, V);
1133     Value *IsLessThanInf =
1134       Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
1135     APFloat Smallest = APFloat::getSmallestNormalized(
1136                    getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
1137     Value *IsNormal =
1138       Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
1139                             "isnormal");
1140     V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
1141     V = Builder.CreateAnd(V, IsNormal, "and");
1142     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1143   }
1144 
1145   case Builtin::BI__builtin_fpclassify: {
1146     Value *V = EmitScalarExpr(E->getArg(5));
1147     llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
1148 
1149     // Create Result
1150     BasicBlock *Begin = Builder.GetInsertBlock();
1151     BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
1152     Builder.SetInsertPoint(End);
1153     PHINode *Result =
1154       Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
1155                         "fpclassify_result");
1156 
1157     // if (V==0) return FP_ZERO
1158     Builder.SetInsertPoint(Begin);
1159     Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
1160                                           "iszero");
1161     Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
1162     BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
1163     Builder.CreateCondBr(IsZero, End, NotZero);
1164     Result->addIncoming(ZeroLiteral, Begin);
1165 
1166     // if (V != V) return FP_NAN
1167     Builder.SetInsertPoint(NotZero);
1168     Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
1169     Value *NanLiteral = EmitScalarExpr(E->getArg(0));
1170     BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
1171     Builder.CreateCondBr(IsNan, End, NotNan);
1172     Result->addIncoming(NanLiteral, NotZero);
1173 
1174     // if (fabs(V) == infinity) return FP_INFINITY
1175     Builder.SetInsertPoint(NotNan);
1176     Value *VAbs = EmitFAbs(*this, V);
1177     Value *IsInf =
1178       Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
1179                             "isinf");
1180     Value *InfLiteral = EmitScalarExpr(E->getArg(1));
1181     BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
1182     Builder.CreateCondBr(IsInf, End, NotInf);
1183     Result->addIncoming(InfLiteral, NotNan);
1184 
1185     // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
1186     Builder.SetInsertPoint(NotInf);
1187     APFloat Smallest = APFloat::getSmallestNormalized(
1188         getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
1189     Value *IsNormal =
1190       Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
1191                             "isnormal");
1192     Value *NormalResult =
1193       Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
1194                            EmitScalarExpr(E->getArg(3)));
1195     Builder.CreateBr(End);
1196     Result->addIncoming(NormalResult, NotInf);
1197 
1198     // return Result
1199     Builder.SetInsertPoint(End);
1200     return RValue::get(Result);
1201   }
1202 
1203   case Builtin::BIalloca:
1204   case Builtin::BI_alloca:
1205   case Builtin::BI__builtin_alloca: {
1206     Value *Size = EmitScalarExpr(E->getArg(0));
1207     const TargetInfo &TI = getContext().getTargetInfo();
1208     // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
1209     unsigned SuitableAlignmentInBytes =
1210         CGM.getContext()
1211             .toCharUnitsFromBits(TI.getSuitableAlign())
1212             .getQuantity();
1213     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1214     AI->setAlignment(SuitableAlignmentInBytes);
1215     return RValue::get(AI);
1216   }
1217 
1218   case Builtin::BI__builtin_alloca_with_align: {
1219     Value *Size = EmitScalarExpr(E->getArg(0));
1220     Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
1221     auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
1222     unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
1223     unsigned AlignmentInBytes =
1224         CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity();
1225     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1226     AI->setAlignment(AlignmentInBytes);
1227     return RValue::get(AI);
1228   }
1229 
1230   case Builtin::BIbzero:
1231   case Builtin::BI__builtin_bzero: {
1232     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1233     Value *SizeVal = EmitScalarExpr(E->getArg(1));
1234     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1235                         E->getArg(0)->getExprLoc(), FD, 0);
1236     Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
1237     return RValue::get(Dest.getPointer());
1238   }
1239   case Builtin::BImemcpy:
1240   case Builtin::BI__builtin_memcpy: {
1241     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1242     Address Src = EmitPointerWithAlignment(E->getArg(1));
1243     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1244     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1245                         E->getArg(0)->getExprLoc(), FD, 0);
1246     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1247                         E->getArg(1)->getExprLoc(), FD, 1);
1248     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1249     return RValue::get(Dest.getPointer());
1250   }
1251 
  case Builtin::BI__builtin_char_memchr:
    // Retarget to __builtin_memchr and leave the switch, so the code after
    // the switch sees the memchr builtin ID instead.
    BuiltinID = Builtin::BI__builtin_memchr;
    break;
1255 
1256   case Builtin::BI__builtin___memcpy_chk: {
1257     // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
1258     llvm::APSInt Size, DstSize;
1259     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1260         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1261       break;
1262     if (Size.ugt(DstSize))
1263       break;
1264     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1265     Address Src = EmitPointerWithAlignment(E->getArg(1));
1266     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1267     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1268     return RValue::get(Dest.getPointer());
1269   }
1270 
1271   case Builtin::BI__builtin_objc_memmove_collectable: {
1272     Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
1273     Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
1274     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1275     CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
1276                                                   DestAddr, SrcAddr, SizeVal);
1277     return RValue::get(DestAddr.getPointer());
1278   }
1279 
1280   case Builtin::BI__builtin___memmove_chk: {
1281     // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
1282     llvm::APSInt Size, DstSize;
1283     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1284         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1285       break;
1286     if (Size.ugt(DstSize))
1287       break;
1288     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1289     Address Src = EmitPointerWithAlignment(E->getArg(1));
1290     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1291     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1292     return RValue::get(Dest.getPointer());
1293   }
1294 
1295   case Builtin::BImemmove:
1296   case Builtin::BI__builtin_memmove: {
1297     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1298     Address Src = EmitPointerWithAlignment(E->getArg(1));
1299     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1300     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1301                         E->getArg(0)->getExprLoc(), FD, 0);
1302     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1303                         E->getArg(1)->getExprLoc(), FD, 1);
1304     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1305     return RValue::get(Dest.getPointer());
1306   }
1307   case Builtin::BImemset:
1308   case Builtin::BI__builtin_memset: {
1309     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1310     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1311                                          Builder.getInt8Ty());
1312     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1313     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1314                         E->getArg(0)->getExprLoc(), FD, 0);
1315     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1316     return RValue::get(Dest.getPointer());
1317   }
1318   case Builtin::BI__builtin___memset_chk: {
1319     // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
1320     llvm::APSInt Size, DstSize;
1321     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1322         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1323       break;
1324     if (Size.ugt(DstSize))
1325       break;
1326     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1327     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1328                                          Builder.getInt8Ty());
1329     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1330     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1331     return RValue::get(Dest.getPointer());
1332   }
  case Builtin::BI__builtin_dwarf_cfa: {
    // Emits a call to llvm.eh.dwarf.cfa with a constant i32 offset operand.
    //
    // The offset in bytes from the first argument to the CFA.
    //
    // Why on earth is this in the frontend?  Is there any reason at
    // all that the backend can't reasonably determine this while
    // lowering llvm.eh.dwarf.cfa()?
    //
    // TODO: If there's a satisfactory reason, add a target hook for
    // this instead of hard-coding 0, which is correct for most targets.
    int32_t Offset = 0;

    Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
    return RValue::get(Builder.CreateCall(F,
                                      llvm::ConstantInt::get(Int32Ty, Offset)));
  }
1348   case Builtin::BI__builtin_return_address: {
1349     Value *Depth =
1350         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1351     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1352     return RValue::get(Builder.CreateCall(F, Depth));
1353   }
1354   case Builtin::BI_ReturnAddress: {
1355     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1356     return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
1357   }
1358   case Builtin::BI__builtin_frame_address: {
1359     Value *Depth =
1360         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1361     Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
1362     return RValue::get(Builder.CreateCall(F, Depth));
1363   }
1364   case Builtin::BI__builtin_extract_return_addr: {
1365     Value *Address = EmitScalarExpr(E->getArg(0));
1366     Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
1367     return RValue::get(Result);
1368   }
1369   case Builtin::BI__builtin_frob_return_addr: {
1370     Value *Address = EmitScalarExpr(E->getArg(0));
1371     Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
1372     return RValue::get(Result);
1373   }
1374   case Builtin::BI__builtin_dwarf_sp_column: {
1375     llvm::IntegerType *Ty
1376       = cast<llvm::IntegerType>(ConvertType(E->getType()));
1377     int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
1378     if (Column == -1) {
1379       CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
1380       return RValue::get(llvm::UndefValue::get(Ty));
1381     }
1382     return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
1383   }
1384   case Builtin::BI__builtin_init_dwarf_reg_size_table: {
1385     Value *Address = EmitScalarExpr(E->getArg(0));
1386     if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
1387       CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
1388     return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1389   }
1390   case Builtin::BI__builtin_eh_return: {
1391     Value *Int = EmitScalarExpr(E->getArg(0));
1392     Value *Ptr = EmitScalarExpr(E->getArg(1));
1393 
1394     llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
1395     assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
1396            "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
1397     Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
1398                                   ? Intrinsic::eh_return_i32
1399                                   : Intrinsic::eh_return_i64);
1400     Builder.CreateCall(F, {Int, Ptr});
1401     Builder.CreateUnreachable();
1402 
1403     // We do need to preserve an insertion point.
1404     EmitBlock(createBasicBlock("builtin_eh_return.cont"));
1405 
1406     return RValue::get(nullptr);
1407   }
1408   case Builtin::BI__builtin_unwind_init: {
1409     Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
1410     return RValue::get(Builder.CreateCall(F));
1411   }
1412   case Builtin::BI__builtin_extend_pointer: {
1413     // Extends a pointer to the size of an _Unwind_Word, which is
1414     // uint64_t on all platforms.  Generally this gets poked into a
1415     // register and eventually used as an address, so if the
1416     // addressing registers are wider than pointers and the platform
1417     // doesn't implicitly ignore high-order bits when doing
1418     // addressing, we need to make sure we zext / sext based on
1419     // the platform's expectations.
1420     //
1421     // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
1422 
1423     // Cast the pointer to intptr_t.
1424     Value *Ptr = EmitScalarExpr(E->getArg(0));
1425     Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
1426 
1427     // If that's 64 bits, we're done.
1428     if (IntPtrTy->getBitWidth() == 64)
1429       return RValue::get(Result);
1430 
1431     // Otherwise, ask the codegen data what to do.
1432     if (getTargetHooks().extendPointerWithSExt())
1433       return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
1434     else
1435       return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
1436   }
1437   case Builtin::BI__builtin_setjmp: {
1438     // Buffer is a void**.
1439     Address Buf = EmitPointerWithAlignment(E->getArg(0));
1440 
1441     // Store the frame pointer to the setjmp buffer.
1442     Value *FrameAddr =
1443       Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1444                          ConstantInt::get(Int32Ty, 0));
1445     Builder.CreateStore(FrameAddr, Buf);
1446 
1447     // Store the stack pointer to the setjmp buffer.
1448     Value *StackAddr =
1449         Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
1450     Address StackSaveSlot =
1451       Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
1452     Builder.CreateStore(StackAddr, StackSaveSlot);
1453 
1454     // Call LLVM's EH setjmp, which is lightweight.
1455     Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
1456     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1457     return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
1458   }
1459   case Builtin::BI__builtin_longjmp: {
1460     Value *Buf = EmitScalarExpr(E->getArg(0));
1461     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1462 
1463     // Call LLVM's EH longjmp, which is lightweight.
1464     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
1465 
1466     // longjmp doesn't return; mark this as unreachable.
1467     Builder.CreateUnreachable();
1468 
1469     // We do need to preserve an insertion point.
1470     EmitBlock(createBasicBlock("longjmp.cont"));
1471 
1472     return RValue::get(nullptr);
1473   }
1474   case Builtin::BI__sync_fetch_and_add:
1475   case Builtin::BI__sync_fetch_and_sub:
1476   case Builtin::BI__sync_fetch_and_or:
1477   case Builtin::BI__sync_fetch_and_and:
1478   case Builtin::BI__sync_fetch_and_xor:
1479   case Builtin::BI__sync_fetch_and_nand:
1480   case Builtin::BI__sync_add_and_fetch:
1481   case Builtin::BI__sync_sub_and_fetch:
1482   case Builtin::BI__sync_and_and_fetch:
1483   case Builtin::BI__sync_or_and_fetch:
1484   case Builtin::BI__sync_xor_and_fetch:
1485   case Builtin::BI__sync_nand_and_fetch:
1486   case Builtin::BI__sync_val_compare_and_swap:
1487   case Builtin::BI__sync_bool_compare_and_swap:
1488   case Builtin::BI__sync_lock_test_and_set:
1489   case Builtin::BI__sync_lock_release:
1490   case Builtin::BI__sync_swap:
1491     llvm_unreachable("Shouldn't make it through sema");
1492   case Builtin::BI__sync_fetch_and_add_1:
1493   case Builtin::BI__sync_fetch_and_add_2:
1494   case Builtin::BI__sync_fetch_and_add_4:
1495   case Builtin::BI__sync_fetch_and_add_8:
1496   case Builtin::BI__sync_fetch_and_add_16:
1497     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
1498   case Builtin::BI__sync_fetch_and_sub_1:
1499   case Builtin::BI__sync_fetch_and_sub_2:
1500   case Builtin::BI__sync_fetch_and_sub_4:
1501   case Builtin::BI__sync_fetch_and_sub_8:
1502   case Builtin::BI__sync_fetch_and_sub_16:
1503     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
1504   case Builtin::BI__sync_fetch_and_or_1:
1505   case Builtin::BI__sync_fetch_and_or_2:
1506   case Builtin::BI__sync_fetch_and_or_4:
1507   case Builtin::BI__sync_fetch_and_or_8:
1508   case Builtin::BI__sync_fetch_and_or_16:
1509     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
1510   case Builtin::BI__sync_fetch_and_and_1:
1511   case Builtin::BI__sync_fetch_and_and_2:
1512   case Builtin::BI__sync_fetch_and_and_4:
1513   case Builtin::BI__sync_fetch_and_and_8:
1514   case Builtin::BI__sync_fetch_and_and_16:
1515     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
1516   case Builtin::BI__sync_fetch_and_xor_1:
1517   case Builtin::BI__sync_fetch_and_xor_2:
1518   case Builtin::BI__sync_fetch_and_xor_4:
1519   case Builtin::BI__sync_fetch_and_xor_8:
1520   case Builtin::BI__sync_fetch_and_xor_16:
1521     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
1522   case Builtin::BI__sync_fetch_and_nand_1:
1523   case Builtin::BI__sync_fetch_and_nand_2:
1524   case Builtin::BI__sync_fetch_and_nand_4:
1525   case Builtin::BI__sync_fetch_and_nand_8:
1526   case Builtin::BI__sync_fetch_and_nand_16:
1527     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
1528 
1529   // Clang extensions: not overloaded yet.
1530   case Builtin::BI__sync_fetch_and_min:
1531     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
1532   case Builtin::BI__sync_fetch_and_max:
1533     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
1534   case Builtin::BI__sync_fetch_and_umin:
1535     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
1536   case Builtin::BI__sync_fetch_and_umax:
1537     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
1538 
1539   case Builtin::BI__sync_add_and_fetch_1:
1540   case Builtin::BI__sync_add_and_fetch_2:
1541   case Builtin::BI__sync_add_and_fetch_4:
1542   case Builtin::BI__sync_add_and_fetch_8:
1543   case Builtin::BI__sync_add_and_fetch_16:
1544     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
1545                                 llvm::Instruction::Add);
1546   case Builtin::BI__sync_sub_and_fetch_1:
1547   case Builtin::BI__sync_sub_and_fetch_2:
1548   case Builtin::BI__sync_sub_and_fetch_4:
1549   case Builtin::BI__sync_sub_and_fetch_8:
1550   case Builtin::BI__sync_sub_and_fetch_16:
1551     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
1552                                 llvm::Instruction::Sub);
1553   case Builtin::BI__sync_and_and_fetch_1:
1554   case Builtin::BI__sync_and_and_fetch_2:
1555   case Builtin::BI__sync_and_and_fetch_4:
1556   case Builtin::BI__sync_and_and_fetch_8:
1557   case Builtin::BI__sync_and_and_fetch_16:
1558     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
1559                                 llvm::Instruction::And);
1560   case Builtin::BI__sync_or_and_fetch_1:
1561   case Builtin::BI__sync_or_and_fetch_2:
1562   case Builtin::BI__sync_or_and_fetch_4:
1563   case Builtin::BI__sync_or_and_fetch_8:
1564   case Builtin::BI__sync_or_and_fetch_16:
1565     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
1566                                 llvm::Instruction::Or);
1567   case Builtin::BI__sync_xor_and_fetch_1:
1568   case Builtin::BI__sync_xor_and_fetch_2:
1569   case Builtin::BI__sync_xor_and_fetch_4:
1570   case Builtin::BI__sync_xor_and_fetch_8:
1571   case Builtin::BI__sync_xor_and_fetch_16:
1572     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
1573                                 llvm::Instruction::Xor);
1574   case Builtin::BI__sync_nand_and_fetch_1:
1575   case Builtin::BI__sync_nand_and_fetch_2:
1576   case Builtin::BI__sync_nand_and_fetch_4:
1577   case Builtin::BI__sync_nand_and_fetch_8:
1578   case Builtin::BI__sync_nand_and_fetch_16:
1579     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
1580                                 llvm::Instruction::And, true);
1581 
1582   case Builtin::BI__sync_val_compare_and_swap_1:
1583   case Builtin::BI__sync_val_compare_and_swap_2:
1584   case Builtin::BI__sync_val_compare_and_swap_4:
1585   case Builtin::BI__sync_val_compare_and_swap_8:
1586   case Builtin::BI__sync_val_compare_and_swap_16:
1587     return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
1588 
1589   case Builtin::BI__sync_bool_compare_and_swap_1:
1590   case Builtin::BI__sync_bool_compare_and_swap_2:
1591   case Builtin::BI__sync_bool_compare_and_swap_4:
1592   case Builtin::BI__sync_bool_compare_and_swap_8:
1593   case Builtin::BI__sync_bool_compare_and_swap_16:
1594     return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
1595 
1596   case Builtin::BI__sync_swap_1:
1597   case Builtin::BI__sync_swap_2:
1598   case Builtin::BI__sync_swap_4:
1599   case Builtin::BI__sync_swap_8:
1600   case Builtin::BI__sync_swap_16:
1601     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1602 
1603   case Builtin::BI__sync_lock_test_and_set_1:
1604   case Builtin::BI__sync_lock_test_and_set_2:
1605   case Builtin::BI__sync_lock_test_and_set_4:
1606   case Builtin::BI__sync_lock_test_and_set_8:
1607   case Builtin::BI__sync_lock_test_and_set_16:
1608     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1609 
1610   case Builtin::BI__sync_lock_release_1:
1611   case Builtin::BI__sync_lock_release_2:
1612   case Builtin::BI__sync_lock_release_4:
1613   case Builtin::BI__sync_lock_release_8:
1614   case Builtin::BI__sync_lock_release_16: {
1615     Value *Ptr = EmitScalarExpr(E->getArg(0));
1616     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
1617     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
1618     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
1619                                              StoreSize.getQuantity() * 8);
1620     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
1621     llvm::StoreInst *Store =
1622       Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
1623                                  StoreSize);
1624     Store->setAtomic(llvm::AtomicOrdering::Release);
1625     return RValue::get(nullptr);
1626   }
1627 
1628   case Builtin::BI__sync_synchronize: {
1629     // We assume this is supposed to correspond to a C++0x-style
1630     // sequentially-consistent fence (i.e. this is only usable for
1631     // synchronization, not device I/O or anything like that). This intrinsic
1632     // is really badly designed in the sense that in theory, there isn't
1633     // any way to safely use it... but in practice, it mostly works
1634     // to use it with non-atomic loads and stores to get acquire/release
1635     // semantics.
1636     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
1637     return RValue::get(nullptr);
1638   }
1639 
1640   case Builtin::BI__builtin_nontemporal_load:
1641     return RValue::get(EmitNontemporalLoad(*this, E));
1642   case Builtin::BI__builtin_nontemporal_store:
1643     return RValue::get(EmitNontemporalStore(*this, E));
1644   case Builtin::BI__c11_atomic_is_lock_free:
1645   case Builtin::BI__atomic_is_lock_free: {
1646     // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
1647     // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
1648     // _Atomic(T) is always properly-aligned.
1649     const char *LibCallName = "__atomic_is_lock_free";
1650     CallArgList Args;
1651     Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
1652              getContext().getSizeType());
1653     if (BuiltinID == Builtin::BI__atomic_is_lock_free)
1654       Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
1655                getContext().VoidPtrTy);
1656     else
1657       Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
1658                getContext().VoidPtrTy);
1659     const CGFunctionInfo &FuncInfo =
1660         CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
1661     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
1662     llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
1663     return EmitCall(FuncInfo, CGCallee::forDirect(Func),
1664                     ReturnValueSlot(), Args);
1665   }
1666 
1667   case Builtin::BI__atomic_test_and_set: {
1668     // Look at the argument type to determine whether this is a volatile
1669     // operation. The parameter type is always volatile.
1670     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1671     bool Volatile =
1672         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1673 
1674     Value *Ptr = EmitScalarExpr(E->getArg(0));
1675     unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1676     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1677     Value *NewVal = Builder.getInt8(1);
1678     Value *Order = EmitScalarExpr(E->getArg(1));
1679     if (isa<llvm::ConstantInt>(Order)) {
1680       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1681       AtomicRMWInst *Result = nullptr;
1682       switch (ord) {
1683       case 0:  // memory_order_relaxed
1684       default: // invalid order
1685         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1686                                          llvm::AtomicOrdering::Monotonic);
1687         break;
1688       case 1: // memory_order_consume
1689       case 2: // memory_order_acquire
1690         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1691                                          llvm::AtomicOrdering::Acquire);
1692         break;
1693       case 3: // memory_order_release
1694         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1695                                          llvm::AtomicOrdering::Release);
1696         break;
1697       case 4: // memory_order_acq_rel
1698 
1699         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1700                                          llvm::AtomicOrdering::AcquireRelease);
1701         break;
1702       case 5: // memory_order_seq_cst
1703         Result = Builder.CreateAtomicRMW(
1704             llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1705             llvm::AtomicOrdering::SequentiallyConsistent);
1706         break;
1707       }
1708       Result->setVolatile(Volatile);
1709       return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1710     }
1711 
1712     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1713 
1714     llvm::BasicBlock *BBs[5] = {
1715       createBasicBlock("monotonic", CurFn),
1716       createBasicBlock("acquire", CurFn),
1717       createBasicBlock("release", CurFn),
1718       createBasicBlock("acqrel", CurFn),
1719       createBasicBlock("seqcst", CurFn)
1720     };
1721     llvm::AtomicOrdering Orders[5] = {
1722         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
1723         llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
1724         llvm::AtomicOrdering::SequentiallyConsistent};
1725 
1726     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1727     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1728 
1729     Builder.SetInsertPoint(ContBB);
1730     PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
1731 
1732     for (unsigned i = 0; i < 5; ++i) {
1733       Builder.SetInsertPoint(BBs[i]);
1734       AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1735                                                    Ptr, NewVal, Orders[i]);
1736       RMW->setVolatile(Volatile);
1737       Result->addIncoming(RMW, BBs[i]);
1738       Builder.CreateBr(ContBB);
1739     }
1740 
1741     SI->addCase(Builder.getInt32(0), BBs[0]);
1742     SI->addCase(Builder.getInt32(1), BBs[1]);
1743     SI->addCase(Builder.getInt32(2), BBs[1]);
1744     SI->addCase(Builder.getInt32(3), BBs[2]);
1745     SI->addCase(Builder.getInt32(4), BBs[3]);
1746     SI->addCase(Builder.getInt32(5), BBs[4]);
1747 
1748     Builder.SetInsertPoint(ContBB);
1749     return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1750   }
1751 
1752   case Builtin::BI__atomic_clear: {
1753     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1754     bool Volatile =
1755         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1756 
1757     Address Ptr = EmitPointerWithAlignment(E->getArg(0));
1758     unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
1759     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1760     Value *NewVal = Builder.getInt8(0);
1761     Value *Order = EmitScalarExpr(E->getArg(1));
1762     if (isa<llvm::ConstantInt>(Order)) {
1763       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1764       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1765       switch (ord) {
1766       case 0:  // memory_order_relaxed
1767       default: // invalid order
1768         Store->setOrdering(llvm::AtomicOrdering::Monotonic);
1769         break;
1770       case 3:  // memory_order_release
1771         Store->setOrdering(llvm::AtomicOrdering::Release);
1772         break;
1773       case 5:  // memory_order_seq_cst
1774         Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
1775         break;
1776       }
1777       return RValue::get(nullptr);
1778     }
1779 
1780     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1781 
1782     llvm::BasicBlock *BBs[3] = {
1783       createBasicBlock("monotonic", CurFn),
1784       createBasicBlock("release", CurFn),
1785       createBasicBlock("seqcst", CurFn)
1786     };
1787     llvm::AtomicOrdering Orders[3] = {
1788         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
1789         llvm::AtomicOrdering::SequentiallyConsistent};
1790 
1791     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1792     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1793 
1794     for (unsigned i = 0; i < 3; ++i) {
1795       Builder.SetInsertPoint(BBs[i]);
1796       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1797       Store->setOrdering(Orders[i]);
1798       Builder.CreateBr(ContBB);
1799     }
1800 
1801     SI->addCase(Builder.getInt32(0), BBs[0]);
1802     SI->addCase(Builder.getInt32(3), BBs[1]);
1803     SI->addCase(Builder.getInt32(5), BBs[2]);
1804 
1805     Builder.SetInsertPoint(ContBB);
1806     return RValue::get(nullptr);
1807   }
1808 
1809   case Builtin::BI__atomic_thread_fence:
1810   case Builtin::BI__atomic_signal_fence:
1811   case Builtin::BI__c11_atomic_thread_fence:
1812   case Builtin::BI__c11_atomic_signal_fence: {
1813     llvm::SynchronizationScope Scope;
1814     if (BuiltinID == Builtin::BI__atomic_signal_fence ||
1815         BuiltinID == Builtin::BI__c11_atomic_signal_fence)
1816       Scope = llvm::SingleThread;
1817     else
1818       Scope = llvm::CrossThread;
1819     Value *Order = EmitScalarExpr(E->getArg(0));
1820     if (isa<llvm::ConstantInt>(Order)) {
1821       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1822       switch (ord) {
1823       case 0:  // memory_order_relaxed
1824       default: // invalid order
1825         break;
1826       case 1:  // memory_order_consume
1827       case 2:  // memory_order_acquire
1828         Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
1829         break;
1830       case 3:  // memory_order_release
1831         Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
1832         break;
1833       case 4:  // memory_order_acq_rel
1834         Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
1835         break;
1836       case 5:  // memory_order_seq_cst
1837         Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
1838                             Scope);
1839         break;
1840       }
1841       return RValue::get(nullptr);
1842     }
1843 
1844     llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
1845     AcquireBB = createBasicBlock("acquire", CurFn);
1846     ReleaseBB = createBasicBlock("release", CurFn);
1847     AcqRelBB = createBasicBlock("acqrel", CurFn);
1848     SeqCstBB = createBasicBlock("seqcst", CurFn);
1849     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1850 
1851     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1852     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
1853 
1854     Builder.SetInsertPoint(AcquireBB);
1855     Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
1856     Builder.CreateBr(ContBB);
1857     SI->addCase(Builder.getInt32(1), AcquireBB);
1858     SI->addCase(Builder.getInt32(2), AcquireBB);
1859 
1860     Builder.SetInsertPoint(ReleaseBB);
1861     Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
1862     Builder.CreateBr(ContBB);
1863     SI->addCase(Builder.getInt32(3), ReleaseBB);
1864 
1865     Builder.SetInsertPoint(AcqRelBB);
1866     Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
1867     Builder.CreateBr(ContBB);
1868     SI->addCase(Builder.getInt32(4), AcqRelBB);
1869 
1870     Builder.SetInsertPoint(SeqCstBB);
1871     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Scope);
1872     Builder.CreateBr(ContBB);
1873     SI->addCase(Builder.getInt32(5), SeqCstBB);
1874 
1875     Builder.SetInsertPoint(ContBB);
1876     return RValue::get(nullptr);
1877   }
1878 
1879     // Library functions with special handling.
1880   case Builtin::BIsqrt:
1881   case Builtin::BIsqrtf:
1882   case Builtin::BIsqrtl: {
1883     // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
1884     // in finite- or unsafe-math mode (the intrinsic has different semantics
1885     // for handling negative numbers compared to the library function, so
1886     // -fmath-errno=0 is not enough).
1887     if (!FD->hasAttr<ConstAttr>())
1888       break;
1889     if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
1890           CGM.getCodeGenOpts().NoNaNsFPMath))
1891       break;
1892     Value *Arg0 = EmitScalarExpr(E->getArg(0));
1893     llvm::Type *ArgType = Arg0->getType();
1894     Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
1895     return RValue::get(Builder.CreateCall(F, Arg0));
1896   }
1897 
1898   case Builtin::BI__builtin_pow:
1899   case Builtin::BI__builtin_powf:
1900   case Builtin::BI__builtin_powl:
1901   case Builtin::BIpow:
1902   case Builtin::BIpowf:
1903   case Builtin::BIpowl: {
1904     // Transform a call to pow* into a @llvm.pow.* intrinsic call.
1905     if (!FD->hasAttr<ConstAttr>())
1906       break;
1907     Value *Base = EmitScalarExpr(E->getArg(0));
1908     Value *Exponent = EmitScalarExpr(E->getArg(1));
1909     llvm::Type *ArgType = Base->getType();
1910     Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
1911     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1912   }
1913 
1914   case Builtin::BIfma:
1915   case Builtin::BIfmaf:
1916   case Builtin::BIfmal:
1917   case Builtin::BI__builtin_fma:
1918   case Builtin::BI__builtin_fmaf:
1919   case Builtin::BI__builtin_fmal: {
1920     // Rewrite fma to intrinsic.
1921     Value *FirstArg = EmitScalarExpr(E->getArg(0));
1922     llvm::Type *ArgType = FirstArg->getType();
1923     Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
1924     return RValue::get(
1925         Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
1926                                EmitScalarExpr(E->getArg(2))}));
1927   }
1928 
1929   case Builtin::BI__builtin_signbit:
1930   case Builtin::BI__builtin_signbitf:
1931   case Builtin::BI__builtin_signbitl: {
1932     return RValue::get(
1933         Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
1934                            ConvertType(E->getType())));
1935   }
1936   case Builtin::BI__builtin_annotation: {
1937     llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
1938     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
1939                                       AnnVal->getType());
1940 
1941     // Get the annotation string, go through casts. Sema requires this to be a
1942     // non-wide string literal, potentially cast, so the cast<> is safe.
1943     const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
1944     StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
1945     return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
1946   }
1947   case Builtin::BI__builtin_addcb:
1948   case Builtin::BI__builtin_addcs:
1949   case Builtin::BI__builtin_addc:
1950   case Builtin::BI__builtin_addcl:
1951   case Builtin::BI__builtin_addcll:
1952   case Builtin::BI__builtin_subcb:
1953   case Builtin::BI__builtin_subcs:
1954   case Builtin::BI__builtin_subc:
1955   case Builtin::BI__builtin_subcl:
1956   case Builtin::BI__builtin_subcll: {
1957 
1958     // We translate all of these builtins from expressions of the form:
1959     //   int x = ..., y = ..., carryin = ..., carryout, result;
1960     //   result = __builtin_addc(x, y, carryin, &carryout);
1961     //
1962     // to LLVM IR of the form:
1963     //
1964     //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
1965     //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
1966     //   %carry1 = extractvalue {i32, i1} %tmp1, 1
1967     //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
1968     //                                                       i32 %carryin)
1969     //   %result = extractvalue {i32, i1} %tmp2, 0
1970     //   %carry2 = extractvalue {i32, i1} %tmp2, 1
1971     //   %tmp3 = or i1 %carry1, %carry2
1972     //   %tmp4 = zext i1 %tmp3 to i32
1973     //   store i32 %tmp4, i32* %carryout
1974 
1975     // Scalarize our inputs.
1976     llvm::Value *X = EmitScalarExpr(E->getArg(0));
1977     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1978     llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
1979     Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
1980 
1981     // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
1982     llvm::Intrinsic::ID IntrinsicId;
1983     switch (BuiltinID) {
1984     default: llvm_unreachable("Unknown multiprecision builtin id.");
1985     case Builtin::BI__builtin_addcb:
1986     case Builtin::BI__builtin_addcs:
1987     case Builtin::BI__builtin_addc:
1988     case Builtin::BI__builtin_addcl:
1989     case Builtin::BI__builtin_addcll:
1990       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1991       break;
1992     case Builtin::BI__builtin_subcb:
1993     case Builtin::BI__builtin_subcs:
1994     case Builtin::BI__builtin_subc:
1995     case Builtin::BI__builtin_subcl:
1996     case Builtin::BI__builtin_subcll:
1997       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1998       break;
1999     }
2000 
2001     // Construct our resulting LLVM IR expression.
2002     llvm::Value *Carry1;
2003     llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
2004                                               X, Y, Carry1);
2005     llvm::Value *Carry2;
2006     llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
2007                                               Sum1, Carryin, Carry2);
2008     llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
2009                                                X->getType());
2010     Builder.CreateStore(CarryOut, CarryOutPtr);
2011     return RValue::get(Sum2);
2012   }
2013 
2014   case Builtin::BI__builtin_add_overflow:
2015   case Builtin::BI__builtin_sub_overflow:
2016   case Builtin::BI__builtin_mul_overflow: {
2017     const clang::Expr *LeftArg = E->getArg(0);
2018     const clang::Expr *RightArg = E->getArg(1);
2019     const clang::Expr *ResultArg = E->getArg(2);
2020 
2021     clang::QualType ResultQTy =
2022         ResultArg->getType()->castAs<PointerType>()->getPointeeType();
2023 
2024     WidthAndSignedness LeftInfo =
2025         getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
2026     WidthAndSignedness RightInfo =
2027         getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
2028     WidthAndSignedness ResultInfo =
2029         getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
2030     WidthAndSignedness EncompassingInfo =
2031         EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
2032 
2033     llvm::Type *EncompassingLLVMTy =
2034         llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
2035 
2036     llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
2037 
2038     llvm::Intrinsic::ID IntrinsicId;
2039     switch (BuiltinID) {
2040     default:
2041       llvm_unreachable("Unknown overflow builtin id.");
2042     case Builtin::BI__builtin_add_overflow:
2043       IntrinsicId = EncompassingInfo.Signed
2044                         ? llvm::Intrinsic::sadd_with_overflow
2045                         : llvm::Intrinsic::uadd_with_overflow;
2046       break;
2047     case Builtin::BI__builtin_sub_overflow:
2048       IntrinsicId = EncompassingInfo.Signed
2049                         ? llvm::Intrinsic::ssub_with_overflow
2050                         : llvm::Intrinsic::usub_with_overflow;
2051       break;
2052     case Builtin::BI__builtin_mul_overflow:
2053       IntrinsicId = EncompassingInfo.Signed
2054                         ? llvm::Intrinsic::smul_with_overflow
2055                         : llvm::Intrinsic::umul_with_overflow;
2056       break;
2057     }
2058 
2059     llvm::Value *Left = EmitScalarExpr(LeftArg);
2060     llvm::Value *Right = EmitScalarExpr(RightArg);
2061     Address ResultPtr = EmitPointerWithAlignment(ResultArg);
2062 
2063     // Extend each operand to the encompassing type.
2064     Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
2065     Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
2066 
2067     // Perform the operation on the extended values.
2068     llvm::Value *Overflow, *Result;
2069     Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
2070 
2071     if (EncompassingInfo.Width > ResultInfo.Width) {
2072       // The encompassing type is wider than the result type, so we need to
2073       // truncate it.
2074       llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
2075 
2076       // To see if the truncation caused an overflow, we will extend
2077       // the result and then compare it to the original result.
2078       llvm::Value *ResultTruncExt = Builder.CreateIntCast(
2079           ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
2080       llvm::Value *TruncationOverflow =
2081           Builder.CreateICmpNE(Result, ResultTruncExt);
2082 
2083       Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
2084       Result = ResultTrunc;
2085     }
2086 
2087     // Finally, store the result using the pointer.
2088     bool isVolatile =
2089       ResultArg->getType()->getPointeeType().isVolatileQualified();
2090     Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
2091 
2092     return RValue::get(Overflow);
2093   }
2094 
2095   case Builtin::BI__builtin_uadd_overflow:
2096   case Builtin::BI__builtin_uaddl_overflow:
2097   case Builtin::BI__builtin_uaddll_overflow:
2098   case Builtin::BI__builtin_usub_overflow:
2099   case Builtin::BI__builtin_usubl_overflow:
2100   case Builtin::BI__builtin_usubll_overflow:
2101   case Builtin::BI__builtin_umul_overflow:
2102   case Builtin::BI__builtin_umull_overflow:
2103   case Builtin::BI__builtin_umulll_overflow:
2104   case Builtin::BI__builtin_sadd_overflow:
2105   case Builtin::BI__builtin_saddl_overflow:
2106   case Builtin::BI__builtin_saddll_overflow:
2107   case Builtin::BI__builtin_ssub_overflow:
2108   case Builtin::BI__builtin_ssubl_overflow:
2109   case Builtin::BI__builtin_ssubll_overflow:
2110   case Builtin::BI__builtin_smul_overflow:
2111   case Builtin::BI__builtin_smull_overflow:
2112   case Builtin::BI__builtin_smulll_overflow: {
2113 
2114     // We translate all of these builtins directly to the relevant llvm IR node.
2115 
2116     // Scalarize our inputs.
2117     llvm::Value *X = EmitScalarExpr(E->getArg(0));
2118     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
2119     Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
2120 
2121     // Decide which of the overflow intrinsics we are lowering to:
2122     llvm::Intrinsic::ID IntrinsicId;
2123     switch (BuiltinID) {
2124     default: llvm_unreachable("Unknown overflow builtin id.");
2125     case Builtin::BI__builtin_uadd_overflow:
2126     case Builtin::BI__builtin_uaddl_overflow:
2127     case Builtin::BI__builtin_uaddll_overflow:
2128       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
2129       break;
2130     case Builtin::BI__builtin_usub_overflow:
2131     case Builtin::BI__builtin_usubl_overflow:
2132     case Builtin::BI__builtin_usubll_overflow:
2133       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
2134       break;
2135     case Builtin::BI__builtin_umul_overflow:
2136     case Builtin::BI__builtin_umull_overflow:
2137     case Builtin::BI__builtin_umulll_overflow:
2138       IntrinsicId = llvm::Intrinsic::umul_with_overflow;
2139       break;
2140     case Builtin::BI__builtin_sadd_overflow:
2141     case Builtin::BI__builtin_saddl_overflow:
2142     case Builtin::BI__builtin_saddll_overflow:
2143       IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
2144       break;
2145     case Builtin::BI__builtin_ssub_overflow:
2146     case Builtin::BI__builtin_ssubl_overflow:
2147     case Builtin::BI__builtin_ssubll_overflow:
2148       IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
2149       break;
2150     case Builtin::BI__builtin_smul_overflow:
2151     case Builtin::BI__builtin_smull_overflow:
2152     case Builtin::BI__builtin_smulll_overflow:
2153       IntrinsicId = llvm::Intrinsic::smul_with_overflow;
2154       break;
2155     }
2156 
2157 
2158     llvm::Value *Carry;
2159     llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
2160     Builder.CreateStore(Sum, SumOutPtr);
2161 
2162     return RValue::get(Carry);
2163   }
2164   case Builtin::BI__builtin_addressof:
2165     return RValue::get(EmitLValue(E->getArg(0)).getPointer());
2166   case Builtin::BI__builtin_operator_new:
2167     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2168                                     E->getArg(0), false);
2169   case Builtin::BI__builtin_operator_delete:
2170     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2171                                     E->getArg(0), true);
2172   case Builtin::BI__noop:
2173     // __noop always evaluates to an integer literal zero.
2174     return RValue::get(ConstantInt::get(IntTy, 0));
2175   case Builtin::BI__builtin_call_with_static_chain: {
2176     const CallExpr *Call = cast<CallExpr>(E->getArg(0));
2177     const Expr *Chain = E->getArg(1);
2178     return EmitCall(Call->getCallee()->getType(),
2179                     EmitCallee(Call->getCallee()), Call, ReturnValue,
2180                     EmitScalarExpr(Chain));
2181   }
2182   case Builtin::BI_InterlockedExchange8:
2183   case Builtin::BI_InterlockedExchange16:
2184   case Builtin::BI_InterlockedExchange:
2185   case Builtin::BI_InterlockedExchangePointer:
2186     return RValue::get(
2187         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
2188   case Builtin::BI_InterlockedCompareExchangePointer: {
2189     llvm::Type *RTy;
2190     llvm::IntegerType *IntType =
2191       IntegerType::get(getLLVMContext(),
2192                        getContext().getTypeSize(E->getType()));
2193     llvm::Type *IntPtrType = IntType->getPointerTo();
2194 
2195     llvm::Value *Destination =
2196       Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
2197 
2198     llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
2199     RTy = Exchange->getType();
2200     Exchange = Builder.CreatePtrToInt(Exchange, IntType);
2201 
2202     llvm::Value *Comparand =
2203       Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
2204 
2205     auto Result =
2206         Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
2207                                     AtomicOrdering::SequentiallyConsistent,
2208                                     AtomicOrdering::SequentiallyConsistent);
2209     Result->setVolatile(true);
2210 
2211     return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
2212                                                                          0),
2213                                               RTy));
2214   }
2215   case Builtin::BI_InterlockedCompareExchange8:
2216   case Builtin::BI_InterlockedCompareExchange16:
2217   case Builtin::BI_InterlockedCompareExchange:
2218   case Builtin::BI_InterlockedCompareExchange64: {
2219     AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
2220         EmitScalarExpr(E->getArg(0)),
2221         EmitScalarExpr(E->getArg(2)),
2222         EmitScalarExpr(E->getArg(1)),
2223         AtomicOrdering::SequentiallyConsistent,
2224         AtomicOrdering::SequentiallyConsistent);
2225       CXI->setVolatile(true);
2226       return RValue::get(Builder.CreateExtractValue(CXI, 0));
2227   }
2228   case Builtin::BI_InterlockedIncrement16:
2229   case Builtin::BI_InterlockedIncrement:
2230     return RValue::get(
2231         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
2232   case Builtin::BI_InterlockedDecrement16:
2233   case Builtin::BI_InterlockedDecrement:
2234     return RValue::get(
2235         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
2236   case Builtin::BI_InterlockedAnd8:
2237   case Builtin::BI_InterlockedAnd16:
2238   case Builtin::BI_InterlockedAnd:
2239     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
2240   case Builtin::BI_InterlockedExchangeAdd8:
2241   case Builtin::BI_InterlockedExchangeAdd16:
2242   case Builtin::BI_InterlockedExchangeAdd:
2243     return RValue::get(
2244         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
2245   case Builtin::BI_InterlockedExchangeSub8:
2246   case Builtin::BI_InterlockedExchangeSub16:
2247   case Builtin::BI_InterlockedExchangeSub:
2248     return RValue::get(
2249         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
2250   case Builtin::BI_InterlockedOr8:
2251   case Builtin::BI_InterlockedOr16:
2252   case Builtin::BI_InterlockedOr:
2253     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
2254   case Builtin::BI_InterlockedXor8:
2255   case Builtin::BI_InterlockedXor16:
2256   case Builtin::BI_InterlockedXor:
2257     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
2258   case Builtin::BI_interlockedbittestandset:
2259     return RValue::get(
2260         EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E));
2261 
2262   case Builtin::BI__exception_code:
2263   case Builtin::BI_exception_code:
2264     return RValue::get(EmitSEHExceptionCode());
2265   case Builtin::BI__exception_info:
2266   case Builtin::BI_exception_info:
2267     return RValue::get(EmitSEHExceptionInfo());
2268   case Builtin::BI__abnormal_termination:
2269   case Builtin::BI_abnormal_termination:
2270     return RValue::get(EmitSEHAbnormalTermination());
2271   case Builtin::BI_setjmpex: {
2272     if (getTarget().getTriple().isOSMSVCRT()) {
2273       llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2274       llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2275           getLLVMContext(), llvm::AttributeList::FunctionIndex,
2276           llvm::Attribute::ReturnsTwice);
2277       llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
2278           llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2279           "_setjmpex", ReturnsTwiceAttr, /*Local=*/true);
2280       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2281           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2282       llvm::Value *FrameAddr =
2283           Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2284                              ConstantInt::get(Int32Ty, 0));
2285       llvm::Value *Args[] = {Buf, FrameAddr};
2286       llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
2287       CS.setAttributes(ReturnsTwiceAttr);
2288       return RValue::get(CS.getInstruction());
2289     }
2290     break;
2291   }
2292   case Builtin::BI_setjmp: {
2293     if (getTarget().getTriple().isOSMSVCRT()) {
2294       llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2295           getLLVMContext(), llvm::AttributeList::FunctionIndex,
2296           llvm::Attribute::ReturnsTwice);
2297       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2298           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2299       llvm::CallSite CS;
2300       if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
2301         llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
2302         llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
2303             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
2304             "_setjmp3", ReturnsTwiceAttr, /*Local=*/true);
2305         llvm::Value *Count = ConstantInt::get(IntTy, 0);
2306         llvm::Value *Args[] = {Buf, Count};
2307         CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
2308       } else {
2309         llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2310         llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
2311             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2312             "_setjmp", ReturnsTwiceAttr, /*Local=*/true);
2313         llvm::Value *FrameAddr =
2314             Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2315                                ConstantInt::get(Int32Ty, 0));
2316         llvm::Value *Args[] = {Buf, FrameAddr};
2317         CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
2318       }
2319       CS.setAttributes(ReturnsTwiceAttr);
2320       return RValue::get(CS.getInstruction());
2321     }
2322     break;
2323   }
2324 
2325   case Builtin::BI__GetExceptionInfo: {
2326     if (llvm::GlobalVariable *GV =
2327             CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
2328       return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
2329     break;
2330   }
2331 
2332   case Builtin::BI__fastfail:
2333     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
2334 
2335   case Builtin::BI__builtin_coro_size: {
2336     auto & Context = getContext();
2337     auto SizeTy = Context.getSizeType();
2338     auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
2339     Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
2340     return RValue::get(Builder.CreateCall(F));
2341   }
2342 
  // Each coroutine builtin below is forwarded unchanged to
  // EmitCoroutineIntrinsic together with the ID of the llvm.coro.* intrinsic
  // that carries the same name.
  case Builtin::BI__builtin_coro_id:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
  case Builtin::BI__builtin_coro_promise:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
  case Builtin::BI__builtin_coro_resume:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
  case Builtin::BI__builtin_coro_frame:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
  case Builtin::BI__builtin_coro_free:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
  case Builtin::BI__builtin_coro_destroy:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
  case Builtin::BI__builtin_coro_done:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
  case Builtin::BI__builtin_coro_alloc:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
  case Builtin::BI__builtin_coro_begin:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
  case Builtin::BI__builtin_coro_end:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
  case Builtin::BI__builtin_coro_suspend:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
  case Builtin::BI__builtin_coro_param:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_param);
2367 
2368   // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
2369   case Builtin::BIread_pipe:
2370   case Builtin::BIwrite_pipe: {
2371     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2372           *Arg1 = EmitScalarExpr(E->getArg(1));
2373     CGOpenCLRuntime OpenCLRT(CGM);
2374     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2375     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2376 
2377     // Type of the generic packet parameter.
2378     unsigned GenericAS =
2379         getContext().getTargetAddressSpace(LangAS::opencl_generic);
2380     llvm::Type *I8PTy = llvm::PointerType::get(
2381         llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
2382 
2383     // Testing which overloaded version we should generate the call for.
2384     if (2U == E->getNumArgs()) {
2385       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
2386                                                              : "__write_pipe_2";
2387       // Creating a generic function type to be able to call with any builtin or
2388       // user defined type.
2389       llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
2390       llvm::FunctionType *FTy = llvm::FunctionType::get(
2391           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2392       Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
2393       return RValue::get(
2394           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2395                              {Arg0, BCast, PacketSize, PacketAlign}));
2396     } else {
2397       assert(4 == E->getNumArgs() &&
2398              "Illegal number of parameters to pipe function");
2399       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
2400                                                              : "__write_pipe_4";
2401 
2402       llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
2403                               Int32Ty, Int32Ty};
2404       Value *Arg2 = EmitScalarExpr(E->getArg(2)),
2405             *Arg3 = EmitScalarExpr(E->getArg(3));
2406       llvm::FunctionType *FTy = llvm::FunctionType::get(
2407           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2408       Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
2409       // We know the third argument is an integer type, but we may need to cast
2410       // it to i32.
2411       if (Arg2->getType() != Int32Ty)
2412         Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
2413       return RValue::get(Builder.CreateCall(
2414           CGM.CreateRuntimeFunction(FTy, Name),
2415           {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
2416     }
2417   }
  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
  // functions
2420   case Builtin::BIreserve_read_pipe:
2421   case Builtin::BIreserve_write_pipe:
2422   case Builtin::BIwork_group_reserve_read_pipe:
2423   case Builtin::BIwork_group_reserve_write_pipe:
2424   case Builtin::BIsub_group_reserve_read_pipe:
2425   case Builtin::BIsub_group_reserve_write_pipe: {
2426     // Composing the mangled name for the function.
2427     const char *Name;
2428     if (BuiltinID == Builtin::BIreserve_read_pipe)
2429       Name = "__reserve_read_pipe";
2430     else if (BuiltinID == Builtin::BIreserve_write_pipe)
2431       Name = "__reserve_write_pipe";
2432     else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
2433       Name = "__work_group_reserve_read_pipe";
2434     else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
2435       Name = "__work_group_reserve_write_pipe";
2436     else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
2437       Name = "__sub_group_reserve_read_pipe";
2438     else
2439       Name = "__sub_group_reserve_write_pipe";
2440 
2441     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2442           *Arg1 = EmitScalarExpr(E->getArg(1));
2443     llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
2444     CGOpenCLRuntime OpenCLRT(CGM);
2445     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2446     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2447 
2448     // Building the generic function prototype.
2449     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
2450     llvm::FunctionType *FTy = llvm::FunctionType::get(
2451         ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2452     // We know the second argument is an integer type, but we may need to cast
2453     // it to i32.
2454     if (Arg1->getType() != Int32Ty)
2455       Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
2456     return RValue::get(
2457         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2458                            {Arg0, Arg1, PacketSize, PacketAlign}));
2459   }
2460   // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
2461   // functions
2462   case Builtin::BIcommit_read_pipe:
2463   case Builtin::BIcommit_write_pipe:
2464   case Builtin::BIwork_group_commit_read_pipe:
2465   case Builtin::BIwork_group_commit_write_pipe:
2466   case Builtin::BIsub_group_commit_read_pipe:
2467   case Builtin::BIsub_group_commit_write_pipe: {
2468     const char *Name;
2469     if (BuiltinID == Builtin::BIcommit_read_pipe)
2470       Name = "__commit_read_pipe";
2471     else if (BuiltinID == Builtin::BIcommit_write_pipe)
2472       Name = "__commit_write_pipe";
2473     else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
2474       Name = "__work_group_commit_read_pipe";
2475     else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
2476       Name = "__work_group_commit_write_pipe";
2477     else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
2478       Name = "__sub_group_commit_read_pipe";
2479     else
2480       Name = "__sub_group_commit_write_pipe";
2481 
2482     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2483           *Arg1 = EmitScalarExpr(E->getArg(1));
2484     CGOpenCLRuntime OpenCLRT(CGM);
2485     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2486     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2487 
2488     // Building the generic function prototype.
2489     llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
2490     llvm::FunctionType *FTy =
2491         llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
2492                                 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2493 
2494     return RValue::get(
2495         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2496                            {Arg0, Arg1, PacketSize, PacketAlign}));
2497   }
2498   // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
2499   case Builtin::BIget_pipe_num_packets:
2500   case Builtin::BIget_pipe_max_packets: {
2501     const char *Name;
2502     if (BuiltinID == Builtin::BIget_pipe_num_packets)
2503       Name = "__get_pipe_num_packets";
2504     else
2505       Name = "__get_pipe_max_packets";
2506 
2507     // Building the generic function prototype.
2508     Value *Arg0 = EmitScalarExpr(E->getArg(0));
2509     CGOpenCLRuntime OpenCLRT(CGM);
2510     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2511     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2512     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
2513     llvm::FunctionType *FTy = llvm::FunctionType::get(
2514         Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2515 
2516     return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2517                                           {Arg0, PacketSize, PacketAlign}));
2518   }
2519 
2520   // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
2521   case Builtin::BIto_global:
2522   case Builtin::BIto_local:
2523   case Builtin::BIto_private: {
2524     auto Arg0 = EmitScalarExpr(E->getArg(0));
2525     auto NewArgT = llvm::PointerType::get(Int8Ty,
2526       CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2527     auto NewRetT = llvm::PointerType::get(Int8Ty,
2528       CGM.getContext().getTargetAddressSpace(
2529         E->getType()->getPointeeType().getAddressSpace()));
2530     auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
2531     llvm::Value *NewArg;
2532     if (Arg0->getType()->getPointerAddressSpace() !=
2533         NewArgT->getPointerAddressSpace())
2534       NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
2535     else
2536       NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
2537     auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
2538     auto NewCall =
2539         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
2540     return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
2541       ConvertType(E->getType())));
2542   }
2543 
2544   // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
2545   // It contains four different overload formats specified in Table 6.13.17.1.
2546   case Builtin::BIenqueue_kernel: {
2547     StringRef Name; // Generated function call name
2548     unsigned NumArgs = E->getNumArgs();
2549 
2550     llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
2551     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2552         getContext().getTargetAddressSpace(LangAS::opencl_generic));
2553 
2554     llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
2555     llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
2556     LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
2557     llvm::Value *Range = NDRangeL.getAddress().getPointer();
2558     llvm::Type *RangeTy = NDRangeL.getAddress().getType();
2559 
2560     if (NumArgs == 4) {
2561       // The most basic form of the call with parameters:
2562       // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
2563       Name = "__enqueue_kernel_basic";
2564       llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy};
2565       llvm::FunctionType *FTy = llvm::FunctionType::get(
2566           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false);
2567 
2568       llvm::Value *Block = Builder.CreatePointerCast(
2569           EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
2570 
2571       AttrBuilder B;
2572       B.addAttribute(Attribute::ByVal);
2573       llvm::AttributeList ByValAttrSet =
2574           llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
2575 
2576       auto RTCall =
2577           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
2578                              {Queue, Flags, Range, Block});
2579       RTCall->setAttributes(ByValAttrSet);
2580       return RValue::get(RTCall);
2581     }
2582     assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
2583 
2584     // Could have events and/or vaargs.
2585     if (E->getArg(3)->getType()->isBlockPointerType()) {
2586       // No events passed, but has variadic arguments.
2587       Name = "__enqueue_kernel_vaargs";
2588       llvm::Value *Block = Builder.CreatePointerCast(
2589           EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
2590       // Create a vector of the arguments, as well as a constant value to
2591       // express to the runtime the number of variadic arguments.
2592       std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block,
2593                                          ConstantInt::get(IntTy, NumArgs - 4)};
2594       std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy,
2595                                           GenericVoidPtrTy, IntTy};
2596 
2597       // Each of the following arguments specifies the size of the corresponding
2598       // argument passed to the enqueued block.
2599       for (unsigned I = 4/*Position of the first size arg*/; I < NumArgs; ++I)
2600         Args.push_back(
2601             Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy));
2602 
2603       llvm::FunctionType *FTy = llvm::FunctionType::get(
2604           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2605       return RValue::get(
2606           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2607                              llvm::ArrayRef<llvm::Value *>(Args)));
2608     }
2609     // Any calls now have event arguments passed.
2610     if (NumArgs >= 7) {
2611       llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
2612       llvm::Type *EventPtrTy = EventTy->getPointerTo(
2613           CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2614 
2615       llvm::Value *NumEvents =
2616           Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
2617       llvm::Value *EventList =
2618           E->getArg(4)->getType()->isArrayType()
2619               ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
2620               : EmitScalarExpr(E->getArg(4));
2621       llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
2622       // Convert to generic address space.
2623       EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
2624       ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy);
2625       llvm::Value *Block = Builder.CreatePointerCast(
2626           EmitScalarExpr(E->getArg(6)), GenericVoidPtrTy);
2627 
2628       std::vector<llvm::Type *> ArgTys = {
2629           QueueTy,    Int32Ty,    RangeTy,         Int32Ty,
2630           EventPtrTy, EventPtrTy, GenericVoidPtrTy};
2631 
2632       std::vector<llvm::Value *> Args = {Queue,     Flags,    Range, NumEvents,
2633                                          EventList, ClkEvent, Block};
2634 
2635       if (NumArgs == 7) {
2636         // Has events but no variadics.
2637         Name = "__enqueue_kernel_basic_events";
2638         llvm::FunctionType *FTy = llvm::FunctionType::get(
2639             Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2640         return RValue::get(
2641             Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2642                                llvm::ArrayRef<llvm::Value *>(Args)));
2643       }
2644       // Has event info and variadics
2645       // Pass the number of variadics to the runtime function too.
2646       Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
2647       ArgTys.push_back(Int32Ty);
2648       Name = "__enqueue_kernel_events_vaargs";
2649 
2650       // Each of the following arguments specifies the size of the corresponding
2651       // argument passed to the enqueued block.
2652       for (unsigned I = 7/*Position of the first size arg*/; I < NumArgs; ++I)
2653         Args.push_back(
2654             Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy));
2655 
2656       llvm::FunctionType *FTy = llvm::FunctionType::get(
2657           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2658       return RValue::get(
2659           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2660                              llvm::ArrayRef<llvm::Value *>(Args)));
2661     }
2662   }
2663   // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
2664   // parameter.
2665   case Builtin::BIget_kernel_work_group_size: {
2666     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2667         getContext().getTargetAddressSpace(LangAS::opencl_generic));
2668     Value *Arg = EmitScalarExpr(E->getArg(0));
2669     Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
2670     return RValue::get(Builder.CreateCall(
2671         CGM.CreateRuntimeFunction(
2672             llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
2673             "__get_kernel_work_group_size_impl"),
2674         Arg));
2675   }
2676   case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
2677     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2678         getContext().getTargetAddressSpace(LangAS::opencl_generic));
2679     Value *Arg = EmitScalarExpr(E->getArg(0));
2680     Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
2681     return RValue::get(Builder.CreateCall(
2682         CGM.CreateRuntimeFunction(
2683             llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
2684             "__get_kernel_preferred_work_group_multiple_impl"),
2685         Arg));
2686   }
2687   case Builtin::BIprintf:
2688     if (getTarget().getTriple().isNVPTX())
2689       return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
2690     break;
2691   case Builtin::BI__builtin_canonicalize:
2692   case Builtin::BI__builtin_canonicalizef:
2693   case Builtin::BI__builtin_canonicalizel:
2694     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
2695 
2696   case Builtin::BI__builtin_thread_pointer: {
2697     if (!getContext().getTargetInfo().isTLSSupported())
2698       CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
2699     // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
2700     break;
2701   }
  // Serializes the arguments of __builtin_os_log_format into the buffer given
  // as argument 0 and returns that buffer's pointer. The bytes written are:
  //   [summary][numArgs] then, per layout item,
  //   [argDescriptor][argSize][Item.size() bytes of payload]
  case Builtin::BI__builtin_os_log_format: {
    assert(E->getNumArgs() >= 2 &&
           "__builtin_os_log_format takes at least 2 arguments");
    // The layout computed from the call lists the items to serialize.
    analyze_os_log::OSLogBufferLayout Layout;
    analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
    Address BufAddr = EmitPointerWithAlignment(E->getArg(0));
    // Ignore argument 1, the format string. It is not currently used.
    // Offset is the running byte position inside the buffer; each header byte
    // is stored at the current position, which is then advanced by one.
    CharUnits Offset;
    Builder.CreateStore(
        Builder.getInt8(Layout.getSummaryByte()),
        Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
    Builder.CreateStore(
        Builder.getInt8(Layout.getNumArgsByte()),
        Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));

    // Non-constant ObjC retainable values written to the buffer; they get a
    // use cleanup further down.
    llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
    for (const auto &Item : Layout.Items) {
      // Two header bytes per item: descriptor, then size.
      Builder.CreateStore(
          Builder.getInt8(Item.getDescriptorByte()),
          Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
      Builder.CreateStore(
          Builder.getInt8(Item.getSizeByte()),
          Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
      // Address of this item's payload.
      Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset);
      if (const Expr *TheExpr = Item.getExpr()) {
        // The item is backed by an expression: store its value using the
        // expression's in-memory type.
        Addr = Builder.CreateElementBitCast(
            Addr, ConvertTypeForMem(TheExpr->getType()));
        // Check if this is a retainable type.
        if (TheExpr->getType()->isObjCRetainableType()) {
          assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
                 "Only scalar can be a ObjC retainable type");
          llvm::Value *SV = EmitScalarExpr(TheExpr, /*Ignore*/ false);
          RValue RV = RValue::get(SV);
          LValue LV = MakeAddrLValue(Addr, TheExpr->getType());
          EmitStoreThroughLValue(RV, LV);
          // Check if the object is constant, if not, save it in
          // RetainableOperands.
          if (!isa<Constant>(SV))
            RetainableOperands.push_back(SV);
        } else {
          EmitAnyExprToMem(TheExpr, Addr, Qualifiers(), /*isInit*/ true);
        }
      } else {
        // No expression: the item carries a constant, stored as an i32.
        Addr = Builder.CreateElementBitCast(Addr, Int32Ty);
        Builder.CreateStore(
            Builder.getInt32(Item.getConstValue().getQuantity()), Addr);
      }
      // Skip past the payload to the next item's header.
      Offset += Item.size();
    }

    // Push a clang.arc.use cleanup for each object in RetainableOperands. The
    // cleanup will cause the use to appear after the final log call, keeping
    // the object valid while it's held in the log buffer.  Note that if there's
    // a release cleanup on the object, it will already be active; since
    // cleanups are emitted in reverse order, the use will occur before the
    // object is released.
    // This is only done under ARC and when optimizing.
    if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
        CGM.getCodeGenOpts().OptimizationLevel != 0)
      for (llvm::Value *object : RetainableOperands)
        pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), object);

    return RValue::get(BufAddr.getPointer());
  }
2765 
2766   case Builtin::BI__builtin_os_log_format_buffer_size: {
2767     analyze_os_log::OSLogBufferLayout Layout;
2768     analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
2769     return RValue::get(ConstantInt::get(ConvertType(E->getType()),
2770                                         Layout.size().getQuantity()));
2771   }
2772 
2773   case Builtin::BI__xray_customevent: {
2774     if (!ShouldXRayInstrumentFunction())
2775       return RValue::getIgnored();
2776     if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) {
2777       if (XRayAttr->neverXRayInstrument())
2778         return RValue::getIgnored();
2779     }
2780     Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
2781     auto FTy = F->getFunctionType();
2782     auto Arg0 = E->getArg(0);
2783     auto Arg0Val = EmitScalarExpr(Arg0);
2784     auto Arg0Ty = Arg0->getType();
2785     auto PTy0 = FTy->getParamType(0);
2786     if (PTy0 != Arg0Val->getType()) {
2787       if (Arg0Ty->isArrayType())
2788         Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
2789       else
2790         Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
2791     }
2792     auto Arg1 = EmitScalarExpr(E->getArg(1));
2793     auto PTy1 = FTy->getParamType(1);
2794     if (PTy1 != Arg1->getType())
2795       Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
2796     return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
2797   }
2798   }
2799 
2800   // If this is an alias for a lib function (e.g. __builtin_sin), emit
2801   // the call using the normal call path, but using the unmangled
2802   // version of the function name.
2803   if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
2804     return emitLibraryCall(*this, FD, E,
2805                            CGM.getBuiltinLibFunction(FD, BuiltinID));
2806 
2807   // If this is a predefined lib function (e.g. malloc), emit the call
2808   // using exactly the normal call path.
2809   if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
2810     return emitLibraryCall(*this, FD, E,
2811                       cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
2812 
2813   // Check that a call to a target specific builtin has the correct target
2814   // features.
2815   // This is down here to avoid non-target specific builtins, however, if
2816   // generic builtins start to require generic target features then we
2817   // can move this up to the beginning of the function.
2818   checkTargetFeatures(E, FD);
2819 
2820   // See if we have a target specific intrinsic.
2821   const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
2822   Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
2823   StringRef Prefix =
2824       llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
2825   if (!Prefix.empty()) {
2826     IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name);
    // NOTE: we don't need to perform a compatibility flag check here since the
    // intrinsics are declared in Builtins*.def via LANGBUILTIN, which filters
    // the MS builtins via ALL_MS_LANGUAGES, and are filtered earlier.
2830     if (IntrinsicID == Intrinsic::not_intrinsic)
2831       IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
2832   }
2833 
2834   if (IntrinsicID != Intrinsic::not_intrinsic) {
2835     SmallVector<Value*, 16> Args;
2836 
2837     // Find out if any arguments are required to be integer constant
2838     // expressions.
2839     unsigned ICEArguments = 0;
2840     ASTContext::GetBuiltinTypeError Error;
2841     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
2842     assert(Error == ASTContext::GE_None && "Should not codegen an error");
2843 
2844     Function *F = CGM.getIntrinsic(IntrinsicID);
2845     llvm::FunctionType *FTy = F->getFunctionType();
2846 
2847     for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
2848       Value *ArgValue;
2849       // If this is a normal argument, just emit it as a scalar.
2850       if ((ICEArguments & (1 << i)) == 0) {
2851         ArgValue = EmitScalarExpr(E->getArg(i));
2852       } else {
2853         // If this is required to be a constant, constant fold it so that we
2854         // know that the generated intrinsic gets a ConstantInt.
2855         llvm::APSInt Result;
2856         bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
2857         assert(IsConst && "Constant arg isn't actually constant?");
2858         (void)IsConst;
2859         ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
2860       }
2861 
2862       // If the intrinsic arg type is different from the builtin arg type
2863       // we need to do a bit cast.
2864       llvm::Type *PTy = FTy->getParamType(i);
2865       if (PTy != ArgValue->getType()) {
2866         assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
2867                "Must be able to losslessly bit cast to param");
2868         ArgValue = Builder.CreateBitCast(ArgValue, PTy);
2869       }
2870 
2871       Args.push_back(ArgValue);
2872     }
2873 
2874     Value *V = Builder.CreateCall(F, Args);
2875     QualType BuiltinRetType = E->getType();
2876 
2877     llvm::Type *RetTy = VoidTy;
2878     if (!BuiltinRetType->isVoidType())
2879       RetTy = ConvertType(BuiltinRetType);
2880 
2881     if (RetTy != V->getType()) {
2882       assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
2883              "Must be able to losslessly bit cast result type");
2884       V = Builder.CreateBitCast(V, RetTy);
2885     }
2886 
2887     return RValue::get(V);
2888   }
2889 
2890   // See if we have a target specific builtin that needs to be lowered.
2891   if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
2892     return RValue::get(V);
2893 
2894   ErrorUnsupported(E, "builtin function");
2895 
2896   // Unknown builtin, for now just dump it out and return undef.
2897   return GetUndefRValue(E->getType());
2898 }
2899 
2900 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
2901                                         unsigned BuiltinID, const CallExpr *E,
2902                                         llvm::Triple::ArchType Arch) {
2903   switch (Arch) {
2904   case llvm::Triple::arm:
2905   case llvm::Triple::armeb:
2906   case llvm::Triple::thumb:
2907   case llvm::Triple::thumbeb:
2908     return CGF->EmitARMBuiltinExpr(BuiltinID, E);
2909   case llvm::Triple::aarch64:
2910   case llvm::Triple::aarch64_be:
2911     return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
2912   case llvm::Triple::x86:
2913   case llvm::Triple::x86_64:
2914     return CGF->EmitX86BuiltinExpr(BuiltinID, E);
2915   case llvm::Triple::ppc:
2916   case llvm::Triple::ppc64:
2917   case llvm::Triple::ppc64le:
2918     return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
2919   case llvm::Triple::r600:
2920   case llvm::Triple::amdgcn:
2921     return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
2922   case llvm::Triple::systemz:
2923     return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
2924   case llvm::Triple::nvptx:
2925   case llvm::Triple::nvptx64:
2926     return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
2927   case llvm::Triple::wasm32:
2928   case llvm::Triple::wasm64:
2929     return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
2930   default:
2931     return nullptr;
2932   }
2933 }
2934 
2935 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
2936                                               const CallExpr *E) {
2937   if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
2938     assert(getContext().getAuxTargetInfo() && "Missing aux target info");
2939     return EmitTargetArchBuiltinExpr(
2940         this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
2941         getContext().getAuxTargetInfo()->getTriple().getArch());
2942   }
2943 
2944   return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
2945                                    getTarget().getTriple().getArch());
2946 }
2947 
2948 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
2949                                      NeonTypeFlags TypeFlags,
2950                                      bool V1Ty=false) {
2951   int IsQuad = TypeFlags.isQuad();
2952   switch (TypeFlags.getEltType()) {
2953   case NeonTypeFlags::Int8:
2954   case NeonTypeFlags::Poly8:
2955     return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
2956   case NeonTypeFlags::Int16:
2957   case NeonTypeFlags::Poly16:
2958   case NeonTypeFlags::Float16:
2959     return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
2960   case NeonTypeFlags::Int32:
2961     return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
2962   case NeonTypeFlags::Int64:
2963   case NeonTypeFlags::Poly64:
2964     return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
2965   case NeonTypeFlags::Poly128:
2966     // FIXME: i128 and f128 doesn't get fully support in Clang and llvm.
2967     // There is a lot of i128 and f128 API missing.
2968     // so we use v16i8 to represent poly128 and get pattern matched.
2969     return llvm::VectorType::get(CGF->Int8Ty, 16);
2970   case NeonTypeFlags::Float32:
2971     return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
2972   case NeonTypeFlags::Float64:
2973     return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
2974   }
2975   llvm_unreachable("Unknown vector element type!");
2976 }
2977 
2978 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
2979                                           NeonTypeFlags IntTypeFlags) {
2980   int IsQuad = IntTypeFlags.isQuad();
2981   switch (IntTypeFlags.getEltType()) {
2982   case NeonTypeFlags::Int32:
2983     return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
2984   case NeonTypeFlags::Int64:
2985     return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
2986   default:
2987     llvm_unreachable("Type can't be converted to floating-point!");
2988   }
2989 }
2990 
2991 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
2992   unsigned nElts = V->getType()->getVectorNumElements();
2993   Value* SV = llvm::ConstantVector::getSplat(nElts, C);
2994   return Builder.CreateShuffleVector(V, V, SV, "lane");
2995 }
2996 
2997 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
2998                                      const char *name,
2999                                      unsigned shift, bool rightshift) {
3000   unsigned j = 0;
3001   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3002        ai != ae; ++ai, ++j)
3003     if (shift > 0 && shift == j)
3004       Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
3005     else
3006       Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
3007 
3008   return Builder.CreateCall(F, Ops, name);
3009 }
3010 
3011 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
3012                                             bool neg) {
3013   int SV = cast<ConstantInt>(V)->getSExtValue();
3014   return ConstantInt::get(Ty, neg ? -SV : SV);
3015 }
3016 
3017 // \brief Right-shift a vector by a constant.
3018 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
3019                                           llvm::Type *Ty, bool usgn,
3020                                           const char *name) {
3021   llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
3022 
3023   int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
3024   int EltSize = VTy->getScalarSizeInBits();
3025 
3026   Vec = Builder.CreateBitCast(Vec, Ty);
3027 
3028   // lshr/ashr are undefined when the shift amount is equal to the vector
3029   // element size.
3030   if (ShiftAmt == EltSize) {
3031     if (usgn) {
3032       // Right-shifting an unsigned value by its size yields 0.
3033       return llvm::ConstantAggregateZero::get(VTy);
3034     } else {
3035       // Right-shifting a signed value by its size is equivalent
3036       // to a shift of size-1.
3037       --ShiftAmt;
3038       Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
3039     }
3040   }
3041 
3042   Shift = EmitNeonShiftVector(Shift, Ty, false);
3043   if (usgn)
3044     return Builder.CreateLShr(Vec, Shift, name);
3045   else
3046     return Builder.CreateAShr(Vec, Shift, name);
3047 }
3048 
// Bit flags that are OR-ed together to form the TypeModifier field of a NEON
// intrinsic table entry. Each basic flag occupies its own bit; the names at
// the bottom are shorthand for combinations of the basic flags.
enum {
  AddRetType = 0x001,
  Add1ArgType = 0x002,
  Add2ArgTypes = 0x004,

  VectorizeRetType = 0x008,
  VectorizeArgTypes = 0x010,

  InventFloatType = 0x020,
  UnsignedAlts = 0x040,

  Use64BitVectors = 0x080,
  Use128BitVectors = 0x100,

  // Combinations.
  Vectorize1ArgType = VectorizeArgTypes | Add1ArgType,
  VectorRet = VectorizeRetType | AddRetType,
  VectorRetGetArgs01 = VectorRet | VectorizeArgTypes | Add2ArgTypes,
  FpCmpzModifiers = VectorRet | InventFloatType | Add1ArgType
};
3070 
namespace {
/// One row of a NEON builtin table, as produced by the NEONMAP0/1/2 macros.
/// Rows are ordered by BuiltinID, both against each other and against a bare
/// builtin ID.
struct NeonIntrinsicInfo {
  /// Builtin name without the BI__builtin_neon_ prefix.
  const char *NameHint;
  /// The NEON::BI__builtin_neon_* ID this row describes.
  unsigned BuiltinID;
  /// Primary intrinsic ID, or 0 when the row names none.
  unsigned LLVMIntrinsic;
  /// Alternate intrinsic ID, or 0 when the row names none.
  unsigned AltLLVMIntrinsic;
  /// Type-modifier flags for the row.
  unsigned TypeModifier;

  /// True when this row's builtin ID is below \p WantedID.
  bool operator<(unsigned WantedID) const {
    return WantedID > BuiltinID;
  }

  /// True when this row's builtin ID is below that of \p Other.
  bool operator<(const NeonIntrinsicInfo &Other) const {
    return Other.BuiltinID > BuiltinID;
  }
};
} // end anonymous namespace
3087 
// Row builders for NeonIntrinsicInfo tables. Each expands to a braced
// initializer in field order: the stringified NameBase, the matching
// NEON::BI__builtin_neon_<NameBase> ID, primary intrinsic, alternate
// intrinsic, and type modifier.

// Row with no intrinsic and no type modifier.
#define NEONMAP0(NameBase)                                                     \
  { #NameBase, NEON::BI__builtin_neon_##NameBase, 0, 0, 0 }

// Row with a single intrinsic (named relative to the Intrinsic scope).
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier)                        \
  {                                                                            \
    #NameBase,                                                                 \
    NEON::BI__builtin_neon_##NameBase,                                         \
    Intrinsic::LLVMIntrinsic,                                                  \
    0,                                                                         \
    TypeModifier                                                               \
  }

// Row with a primary and an alternate intrinsic.
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier)      \
  {                                                                            \
    #NameBase,                                                                 \
    NEON::BI__builtin_neon_##NameBase,                                         \
    Intrinsic::LLVMIntrinsic,                                                  \
    Intrinsic::AltLLVMIntrinsic,                                               \
    TypeModifier                                                               \
  }
3099 
// Intrinsic table for ARM NEON builtins. Each row is built with one of the
// NEONMAP macros: NEONMAP0 rows name no intrinsic, NEONMAP1 rows name one
// intrinsic (arm_neon_* or a generic one such as ctlz/ctpop), and NEONMAP2
// rows name a primary and an alternate intrinsic. The last macro argument is
// the row's type-modifier flag set.
// NOTE(review): rows appear to be listed alphabetically by builtin name, and
// NeonIntrinsicInfo compares by BuiltinID, so lookups presumably rely on the
// table being sorted -- confirm before inserting or reordering rows.
static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vabs_v, arm_neon_vabs, 0),
  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
  NEONMAP0(vaddhn_v),
  NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
  NEONMAP1(vaeseq_v, arm_neon_aese, 0),
  NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
  NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vcage_v, arm_neon_vacge, 0),
  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcale_v, arm_neon_vacge, 0),
  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvt_s32_v),
  NEONMAP0(vcvt_s64_v),
  NEONMAP0(vcvt_u32_v),
  NEONMAP0(vcvt_u64_v),
  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_s32_v),
  NEONMAP0(vcvtq_s64_v),
  NEONMAP0(vcvtq_u32_v),
  NEONMAP0(vcvtq_u64_v),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP0(vld1_dup_v),
  NEONMAP1(vld1_v, arm_neon_vld1, 0),
  NEONMAP0(vld1q_dup_v),
  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2_v, arm_neon_vld2, 0),
  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3_v, arm_neon_vld3, 0),
  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4_v, arm_neon_vld4, 0),
  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP0(vmull_v),
  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
  NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
  NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
  NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
  NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
  NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
  NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
  NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
  NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
  NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
  NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
  NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
  NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
  NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
  NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
  NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP1(vst1_v, arm_neon_vst1, 0),
  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2_v, arm_neon_vst2, 0),
  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3_v, arm_neon_vst3, 0),
  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4_v, arm_neon_vst4, 0),
  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtrn_v),
  NEONMAP0(vtrnq_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
  NEONMAP0(vuzp_v),
  NEONMAP0(vuzpq_v),
  NEONMAP0(vzip_v),
  NEONMAP0(vzipq_v)
};
3316 
// Intrinsic table for AArch64 vector NEON builtins. Each row is built with one
// of the NEONMAP macros: NEONMAP0 rows name no intrinsic, NEONMAP1 rows name
// one intrinsic (aarch64_neon_*, aarch64_crypto_*, or a generic one such as
// ctlz/ctpop), and NEONMAP2 rows name a primary and an alternate intrinsic.
// The last macro argument is the row's type-modifier flag set.
// NOTE(review): rows appear to be listed alphabetically by builtin name, and
// NeonIntrinsicInfo compares by BuiltinID, so lookups presumably rely on the
// table being sorted -- confirm before inserting or reordering rows.
static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
  NEONMAP0(vaddhn_v),
  NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
  NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
  NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
};
3431 
// Lookup table for the AArch64 scalar ("SISD") NEON builtins.
//
// Every row is produced by NEONMAP1 (defined earlier in this file, outside this
// excerpt). NOTE(review): the three macro arguments appear to be the builtin
// name, the LLVM intrinsic it maps to, and a mask of type-modifier flags
// (Add1ArgType, AddRetType, Vectorize1ArgType, Use64BitVectors, ...) -- confirm
// against the macro definition.
//
// Row order matters: findNeonIntrinsicInMap binary-searches the table it is
// handed and, in assert-enabled builds, checks that the table is sorted.
// NOTE(review): rows are listed alphabetically by builtin name, which is
// assumed to match the ordering of the builtin IDs -- verify before inserting
// a new row.
3432 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
3433   NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
3434   NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
3435   NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
3436   NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3437   NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3438   NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3439   NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3440   NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3441   NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3442   NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3443   NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3444   NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
3445   NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3446   NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
3447   NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3448   NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3449   NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3450   NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3451   NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3452   NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3453   NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3454   NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3455   NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3456   NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3457   NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3458   NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3459   NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3460   NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3461   NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3462   NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3463   NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3464   NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3465   NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3466   NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3467   NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3468   NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3469   NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3470   NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3471   NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3472   NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3473   NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3474   NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3475   NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3476   NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3477   NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3478   NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3479   NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3480   NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3481   NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
3482   NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3483   NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3484   NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3485   NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3486   NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3487   NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3488   NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3489   NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3490   NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3491   NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3492   NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3493   NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3494   NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3495   NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3496   NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3497   NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3498   NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3499   NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3500   NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3501   NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3502   NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
3503   NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
3504   NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
3505   NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3506   NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3507   NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3508   NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3509   NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3510   NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3511   NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3512   NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3513   NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3514   NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3515   NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3516   NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
3517   NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3518   NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
3519   NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3520   NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3521   NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
3522   NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
3523   NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3524   NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3525   NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
3526   NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
3527   NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
3528   NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
3529   NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
3530   NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
3531   NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
3532   NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
3533   NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3534   NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3535   NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3536   NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3537   NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
3538   NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3539   NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3540   NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3541   NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
3542   NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3543   NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
3544   NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
3545   NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
3546   NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3547   NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3548   NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
3549   NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
3550   NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3551   NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3552   NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
3553   NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
3554   NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
3555   NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
3556   NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3557   NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3558   NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3559   NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3560   NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
3561   NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3562   NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3563   NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3564   NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3565   NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3566   NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3567   NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
3568   NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
3569   NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3570   NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3571   NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3572   NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3573   NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
3574   NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
3575   NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
3576   NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
3577   NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3578   NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3579   NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
3580   NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
3581   NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
3582   NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3583   NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3584   NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3585   NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3586   NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
3587   NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3588   NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3589   NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3590   NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3591   NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
3592   NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
3593   NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3594   NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3595   NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
3596   NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
3597   NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
3598   NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
3599   NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
3600   NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
3601   NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
3602   NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
3603   NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
3604   NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
3605   NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
3606   NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
3607   NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
3608   NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
3609   NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
3610   NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
3611   NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
3612   NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
3613   NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
3614   NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
3615   NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3616   NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
3617   NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3618   NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
3619   NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
3620   NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
3621   NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3622   NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
3623   NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3624   NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
3625 };
3626 
3627 #undef NEONMAP0
3628 #undef NEONMAP1
3629 #undef NEONMAP2
3630 
// One "sortedness already verified" flag per NEON intrinsic table, all
// initially false. findNeonIntrinsicInMap takes such a flag by reference and
// sets it once its assert-only std::is_sorted check has run, so the check is
// not repeated. NOTE(review): the call sites that pair each flag with its
// table are outside this excerpt -- confirm the pairing there.
3631 static bool NEONSIMDIntrinsicsProvenSorted = false;
3632 
3633 static bool AArch64SIMDIntrinsicsProvenSorted = false;
3634 static bool AArch64SISDIntrinsicsProvenSorted = false;
3635 
3636 
3637 static const NeonIntrinsicInfo *
3638 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
3639                        unsigned BuiltinID, bool &MapProvenSorted) {
3640 
3641 #ifndef NDEBUG
3642   if (!MapProvenSorted) {
3643     assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
3644     MapProvenSorted = true;
3645   }
3646 #endif
3647 
3648   const NeonIntrinsicInfo *Builtin =
3649       std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
3650 
3651   if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
3652     return Builtin;
3653 
3654   return nullptr;
3655 }
3656 
3657 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
3658                                                    unsigned Modifier,
3659                                                    llvm::Type *ArgType,
3660                                                    const CallExpr *E) {
3661   int VectorSize = 0;
3662   if (Modifier & Use64BitVectors)
3663     VectorSize = 64;
3664   else if (Modifier & Use128BitVectors)
3665     VectorSize = 128;
3666 
3667   // Return type.
3668   SmallVector<llvm::Type *, 3> Tys;
3669   if (Modifier & AddRetType) {
3670     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
3671     if (Modifier & VectorizeRetType)
3672       Ty = llvm::VectorType::get(
3673           Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
3674 
3675     Tys.push_back(Ty);
3676   }
3677 
3678   // Arguments.
3679   if (Modifier & VectorizeArgTypes) {
3680     int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
3681     ArgType = llvm::VectorType::get(ArgType, Elts);
3682   }
3683 
3684   if (Modifier & (Add1ArgType | Add2ArgTypes))
3685     Tys.push_back(ArgType);
3686 
3687   if (Modifier & Add2ArgTypes)
3688     Tys.push_back(ArgType);
3689 
3690   if (Modifier & InventFloatType)
3691     Tys.push_back(FloatTy);
3692 
3693   return CGM.getIntrinsic(IntrinsicID, Tys);
3694 }
3695 
3696 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
3697                                             const NeonIntrinsicInfo &SISDInfo,
3698                                             SmallVectorImpl<Value *> &Ops,
3699                                             const CallExpr *E) {
3700   unsigned BuiltinID = SISDInfo.BuiltinID;
3701   unsigned int Int = SISDInfo.LLVMIntrinsic;
3702   unsigned Modifier = SISDInfo.TypeModifier;
3703   const char *s = SISDInfo.NameHint;
3704 
3705   switch (BuiltinID) {
3706   case NEON::BI__builtin_neon_vcled_s64:
3707   case NEON::BI__builtin_neon_vcled_u64:
3708   case NEON::BI__builtin_neon_vcles_f32:
3709   case NEON::BI__builtin_neon_vcled_f64:
3710   case NEON::BI__builtin_neon_vcltd_s64:
3711   case NEON::BI__builtin_neon_vcltd_u64:
3712   case NEON::BI__builtin_neon_vclts_f32:
3713   case NEON::BI__builtin_neon_vcltd_f64:
3714   case NEON::BI__builtin_neon_vcales_f32:
3715   case NEON::BI__builtin_neon_vcaled_f64:
3716   case NEON::BI__builtin_neon_vcalts_f32:
3717   case NEON::BI__builtin_neon_vcaltd_f64:
3718     // Only one direction of comparisons actually exist, cmle is actually a cmge
3719     // with swapped operands. The table gives us the right intrinsic but we
3720     // still need to do the swap.
3721     std::swap(Ops[0], Ops[1]);
3722     break;
3723   }
3724 
3725   assert(Int && "Generic code assumes a valid intrinsic");
3726 
3727   // Determine the type(s) of this overloaded AArch64 intrinsic.
3728   const Expr *Arg = E->getArg(0);
3729   llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
3730   Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
3731 
3732   int j = 0;
3733   ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
3734   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3735        ai != ae; ++ai, ++j) {
3736     llvm::Type *ArgTy = ai->getType();
3737     if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
3738              ArgTy->getPrimitiveSizeInBits())
3739       continue;
3740 
3741     assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
3742     // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
3743     // it before inserting.
3744     Ops[j] =
3745         CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
3746     Ops[j] =
3747         CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
3748   }
3749 
3750   Value *Result = CGF.EmitNeonCall(F, Ops, s);
3751   llvm::Type *ResultType = CGF.ConvertType(E->getType());
3752   if (ResultType->getPrimitiveSizeInBits() <
3753       Result->getType()->getPrimitiveSizeInBits())
3754     return CGF.Builder.CreateExtractElement(Result, C0);
3755 
3756   return CGF.Builder.CreateBitCast(Result, ResultType, s);
3757 }
3758 
3759 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
3760     unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
3761     const char *NameHint, unsigned Modifier, const CallExpr *E,
3762     SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
3763   // Get the last argument, which specifies the vector type.
3764   llvm::APSInt NeonTypeConst;
3765   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3766   if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
3767     return nullptr;
3768 
3769   // Determine the type of this overloaded NEON intrinsic.
3770   NeonTypeFlags Type(NeonTypeConst.getZExtValue());
3771   bool Usgn = Type.isUnsigned();
3772   bool Quad = Type.isQuad();
3773 
3774   llvm::VectorType *VTy = GetNeonType(this, Type);
3775   llvm::Type *Ty = VTy;
3776   if (!Ty)
3777     return nullptr;
3778 
3779   auto getAlignmentValue32 = [&](Address addr) -> Value* {
3780     return Builder.getInt32(addr.getAlignment().getQuantity());
3781   };
3782 
3783   unsigned Int = LLVMIntrinsic;
3784   if ((Modifier & UnsignedAlts) && !Usgn)
3785     Int = AltLLVMIntrinsic;
3786 
3787   switch (BuiltinID) {
3788   default: break;
3789   case NEON::BI__builtin_neon_vabs_v:
3790   case NEON::BI__builtin_neon_vabsq_v:
3791     if (VTy->getElementType()->isFloatingPointTy())
3792       return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
3793     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
3794   case NEON::BI__builtin_neon_vaddhn_v: {
3795     llvm::VectorType *SrcTy =
3796         llvm::VectorType::getExtendedElementVectorType(VTy);
3797 
3798     // %sum = add <4 x i32> %lhs, %rhs
3799     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3800     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
3801     Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
3802 
3803     // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
3804     Constant *ShiftAmt =
3805         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
3806     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
3807 
3808     // %res = trunc <4 x i32> %high to <4 x i16>
3809     return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
3810   }
3811   case NEON::BI__builtin_neon_vcale_v:
3812   case NEON::BI__builtin_neon_vcaleq_v:
3813   case NEON::BI__builtin_neon_vcalt_v:
3814   case NEON::BI__builtin_neon_vcaltq_v:
3815     std::swap(Ops[0], Ops[1]);
3816   case NEON::BI__builtin_neon_vcage_v:
3817   case NEON::BI__builtin_neon_vcageq_v:
3818   case NEON::BI__builtin_neon_vcagt_v:
3819   case NEON::BI__builtin_neon_vcagtq_v: {
3820     llvm::Type *VecFlt = llvm::VectorType::get(
3821         VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
3822         VTy->getNumElements());
3823     llvm::Type *Tys[] = { VTy, VecFlt };
3824     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3825     return EmitNeonCall(F, Ops, NameHint);
3826   }
3827   case NEON::BI__builtin_neon_vclz_v:
3828   case NEON::BI__builtin_neon_vclzq_v:
3829     // We generate target-independent intrinsic, which needs a second argument
3830     // for whether or not clz of zero is undefined; on ARM it isn't.
3831     Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
3832     break;
3833   case NEON::BI__builtin_neon_vcvt_f32_v:
3834   case NEON::BI__builtin_neon_vcvtq_f32_v:
3835     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3836     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
3837     return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
3838                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
3839   case NEON::BI__builtin_neon_vcvt_n_f32_v:
3840   case NEON::BI__builtin_neon_vcvt_n_f64_v:
3841   case NEON::BI__builtin_neon_vcvtq_n_f32_v:
3842   case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
3843     llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
3844     Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
3845     Function *F = CGM.getIntrinsic(Int, Tys);
3846     return EmitNeonCall(F, Ops, "vcvt_n");
3847   }
3848   case NEON::BI__builtin_neon_vcvt_n_s32_v:
3849   case NEON::BI__builtin_neon_vcvt_n_u32_v:
3850   case NEON::BI__builtin_neon_vcvt_n_s64_v:
3851   case NEON::BI__builtin_neon_vcvt_n_u64_v:
3852   case NEON::BI__builtin_neon_vcvtq_n_s32_v:
3853   case NEON::BI__builtin_neon_vcvtq_n_u32_v:
3854   case NEON::BI__builtin_neon_vcvtq_n_s64_v:
3855   case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
3856     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3857     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3858     return EmitNeonCall(F, Ops, "vcvt_n");
3859   }
3860   case NEON::BI__builtin_neon_vcvt_s32_v:
3861   case NEON::BI__builtin_neon_vcvt_u32_v:
3862   case NEON::BI__builtin_neon_vcvt_s64_v:
3863   case NEON::BI__builtin_neon_vcvt_u64_v:
3864   case NEON::BI__builtin_neon_vcvtq_s32_v:
3865   case NEON::BI__builtin_neon_vcvtq_u32_v:
3866   case NEON::BI__builtin_neon_vcvtq_s64_v:
3867   case NEON::BI__builtin_neon_vcvtq_u64_v: {
3868     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
3869     return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
3870                 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
3871   }
3872   case NEON::BI__builtin_neon_vcvta_s32_v:
3873   case NEON::BI__builtin_neon_vcvta_s64_v:
3874   case NEON::BI__builtin_neon_vcvta_u32_v:
3875   case NEON::BI__builtin_neon_vcvta_u64_v:
3876   case NEON::BI__builtin_neon_vcvtaq_s32_v:
3877   case NEON::BI__builtin_neon_vcvtaq_s64_v:
3878   case NEON::BI__builtin_neon_vcvtaq_u32_v:
3879   case NEON::BI__builtin_neon_vcvtaq_u64_v:
3880   case NEON::BI__builtin_neon_vcvtn_s32_v:
3881   case NEON::BI__builtin_neon_vcvtn_s64_v:
3882   case NEON::BI__builtin_neon_vcvtn_u32_v:
3883   case NEON::BI__builtin_neon_vcvtn_u64_v:
3884   case NEON::BI__builtin_neon_vcvtnq_s32_v:
3885   case NEON::BI__builtin_neon_vcvtnq_s64_v:
3886   case NEON::BI__builtin_neon_vcvtnq_u32_v:
3887   case NEON::BI__builtin_neon_vcvtnq_u64_v:
3888   case NEON::BI__builtin_neon_vcvtp_s32_v:
3889   case NEON::BI__builtin_neon_vcvtp_s64_v:
3890   case NEON::BI__builtin_neon_vcvtp_u32_v:
3891   case NEON::BI__builtin_neon_vcvtp_u64_v:
3892   case NEON::BI__builtin_neon_vcvtpq_s32_v:
3893   case NEON::BI__builtin_neon_vcvtpq_s64_v:
3894   case NEON::BI__builtin_neon_vcvtpq_u32_v:
3895   case NEON::BI__builtin_neon_vcvtpq_u64_v:
3896   case NEON::BI__builtin_neon_vcvtm_s32_v:
3897   case NEON::BI__builtin_neon_vcvtm_s64_v:
3898   case NEON::BI__builtin_neon_vcvtm_u32_v:
3899   case NEON::BI__builtin_neon_vcvtm_u64_v:
3900   case NEON::BI__builtin_neon_vcvtmq_s32_v:
3901   case NEON::BI__builtin_neon_vcvtmq_s64_v:
3902   case NEON::BI__builtin_neon_vcvtmq_u32_v:
3903   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
3904     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3905     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
3906   }
3907   case NEON::BI__builtin_neon_vext_v:
3908   case NEON::BI__builtin_neon_vextq_v: {
3909     int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
3910     SmallVector<uint32_t, 16> Indices;
3911     for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
3912       Indices.push_back(i+CV);
3913 
3914     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3915     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3916     return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
3917   }
3918   case NEON::BI__builtin_neon_vfma_v:
3919   case NEON::BI__builtin_neon_vfmaq_v: {
3920     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
3921     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3922     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3923     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3924 
3925     // NEON intrinsic puts accumulator first, unlike the LLVM fma.
3926     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
3927   }
3928   case NEON::BI__builtin_neon_vld1_v:
3929   case NEON::BI__builtin_neon_vld1q_v: {
3930     llvm::Type *Tys[] = {Ty, Int8PtrTy};
3931     Ops.push_back(getAlignmentValue32(PtrOp0));
3932     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
3933   }
3934   case NEON::BI__builtin_neon_vld2_v:
3935   case NEON::BI__builtin_neon_vld2q_v:
3936   case NEON::BI__builtin_neon_vld3_v:
3937   case NEON::BI__builtin_neon_vld3q_v:
3938   case NEON::BI__builtin_neon_vld4_v:
3939   case NEON::BI__builtin_neon_vld4q_v: {
3940     llvm::Type *Tys[] = {Ty, Int8PtrTy};
3941     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3942     Value *Align = getAlignmentValue32(PtrOp1);
3943     Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
3944     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3945     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3946     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3947   }
3948   case NEON::BI__builtin_neon_vld1_dup_v:
3949   case NEON::BI__builtin_neon_vld1q_dup_v: {
3950     Value *V = UndefValue::get(Ty);
3951     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
3952     PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
3953     LoadInst *Ld = Builder.CreateLoad(PtrOp0);
3954     llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
3955     Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
3956     return EmitNeonSplat(Ops[0], CI);
3957   }
3958   case NEON::BI__builtin_neon_vld2_lane_v:
3959   case NEON::BI__builtin_neon_vld2q_lane_v:
3960   case NEON::BI__builtin_neon_vld3_lane_v:
3961   case NEON::BI__builtin_neon_vld3q_lane_v:
3962   case NEON::BI__builtin_neon_vld4_lane_v:
3963   case NEON::BI__builtin_neon_vld4q_lane_v: {
3964     llvm::Type *Tys[] = {Ty, Int8PtrTy};
3965     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3966     for (unsigned I = 2; I < Ops.size() - 1; ++I)
3967       Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
3968     Ops.push_back(getAlignmentValue32(PtrOp1));
3969     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
3970     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3971     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3972     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3973   }
3974   case NEON::BI__builtin_neon_vmovl_v: {
3975     llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
3976     Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
3977     if (Usgn)
3978       return Builder.CreateZExt(Ops[0], Ty, "vmovl");
3979     return Builder.CreateSExt(Ops[0], Ty, "vmovl");
3980   }
3981   case NEON::BI__builtin_neon_vmovn_v: {
3982     llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
3983     Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
3984     return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
3985   }
3986   case NEON::BI__builtin_neon_vmull_v:
3987     // FIXME: the integer vmull operations could be emitted in terms of pure
3988     // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
3989     // hoisting the exts outside loops. Until global ISel comes along that can
3990     // see through such movement this leads to bad CodeGen. So we need an
3991     // intrinsic for now.
3992     Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
3993     Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
3994     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
3995   case NEON::BI__builtin_neon_vpadal_v:
3996   case NEON::BI__builtin_neon_vpadalq_v: {
3997     // The source operand type has twice as many elements of half the size.
3998     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
3999     llvm::Type *EltTy =
4000       llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4001     llvm::Type *NarrowTy =
4002       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4003     llvm::Type *Tys[2] = { Ty, NarrowTy };
4004     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
4005   }
4006   case NEON::BI__builtin_neon_vpaddl_v:
4007   case NEON::BI__builtin_neon_vpaddlq_v: {
4008     // The source operand type has twice as many elements of half the size.
4009     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
4010     llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4011     llvm::Type *NarrowTy =
4012       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4013     llvm::Type *Tys[2] = { Ty, NarrowTy };
4014     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
4015   }
4016   case NEON::BI__builtin_neon_vqdmlal_v:
4017   case NEON::BI__builtin_neon_vqdmlsl_v: {
4018     SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
4019     Ops[1] =
4020         EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
4021     Ops.resize(2);
4022     return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
4023   }
4024   case NEON::BI__builtin_neon_vqshl_n_v:
4025   case NEON::BI__builtin_neon_vqshlq_n_v:
4026     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
4027                         1, false);
4028   case NEON::BI__builtin_neon_vqshlu_n_v:
4029   case NEON::BI__builtin_neon_vqshluq_n_v:
4030     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
4031                         1, false);
4032   case NEON::BI__builtin_neon_vrecpe_v:
4033   case NEON::BI__builtin_neon_vrecpeq_v:
4034   case NEON::BI__builtin_neon_vrsqrte_v:
4035   case NEON::BI__builtin_neon_vrsqrteq_v:
4036     Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
4037     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
4038 
4039   case NEON::BI__builtin_neon_vrshr_n_v:
4040   case NEON::BI__builtin_neon_vrshrq_n_v:
4041     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
4042                         1, true);
4043   case NEON::BI__builtin_neon_vshl_n_v:
4044   case NEON::BI__builtin_neon_vshlq_n_v:
4045     Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
4046     return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
4047                              "vshl_n");
4048   case NEON::BI__builtin_neon_vshll_n_v: {
4049     llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
4050     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4051     if (Usgn)
4052       Ops[0] = Builder.CreateZExt(Ops[0], VTy);
4053     else
4054       Ops[0] = Builder.CreateSExt(Ops[0], VTy);
4055     Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
4056     return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
4057   }
4058   case NEON::BI__builtin_neon_vshrn_n_v: {
4059     llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
4060     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4061     Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
4062     if (Usgn)
4063       Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
4064     else
4065       Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
4066     return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
4067   }
4068   case NEON::BI__builtin_neon_vshr_n_v:
4069   case NEON::BI__builtin_neon_vshrq_n_v:
4070     return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
4071   case NEON::BI__builtin_neon_vst1_v:
4072   case NEON::BI__builtin_neon_vst1q_v:
4073   case NEON::BI__builtin_neon_vst2_v:
4074   case NEON::BI__builtin_neon_vst2q_v:
4075   case NEON::BI__builtin_neon_vst3_v:
4076   case NEON::BI__builtin_neon_vst3q_v:
4077   case NEON::BI__builtin_neon_vst4_v:
4078   case NEON::BI__builtin_neon_vst4q_v:
4079   case NEON::BI__builtin_neon_vst2_lane_v:
4080   case NEON::BI__builtin_neon_vst2q_lane_v:
4081   case NEON::BI__builtin_neon_vst3_lane_v:
4082   case NEON::BI__builtin_neon_vst3q_lane_v:
4083   case NEON::BI__builtin_neon_vst4_lane_v:
4084   case NEON::BI__builtin_neon_vst4q_lane_v: {
4085     llvm::Type *Tys[] = {Int8PtrTy, Ty};
4086     Ops.push_back(getAlignmentValue32(PtrOp0));
4087     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
4088   }
4089   case NEON::BI__builtin_neon_vsubhn_v: {
4090     llvm::VectorType *SrcTy =
4091         llvm::VectorType::getExtendedElementVectorType(VTy);
4092 
4093     // %sum = add <4 x i32> %lhs, %rhs
4094     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4095     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
4096     Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
4097 
4098     // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
4099     Constant *ShiftAmt =
4100         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
4101     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
4102 
4103     // %res = trunc <4 x i32> %high to <4 x i16>
4104     return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
4105   }
4106   case NEON::BI__builtin_neon_vtrn_v:
4107   case NEON::BI__builtin_neon_vtrnq_v: {
4108     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4109     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4110     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4111     Value *SV = nullptr;
4112 
4113     for (unsigned vi = 0; vi != 2; ++vi) {
4114       SmallVector<uint32_t, 16> Indices;
4115       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4116         Indices.push_back(i+vi);
4117         Indices.push_back(i+e+vi);
4118       }
4119       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4120       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
4121       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4122     }
4123     return SV;
4124   }
4125   case NEON::BI__builtin_neon_vtst_v:
4126   case NEON::BI__builtin_neon_vtstq_v: {
4127     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4128     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4129     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
4130     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
4131                                 ConstantAggregateZero::get(Ty));
4132     return Builder.CreateSExt(Ops[0], Ty, "vtst");
4133   }
4134   case NEON::BI__builtin_neon_vuzp_v:
4135   case NEON::BI__builtin_neon_vuzpq_v: {
4136     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4137     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4138     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4139     Value *SV = nullptr;
4140 
4141     for (unsigned vi = 0; vi != 2; ++vi) {
4142       SmallVector<uint32_t, 16> Indices;
4143       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
4144         Indices.push_back(2*i+vi);
4145 
4146       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4147       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
4148       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4149     }
4150     return SV;
4151   }
4152   case NEON::BI__builtin_neon_vzip_v:
4153   case NEON::BI__builtin_neon_vzipq_v: {
4154     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4155     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4156     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4157     Value *SV = nullptr;
4158 
4159     for (unsigned vi = 0; vi != 2; ++vi) {
4160       SmallVector<uint32_t, 16> Indices;
4161       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4162         Indices.push_back((i + vi*e) >> 1);
4163         Indices.push_back(((i + vi*e) >> 1)+e);
4164       }
4165       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4166       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
4167       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4168     }
4169     return SV;
4170   }
4171   }
4172 
4173   assert(Int && "Expected valid intrinsic number");
4174 
4175   // Determine the type(s) of this overloaded AArch64 intrinsic.
4176   Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
4177 
4178   Value *Result = EmitNeonCall(F, Ops, NameHint);
4179   llvm::Type *ResultType = ConvertType(E->getType());
4180   // AArch64 intrinsic one-element vector type cast to
4181   // scalar type expected by the builtin
4182   return Builder.CreateBitCast(Result, ResultType, NameHint);
4183 }
4184 
4185 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
4186     Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
4187     const CmpInst::Predicate Ip, const Twine &Name) {
4188   llvm::Type *OTy = Op->getType();
4189 
4190   // FIXME: this is utterly horrific. We should not be looking at previous
4191   // codegen context to find out what needs doing. Unfortunately TableGen
4192   // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
4193   // (etc).
4194   if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
4195     OTy = BI->getOperand(0)->getType();
4196 
4197   Op = Builder.CreateBitCast(Op, OTy);
4198   if (OTy->getScalarType()->isFloatingPointTy()) {
4199     Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
4200   } else {
4201     Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
4202   }
4203   return Builder.CreateSExt(Op, Ty, Name);
4204 }
4205 
4206 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
4207                                  Value *ExtOp, Value *IndexOp,
4208                                  llvm::Type *ResTy, unsigned IntID,
4209                                  const char *Name) {
4210   SmallVector<Value *, 2> TblOps;
4211   if (ExtOp)
4212     TblOps.push_back(ExtOp);
4213 
4214   // Build a vector containing sequential number like (0, 1, 2, ..., 15)
4215   SmallVector<uint32_t, 16> Indices;
4216   llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
4217   for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
4218     Indices.push_back(2*i);
4219     Indices.push_back(2*i+1);
4220   }
4221 
4222   int PairPos = 0, End = Ops.size() - 1;
4223   while (PairPos < End) {
4224     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4225                                                      Ops[PairPos+1], Indices,
4226                                                      Name));
4227     PairPos += 2;
4228   }
4229 
4230   // If there's an odd number of 64-bit lookup table, fill the high 64-bit
4231   // of the 128-bit lookup table with zero.
4232   if (PairPos == End) {
4233     Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
4234     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4235                                                      ZeroTbl, Indices, Name));
4236   }
4237 
4238   Function *TblF;
4239   TblOps.push_back(IndexOp);
4240   TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
4241 
4242   return CGF.EmitNeonCall(TblF, TblOps, Name);
4243 }
4244 
4245 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
4246   unsigned Value;
4247   switch (BuiltinID) {
4248   default:
4249     return nullptr;
4250   case ARM::BI__builtin_arm_nop:
4251     Value = 0;
4252     break;
4253   case ARM::BI__builtin_arm_yield:
4254   case ARM::BI__yield:
4255     Value = 1;
4256     break;
4257   case ARM::BI__builtin_arm_wfe:
4258   case ARM::BI__wfe:
4259     Value = 2;
4260     break;
4261   case ARM::BI__builtin_arm_wfi:
4262   case ARM::BI__wfi:
4263     Value = 3;
4264     break;
4265   case ARM::BI__builtin_arm_sev:
4266   case ARM::BI__sev:
4267     Value = 4;
4268     break;
4269   case ARM::BI__builtin_arm_sevl:
4270   case ARM::BI__sevl:
4271     Value = 5;
4272     break;
4273   }
4274 
4275   return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
4276                             llvm::ConstantInt::get(Int32Ty, Value));
4277 }
4278 
4279 // Generates the IR for the read/write special register builtin,
4280 // ValueType is the type of the value that is to be written or read,
4281 // RegisterType is the type of the register being written to or read from.
4282 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
4283                                          const CallExpr *E,
4284                                          llvm::Type *RegisterType,
4285                                          llvm::Type *ValueType,
4286                                          bool IsRead,
4287                                          StringRef SysReg = "") {
4288   // write and register intrinsics only support 32 and 64 bit operations.
4289   assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
4290           && "Unsupported size for register.");
4291 
4292   CodeGen::CGBuilderTy &Builder = CGF.Builder;
4293   CodeGen::CodeGenModule &CGM = CGF.CGM;
4294   LLVMContext &Context = CGM.getLLVMContext();
4295 
4296   if (SysReg.empty()) {
4297     const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
4298     SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
4299   }
4300 
4301   llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
4302   llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4303   llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4304 
4305   llvm::Type *Types[] = { RegisterType };
4306 
4307   bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
4308   assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
4309             && "Can't fit 64-bit value in 32-bit register");
4310 
4311   if (IsRead) {
4312     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
4313     llvm::Value *Call = Builder.CreateCall(F, Metadata);
4314 
4315     if (MixedTypes)
4316       // Read into 64 bit register and then truncate result to 32 bit.
4317       return Builder.CreateTrunc(Call, ValueType);
4318 
4319     if (ValueType->isPointerTy())
4320       // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
4321       return Builder.CreateIntToPtr(Call, ValueType);
4322 
4323     return Call;
4324   }
4325 
4326   llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
4327   llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
4328   if (MixedTypes) {
4329     // Extend 32 bit write value to 64 bit to pass to write.
4330     ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
4331     return Builder.CreateCall(F, { Metadata, ArgValue });
4332   }
4333 
4334   if (ValueType->isPointerTy()) {
4335     // Have VoidPtrTy ArgValue but want to return an i32/i64.
4336     ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
4337     return Builder.CreateCall(F, { Metadata, ArgValue });
4338   }
4339 
4340   return Builder.CreateCall(F, { Metadata, ArgValue });
4341 }
4342 
4343 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
4344 /// argument that specifies the vector type.
4345 static bool HasExtraNeonArgument(unsigned BuiltinID) {
4346   switch (BuiltinID) {
4347   default: break;
4348   case NEON::BI__builtin_neon_vget_lane_i8:
4349   case NEON::BI__builtin_neon_vget_lane_i16:
4350   case NEON::BI__builtin_neon_vget_lane_i32:
4351   case NEON::BI__builtin_neon_vget_lane_i64:
4352   case NEON::BI__builtin_neon_vget_lane_f32:
4353   case NEON::BI__builtin_neon_vgetq_lane_i8:
4354   case NEON::BI__builtin_neon_vgetq_lane_i16:
4355   case NEON::BI__builtin_neon_vgetq_lane_i32:
4356   case NEON::BI__builtin_neon_vgetq_lane_i64:
4357   case NEON::BI__builtin_neon_vgetq_lane_f32:
4358   case NEON::BI__builtin_neon_vset_lane_i8:
4359   case NEON::BI__builtin_neon_vset_lane_i16:
4360   case NEON::BI__builtin_neon_vset_lane_i32:
4361   case NEON::BI__builtin_neon_vset_lane_i64:
4362   case NEON::BI__builtin_neon_vset_lane_f32:
4363   case NEON::BI__builtin_neon_vsetq_lane_i8:
4364   case NEON::BI__builtin_neon_vsetq_lane_i16:
4365   case NEON::BI__builtin_neon_vsetq_lane_i32:
4366   case NEON::BI__builtin_neon_vsetq_lane_i64:
4367   case NEON::BI__builtin_neon_vsetq_lane_f32:
4368   case NEON::BI__builtin_neon_vsha1h_u32:
4369   case NEON::BI__builtin_neon_vsha1cq_u32:
4370   case NEON::BI__builtin_neon_vsha1pq_u32:
4371   case NEON::BI__builtin_neon_vsha1mq_u32:
4372   case ARM::BI_MoveToCoprocessor:
4373   case ARM::BI_MoveToCoprocessor2:
4374     return false;
4375   }
4376   return true;
4377 }
4378 
4379 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
4380                                            const CallExpr *E) {
4381   if (auto Hint = GetValueForARMHint(BuiltinID))
4382     return Hint;
4383 
4384   if (BuiltinID == ARM::BI__emit) {
4385     bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
4386     llvm::FunctionType *FTy =
4387         llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
4388 
4389     APSInt Value;
4390     if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
4391       llvm_unreachable("Sema will ensure that the parameter is constant");
4392 
4393     uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
4394 
4395     llvm::InlineAsm *Emit =
4396         IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
4397                                  /*SideEffects=*/true)
4398                 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
4399                                  /*SideEffects=*/true);
4400 
4401     return Builder.CreateCall(Emit);
4402   }
4403 
4404   if (BuiltinID == ARM::BI__builtin_arm_dbg) {
4405     Value *Option = EmitScalarExpr(E->getArg(0));
4406     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
4407   }
4408 
4409   if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
4410     Value *Address = EmitScalarExpr(E->getArg(0));
4411     Value *RW      = EmitScalarExpr(E->getArg(1));
4412     Value *IsData  = EmitScalarExpr(E->getArg(2));
4413 
4414     // Locality is not supported on ARM target
4415     Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
4416 
4417     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
4418     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
4419   }
4420 
4421   if (BuiltinID == ARM::BI__builtin_arm_rbit) {
4422     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4423     return Builder.CreateCall(
4424         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
4425   }
4426 
4427   if (BuiltinID == ARM::BI__clear_cache) {
4428     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4429     const FunctionDecl *FD = E->getDirectCallee();
4430     Value *Ops[2];
4431     for (unsigned i = 0; i < 2; i++)
4432       Ops[i] = EmitScalarExpr(E->getArg(i));
4433     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4434     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4435     StringRef Name = FD->getName();
4436     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4437   }
4438 
4439   if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
4440       BuiltinID == ARM::BI__builtin_arm_mcrr2) {
4441     Function *F;
4442 
4443     switch (BuiltinID) {
4444     default: llvm_unreachable("unexpected builtin");
4445     case ARM::BI__builtin_arm_mcrr:
4446       F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
4447       break;
4448     case ARM::BI__builtin_arm_mcrr2:
4449       F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
4450       break;
4451     }
4452 
4453     // MCRR{2} instruction has 5 operands but
4454     // the intrinsic has 4 because Rt and Rt2
4455     // are represented as a single unsigned 64
4456     // bit integer in the intrinsic definition
4457     // but internally it's represented as 2 32
4458     // bit integers.
4459 
4460     Value *Coproc = EmitScalarExpr(E->getArg(0));
4461     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4462     Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
4463     Value *CRm = EmitScalarExpr(E->getArg(3));
4464 
4465     Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4466     Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
4467     Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
4468     Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
4469 
4470     return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
4471   }
4472 
4473   if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
4474       BuiltinID == ARM::BI__builtin_arm_mrrc2) {
4475     Function *F;
4476 
4477     switch (BuiltinID) {
4478     default: llvm_unreachable("unexpected builtin");
4479     case ARM::BI__builtin_arm_mrrc:
4480       F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
4481       break;
4482     case ARM::BI__builtin_arm_mrrc2:
4483       F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
4484       break;
4485     }
4486 
4487     Value *Coproc = EmitScalarExpr(E->getArg(0));
4488     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4489     Value *CRm  = EmitScalarExpr(E->getArg(2));
4490     Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
4491 
4492     // Returns an unsigned 64 bit integer, represented
4493     // as two 32 bit integers.
4494 
4495     Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
4496     Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
4497     Rt = Builder.CreateZExt(Rt, Int64Ty);
4498     Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
4499 
4500     Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
4501     RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
4502     RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
4503 
4504     return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
4505   }
4506 
4507   if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
4508       ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
4509         BuiltinID == ARM::BI__builtin_arm_ldaex) &&
4510        getContext().getTypeSize(E->getType()) == 64) ||
4511       BuiltinID == ARM::BI__ldrexd) {
4512     Function *F;
4513 
4514     switch (BuiltinID) {
4515     default: llvm_unreachable("unexpected builtin");
4516     case ARM::BI__builtin_arm_ldaex:
4517       F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
4518       break;
4519     case ARM::BI__builtin_arm_ldrexd:
4520     case ARM::BI__builtin_arm_ldrex:
4521     case ARM::BI__ldrexd:
4522       F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
4523       break;
4524     }
4525 
4526     Value *LdPtr = EmitScalarExpr(E->getArg(0));
4527     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4528                                     "ldrexd");
4529 
4530     Value *Val0 = Builder.CreateExtractValue(Val, 1);
4531     Value *Val1 = Builder.CreateExtractValue(Val, 0);
4532     Val0 = Builder.CreateZExt(Val0, Int64Ty);
4533     Val1 = Builder.CreateZExt(Val1, Int64Ty);
4534 
4535     Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
4536     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4537     Val = Builder.CreateOr(Val, Val1);
4538     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4539   }
4540 
4541   if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
4542       BuiltinID == ARM::BI__builtin_arm_ldaex) {
4543     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4544 
4545     QualType Ty = E->getType();
4546     llvm::Type *RealResTy = ConvertType(Ty);
4547     llvm::Type *PtrTy = llvm::IntegerType::get(
4548         getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
4549     LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
4550 
4551     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
4552                                        ? Intrinsic::arm_ldaex
4553                                        : Intrinsic::arm_ldrex,
4554                                    PtrTy);
4555     Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
4556 
4557     if (RealResTy->isPointerTy())
4558       return Builder.CreateIntToPtr(Val, RealResTy);
4559     else {
4560       llvm::Type *IntResTy = llvm::IntegerType::get(
4561           getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
4562       Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4563       return Builder.CreateBitCast(Val, RealResTy);
4564     }
4565   }
4566 
4567   if (BuiltinID == ARM::BI__builtin_arm_strexd ||
4568       ((BuiltinID == ARM::BI__builtin_arm_stlex ||
4569         BuiltinID == ARM::BI__builtin_arm_strex) &&
4570        getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
4571     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4572                                        ? Intrinsic::arm_stlexd
4573                                        : Intrinsic::arm_strexd);
4574     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
4575 
4576     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4577     Value *Val = EmitScalarExpr(E->getArg(0));
4578     Builder.CreateStore(Val, Tmp);
4579 
4580     Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
4581     Val = Builder.CreateLoad(LdPtr);
4582 
4583     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4584     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4585     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
4586     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
4587   }
4588 
4589   if (BuiltinID == ARM::BI__builtin_arm_strex ||
4590       BuiltinID == ARM::BI__builtin_arm_stlex) {
4591     Value *StoreVal = EmitScalarExpr(E->getArg(0));
4592     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4593 
4594     QualType Ty = E->getArg(0)->getType();
4595     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4596                                                  getContext().getTypeSize(Ty));
4597     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4598 
4599     if (StoreVal->getType()->isPointerTy())
4600       StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
4601     else {
4602       llvm::Type *IntTy = llvm::IntegerType::get(
4603           getLLVMContext(),
4604           CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
4605       StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
4606       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
4607     }
4608 
4609     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4610                                        ? Intrinsic::arm_stlex
4611                                        : Intrinsic::arm_strex,
4612                                    StoreAddr->getType());
4613     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
4614   }
4615 
4616   switch (BuiltinID) {
4617   case ARM::BI__iso_volatile_load8:
4618   case ARM::BI__iso_volatile_load16:
4619   case ARM::BI__iso_volatile_load32:
4620   case ARM::BI__iso_volatile_load64: {
4621     Value *Ptr = EmitScalarExpr(E->getArg(0));
4622     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4623     CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
4624     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4625                                              LoadSize.getQuantity() * 8);
4626     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4627     llvm::LoadInst *Load =
4628       Builder.CreateAlignedLoad(Ptr, LoadSize);
4629     Load->setVolatile(true);
4630     return Load;
4631   }
4632   case ARM::BI__iso_volatile_store8:
4633   case ARM::BI__iso_volatile_store16:
4634   case ARM::BI__iso_volatile_store32:
4635   case ARM::BI__iso_volatile_store64: {
4636     Value *Ptr = EmitScalarExpr(E->getArg(0));
4637     Value *Value = EmitScalarExpr(E->getArg(1));
4638     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4639     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
4640     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4641                                              StoreSize.getQuantity() * 8);
4642     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4643     llvm::StoreInst *Store =
4644       Builder.CreateAlignedStore(Value, Ptr,
4645                                  StoreSize);
4646     Store->setVolatile(true);
4647     return Store;
4648   }
4649   }
4650 
4651   if (BuiltinID == ARM::BI__builtin_arm_clrex) {
4652     Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
4653     return Builder.CreateCall(F);
4654   }
4655 
4656   // CRC32
4657   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4658   switch (BuiltinID) {
4659   case ARM::BI__builtin_arm_crc32b:
4660     CRCIntrinsicID = Intrinsic::arm_crc32b; break;
4661   case ARM::BI__builtin_arm_crc32cb:
4662     CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
4663   case ARM::BI__builtin_arm_crc32h:
4664     CRCIntrinsicID = Intrinsic::arm_crc32h; break;
4665   case ARM::BI__builtin_arm_crc32ch:
4666     CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
4667   case ARM::BI__builtin_arm_crc32w:
4668   case ARM::BI__builtin_arm_crc32d:
4669     CRCIntrinsicID = Intrinsic::arm_crc32w; break;
4670   case ARM::BI__builtin_arm_crc32cw:
4671   case ARM::BI__builtin_arm_crc32cd:
4672     CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
4673   }
4674 
4675   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4676     Value *Arg0 = EmitScalarExpr(E->getArg(0));
4677     Value *Arg1 = EmitScalarExpr(E->getArg(1));
4678 
    // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
    // intrinsics, hence we need different codegen for these cases.
4681     if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
4682         BuiltinID == ARM::BI__builtin_arm_crc32cd) {
4683       Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4684       Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
4685       Value *Arg1b = Builder.CreateLShr(Arg1, C1);
4686       Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
4687 
4688       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4689       Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
4690       return Builder.CreateCall(F, {Res, Arg1b});
4691     } else {
4692       Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
4693 
4694       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4695       return Builder.CreateCall(F, {Arg0, Arg1});
4696     }
4697   }
4698 
4699   if (BuiltinID == ARM::BI__builtin_arm_rsr ||
4700       BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4701       BuiltinID == ARM::BI__builtin_arm_rsrp ||
4702       BuiltinID == ARM::BI__builtin_arm_wsr ||
4703       BuiltinID == ARM::BI__builtin_arm_wsr64 ||
4704       BuiltinID == ARM::BI__builtin_arm_wsrp) {
4705 
4706     bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
4707                   BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4708                   BuiltinID == ARM::BI__builtin_arm_rsrp;
4709 
4710     bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
4711                             BuiltinID == ARM::BI__builtin_arm_wsrp;
4712 
4713     bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4714                    BuiltinID == ARM::BI__builtin_arm_wsr64;
4715 
4716     llvm::Type *ValueType;
4717     llvm::Type *RegisterType;
4718     if (IsPointerBuiltin) {
4719       ValueType = VoidPtrTy;
4720       RegisterType = Int32Ty;
4721     } else if (Is64Bit) {
4722       ValueType = RegisterType = Int64Ty;
4723     } else {
4724       ValueType = RegisterType = Int32Ty;
4725     }
4726 
4727     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
4728   }
4729 
4730   // Find out if any arguments are required to be integer constant
4731   // expressions.
4732   unsigned ICEArguments = 0;
4733   ASTContext::GetBuiltinTypeError Error;
4734   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4735   assert(Error == ASTContext::GE_None && "Should not codegen an error");
4736 
4737   auto getAlignmentValue32 = [&](Address addr) -> Value* {
4738     return Builder.getInt32(addr.getAlignment().getQuantity());
4739   };
4740 
4741   Address PtrOp0 = Address::invalid();
4742   Address PtrOp1 = Address::invalid();
4743   SmallVector<Value*, 4> Ops;
4744   bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
4745   unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
4746   for (unsigned i = 0, e = NumArgs; i != e; i++) {
4747     if (i == 0) {
4748       switch (BuiltinID) {
4749       case NEON::BI__builtin_neon_vld1_v:
4750       case NEON::BI__builtin_neon_vld1q_v:
4751       case NEON::BI__builtin_neon_vld1q_lane_v:
4752       case NEON::BI__builtin_neon_vld1_lane_v:
4753       case NEON::BI__builtin_neon_vld1_dup_v:
4754       case NEON::BI__builtin_neon_vld1q_dup_v:
4755       case NEON::BI__builtin_neon_vst1_v:
4756       case NEON::BI__builtin_neon_vst1q_v:
4757       case NEON::BI__builtin_neon_vst1q_lane_v:
4758       case NEON::BI__builtin_neon_vst1_lane_v:
4759       case NEON::BI__builtin_neon_vst2_v:
4760       case NEON::BI__builtin_neon_vst2q_v:
4761       case NEON::BI__builtin_neon_vst2_lane_v:
4762       case NEON::BI__builtin_neon_vst2q_lane_v:
4763       case NEON::BI__builtin_neon_vst3_v:
4764       case NEON::BI__builtin_neon_vst3q_v:
4765       case NEON::BI__builtin_neon_vst3_lane_v:
4766       case NEON::BI__builtin_neon_vst3q_lane_v:
4767       case NEON::BI__builtin_neon_vst4_v:
4768       case NEON::BI__builtin_neon_vst4q_v:
4769       case NEON::BI__builtin_neon_vst4_lane_v:
4770       case NEON::BI__builtin_neon_vst4q_lane_v:
4771         // Get the alignment for the argument in addition to the value;
4772         // we'll use it later.
4773         PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
4774         Ops.push_back(PtrOp0.getPointer());
4775         continue;
4776       }
4777     }
4778     if (i == 1) {
4779       switch (BuiltinID) {
4780       case NEON::BI__builtin_neon_vld2_v:
4781       case NEON::BI__builtin_neon_vld2q_v:
4782       case NEON::BI__builtin_neon_vld3_v:
4783       case NEON::BI__builtin_neon_vld3q_v:
4784       case NEON::BI__builtin_neon_vld4_v:
4785       case NEON::BI__builtin_neon_vld4q_v:
4786       case NEON::BI__builtin_neon_vld2_lane_v:
4787       case NEON::BI__builtin_neon_vld2q_lane_v:
4788       case NEON::BI__builtin_neon_vld3_lane_v:
4789       case NEON::BI__builtin_neon_vld3q_lane_v:
4790       case NEON::BI__builtin_neon_vld4_lane_v:
4791       case NEON::BI__builtin_neon_vld4q_lane_v:
4792       case NEON::BI__builtin_neon_vld2_dup_v:
4793       case NEON::BI__builtin_neon_vld3_dup_v:
4794       case NEON::BI__builtin_neon_vld4_dup_v:
4795         // Get the alignment for the argument in addition to the value;
4796         // we'll use it later.
4797         PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
4798         Ops.push_back(PtrOp1.getPointer());
4799         continue;
4800       }
4801     }
4802 
4803     if ((ICEArguments & (1 << i)) == 0) {
4804       Ops.push_back(EmitScalarExpr(E->getArg(i)));
4805     } else {
4806       // If this is required to be a constant, constant fold it so that we know
4807       // that the generated intrinsic gets a ConstantInt.
4808       llvm::APSInt Result;
4809       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
4810       assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
4811       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
4812     }
4813   }
4814 
4815   switch (BuiltinID) {
4816   default: break;
4817 
4818   case NEON::BI__builtin_neon_vget_lane_i8:
4819   case NEON::BI__builtin_neon_vget_lane_i16:
4820   case NEON::BI__builtin_neon_vget_lane_i32:
4821   case NEON::BI__builtin_neon_vget_lane_i64:
4822   case NEON::BI__builtin_neon_vget_lane_f32:
4823   case NEON::BI__builtin_neon_vgetq_lane_i8:
4824   case NEON::BI__builtin_neon_vgetq_lane_i16:
4825   case NEON::BI__builtin_neon_vgetq_lane_i32:
4826   case NEON::BI__builtin_neon_vgetq_lane_i64:
4827   case NEON::BI__builtin_neon_vgetq_lane_f32:
4828     return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
4829 
4830   case NEON::BI__builtin_neon_vset_lane_i8:
4831   case NEON::BI__builtin_neon_vset_lane_i16:
4832   case NEON::BI__builtin_neon_vset_lane_i32:
4833   case NEON::BI__builtin_neon_vset_lane_i64:
4834   case NEON::BI__builtin_neon_vset_lane_f32:
4835   case NEON::BI__builtin_neon_vsetq_lane_i8:
4836   case NEON::BI__builtin_neon_vsetq_lane_i16:
4837   case NEON::BI__builtin_neon_vsetq_lane_i32:
4838   case NEON::BI__builtin_neon_vsetq_lane_i64:
4839   case NEON::BI__builtin_neon_vsetq_lane_f32:
4840     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4841 
4842   case NEON::BI__builtin_neon_vsha1h_u32:
4843     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
4844                         "vsha1h");
4845   case NEON::BI__builtin_neon_vsha1cq_u32:
4846     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
4847                         "vsha1h");
4848   case NEON::BI__builtin_neon_vsha1pq_u32:
4849     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
4850                         "vsha1h");
4851   case NEON::BI__builtin_neon_vsha1mq_u32:
4852     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
4853                         "vsha1h");
4854 
4855   // The ARM _MoveToCoprocessor builtins put the input register value as
4856   // the first argument, but the LLVM intrinsic expects it as the third one.
4857   case ARM::BI_MoveToCoprocessor:
4858   case ARM::BI_MoveToCoprocessor2: {
4859     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
4860                                    Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
4861     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
4862                                   Ops[3], Ops[4], Ops[5]});
4863   }
4864   case ARM::BI_BitScanForward:
4865   case ARM::BI_BitScanForward64:
4866     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
4867   case ARM::BI_BitScanReverse:
4868   case ARM::BI_BitScanReverse64:
4869     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
4870 
4871   case ARM::BI_InterlockedAnd64:
4872     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
4873   case ARM::BI_InterlockedExchange64:
4874     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
4875   case ARM::BI_InterlockedExchangeAdd64:
4876     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
4877   case ARM::BI_InterlockedExchangeSub64:
4878     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
4879   case ARM::BI_InterlockedOr64:
4880     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
4881   case ARM::BI_InterlockedXor64:
4882     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
4883   case ARM::BI_InterlockedDecrement64:
4884     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
4885   case ARM::BI_InterlockedIncrement64:
4886     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
4887   }
4888 
4889   // Get the last argument, which specifies the vector type.
4890   assert(HasExtraArg);
4891   llvm::APSInt Result;
4892   const Expr *Arg = E->getArg(E->getNumArgs()-1);
4893   if (!Arg->isIntegerConstantExpr(Result, getContext()))
4894     return nullptr;
4895 
4896   if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
4897       BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
4898     // Determine the overloaded type of this builtin.
4899     llvm::Type *Ty;
4900     if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
4901       Ty = FloatTy;
4902     else
4903       Ty = DoubleTy;
4904 
4905     // Determine whether this is an unsigned conversion or not.
4906     bool usgn = Result.getZExtValue() == 1;
4907     unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
4908 
4909     // Call the appropriate intrinsic.
4910     Function *F = CGM.getIntrinsic(Int, Ty);
4911     return Builder.CreateCall(F, Ops, "vcvtr");
4912   }
4913 
4914   // Determine the type of this overloaded NEON intrinsic.
4915   NeonTypeFlags Type(Result.getZExtValue());
4916   bool usgn = Type.isUnsigned();
4917   bool rightShift = false;
4918 
4919   llvm::VectorType *VTy = GetNeonType(this, Type);
4920   llvm::Type *Ty = VTy;
4921   if (!Ty)
4922     return nullptr;
4923 
4924   // Many NEON builtins have identical semantics and uses in ARM and
4925   // AArch64. Emit these in a single function.
4926   auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
4927   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
4928       IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
4929   if (Builtin)
4930     return EmitCommonNeonBuiltinExpr(
4931         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
4932         Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);
4933 
4934   unsigned Int;
4935   switch (BuiltinID) {
4936   default: return nullptr;
4937   case NEON::BI__builtin_neon_vld1q_lane_v:
4938     // Handle 64-bit integer elements as a special case.  Use shuffles of
4939     // one-element vectors to avoid poor code for i64 in the backend.
4940     if (VTy->getElementType()->isIntegerTy(64)) {
4941       // Extract the other lane.
4942       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4943       uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
4944       Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
4945       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
4946       // Load the value as a one-element vector.
4947       Ty = llvm::VectorType::get(VTy->getElementType(), 1);
4948       llvm::Type *Tys[] = {Ty, Int8PtrTy};
4949       Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
4950       Value *Align = getAlignmentValue32(PtrOp0);
4951       Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
4952       // Combine them.
4953       uint32_t Indices[] = {1 - Lane, Lane};
4954       SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
4955       return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
4956     }
4957     // fall through
4958   case NEON::BI__builtin_neon_vld1_lane_v: {
4959     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4960     PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
4961     Value *Ld = Builder.CreateLoad(PtrOp0);
4962     return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
4963   }
4964   case NEON::BI__builtin_neon_vld2_dup_v:
4965   case NEON::BI__builtin_neon_vld3_dup_v:
4966   case NEON::BI__builtin_neon_vld4_dup_v: {
4967     // Handle 64-bit elements as a special-case.  There is no "dup" needed.
4968     if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
4969       switch (BuiltinID) {
4970       case NEON::BI__builtin_neon_vld2_dup_v:
4971         Int = Intrinsic::arm_neon_vld2;
4972         break;
4973       case NEON::BI__builtin_neon_vld3_dup_v:
4974         Int = Intrinsic::arm_neon_vld3;
4975         break;
4976       case NEON::BI__builtin_neon_vld4_dup_v:
4977         Int = Intrinsic::arm_neon_vld4;
4978         break;
4979       default: llvm_unreachable("unknown vld_dup intrinsic?");
4980       }
4981       llvm::Type *Tys[] = {Ty, Int8PtrTy};
4982       Function *F = CGM.getIntrinsic(Int, Tys);
4983       llvm::Value *Align = getAlignmentValue32(PtrOp1);
4984       Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
4985       Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4986       Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4987       return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4988     }
4989     switch (BuiltinID) {
4990     case NEON::BI__builtin_neon_vld2_dup_v:
4991       Int = Intrinsic::arm_neon_vld2lane;
4992       break;
4993     case NEON::BI__builtin_neon_vld3_dup_v:
4994       Int = Intrinsic::arm_neon_vld3lane;
4995       break;
4996     case NEON::BI__builtin_neon_vld4_dup_v:
4997       Int = Intrinsic::arm_neon_vld4lane;
4998       break;
4999     default: llvm_unreachable("unknown vld_dup intrinsic?");
5000     }
5001     llvm::Type *Tys[] = {Ty, Int8PtrTy};
5002     Function *F = CGM.getIntrinsic(Int, Tys);
5003     llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
5004 
5005     SmallVector<Value*, 6> Args;
5006     Args.push_back(Ops[1]);
5007     Args.append(STy->getNumElements(), UndefValue::get(Ty));
5008 
5009     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
5010     Args.push_back(CI);
5011     Args.push_back(getAlignmentValue32(PtrOp1));
5012 
5013     Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
5014     // splat lane 0 to all elts in each vector of the result.
5015     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
5016       Value *Val = Builder.CreateExtractValue(Ops[1], i);
5017       Value *Elt = Builder.CreateBitCast(Val, Ty);
5018       Elt = EmitNeonSplat(Elt, CI);
5019       Elt = Builder.CreateBitCast(Elt, Val->getType());
5020       Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
5021     }
5022     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5023     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5024     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5025   }
5026   case NEON::BI__builtin_neon_vqrshrn_n_v:
5027     Int =
5028       usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
5029     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
5030                         1, true);
5031   case NEON::BI__builtin_neon_vqrshrun_n_v:
5032     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
5033                         Ops, "vqrshrun_n", 1, true);
5034   case NEON::BI__builtin_neon_vqshrn_n_v:
5035     Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
5036     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
5037                         1, true);
5038   case NEON::BI__builtin_neon_vqshrun_n_v:
5039     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
5040                         Ops, "vqshrun_n", 1, true);
5041   case NEON::BI__builtin_neon_vrecpe_v:
5042   case NEON::BI__builtin_neon_vrecpeq_v:
5043     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
5044                         Ops, "vrecpe");
5045   case NEON::BI__builtin_neon_vrshrn_n_v:
5046     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
5047                         Ops, "vrshrn_n", 1, true);
5048   case NEON::BI__builtin_neon_vrsra_n_v:
5049   case NEON::BI__builtin_neon_vrsraq_n_v:
5050     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5051     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5052     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
5053     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
5054     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
5055     return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
5056   case NEON::BI__builtin_neon_vsri_n_v:
5057   case NEON::BI__builtin_neon_vsriq_n_v:
5058     rightShift = true;
5059   case NEON::BI__builtin_neon_vsli_n_v:
5060   case NEON::BI__builtin_neon_vsliq_n_v:
5061     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
5062     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
5063                         Ops, "vsli_n");
5064   case NEON::BI__builtin_neon_vsra_n_v:
5065   case NEON::BI__builtin_neon_vsraq_n_v:
5066     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5067     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
5068     return Builder.CreateAdd(Ops[0], Ops[1]);
5069   case NEON::BI__builtin_neon_vst1q_lane_v:
5070     // Handle 64-bit integer elements as a special case.  Use a shuffle to get
5071     // a one-element vector and avoid poor code for i64 in the backend.
5072     if (VTy->getElementType()->isIntegerTy(64)) {
5073       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5074       Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
5075       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
5076       Ops[2] = getAlignmentValue32(PtrOp0);
5077       llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
5078       return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
5079                                                  Tys), Ops);
5080     }
5081     // fall through
5082   case NEON::BI__builtin_neon_vst1_lane_v: {
5083     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5084     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
5085     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5086     auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
5087     return St;
5088   }
5089   case NEON::BI__builtin_neon_vtbl1_v:
5090     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
5091                         Ops, "vtbl1");
5092   case NEON::BI__builtin_neon_vtbl2_v:
5093     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
5094                         Ops, "vtbl2");
5095   case NEON::BI__builtin_neon_vtbl3_v:
5096     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
5097                         Ops, "vtbl3");
5098   case NEON::BI__builtin_neon_vtbl4_v:
5099     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
5100                         Ops, "vtbl4");
5101   case NEON::BI__builtin_neon_vtbx1_v:
5102     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
5103                         Ops, "vtbx1");
5104   case NEON::BI__builtin_neon_vtbx2_v:
5105     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
5106                         Ops, "vtbx2");
5107   case NEON::BI__builtin_neon_vtbx3_v:
5108     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
5109                         Ops, "vtbx3");
5110   case NEON::BI__builtin_neon_vtbx4_v:
5111     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
5112                         Ops, "vtbx4");
5113   }
5114 }
5115 
5116 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
5117                                       const CallExpr *E,
5118                                       SmallVectorImpl<Value *> &Ops) {
5119   unsigned int Int = 0;
5120   const char *s = nullptr;
5121 
5122   switch (BuiltinID) {
5123   default:
5124     return nullptr;
5125   case NEON::BI__builtin_neon_vtbl1_v:
5126   case NEON::BI__builtin_neon_vqtbl1_v:
5127   case NEON::BI__builtin_neon_vqtbl1q_v:
5128   case NEON::BI__builtin_neon_vtbl2_v:
5129   case NEON::BI__builtin_neon_vqtbl2_v:
5130   case NEON::BI__builtin_neon_vqtbl2q_v:
5131   case NEON::BI__builtin_neon_vtbl3_v:
5132   case NEON::BI__builtin_neon_vqtbl3_v:
5133   case NEON::BI__builtin_neon_vqtbl3q_v:
5134   case NEON::BI__builtin_neon_vtbl4_v:
5135   case NEON::BI__builtin_neon_vqtbl4_v:
5136   case NEON::BI__builtin_neon_vqtbl4q_v:
5137     break;
5138   case NEON::BI__builtin_neon_vtbx1_v:
5139   case NEON::BI__builtin_neon_vqtbx1_v:
5140   case NEON::BI__builtin_neon_vqtbx1q_v:
5141   case NEON::BI__builtin_neon_vtbx2_v:
5142   case NEON::BI__builtin_neon_vqtbx2_v:
5143   case NEON::BI__builtin_neon_vqtbx2q_v:
5144   case NEON::BI__builtin_neon_vtbx3_v:
5145   case NEON::BI__builtin_neon_vqtbx3_v:
5146   case NEON::BI__builtin_neon_vqtbx3q_v:
5147   case NEON::BI__builtin_neon_vtbx4_v:
5148   case NEON::BI__builtin_neon_vqtbx4_v:
5149   case NEON::BI__builtin_neon_vqtbx4q_v:
5150     break;
5151   }
5152 
5153   assert(E->getNumArgs() >= 3);
5154 
5155   // Get the last argument, which specifies the vector type.
5156   llvm::APSInt Result;
5157   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
5158   if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
5159     return nullptr;
5160 
5161   // Determine the type of this overloaded NEON intrinsic.
5162   NeonTypeFlags Type(Result.getZExtValue());
5163   llvm::VectorType *Ty = GetNeonType(&CGF, Type);
5164   if (!Ty)
5165     return nullptr;
5166 
5167   CodeGen::CGBuilderTy &Builder = CGF.Builder;
5168 
5169   // AArch64 scalar builtins are not overloaded, they do not have an extra
5170   // argument that specifies the vector type, need to handle each case.
5171   switch (BuiltinID) {
5172   case NEON::BI__builtin_neon_vtbl1_v: {
5173     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
5174                               Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
5175                               "vtbl1");
5176   }
5177   case NEON::BI__builtin_neon_vtbl2_v: {
5178     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
5179                               Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
5180                               "vtbl1");
5181   }
5182   case NEON::BI__builtin_neon_vtbl3_v: {
5183     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
5184                               Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
5185                               "vtbl2");
5186   }
5187   case NEON::BI__builtin_neon_vtbl4_v: {
5188     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
5189                               Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
5190                               "vtbl2");
5191   }
5192   case NEON::BI__builtin_neon_vtbx1_v: {
5193     Value *TblRes =
5194         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
5195                            Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
5196 
5197     llvm::Constant *EightV = ConstantInt::get(Ty, 8);
5198     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
5199     CmpRes = Builder.CreateSExt(CmpRes, Ty);
5200 
5201     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5202     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5203     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5204   }
5205   case NEON::BI__builtin_neon_vtbx2_v: {
5206     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
5207                               Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
5208                               "vtbx1");
5209   }
5210   case NEON::BI__builtin_neon_vtbx3_v: {
5211     Value *TblRes =
5212         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
5213                            Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
5214 
5215     llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
5216     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
5217                                            TwentyFourV);
5218     CmpRes = Builder.CreateSExt(CmpRes, Ty);
5219 
5220     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5221     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5222     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5223   }
5224   case NEON::BI__builtin_neon_vtbx4_v: {
5225     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
5226                               Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
5227                               "vtbx2");
5228   }
5229   case NEON::BI__builtin_neon_vqtbl1_v:
5230   case NEON::BI__builtin_neon_vqtbl1q_v:
5231     Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
5232   case NEON::BI__builtin_neon_vqtbl2_v:
5233   case NEON::BI__builtin_neon_vqtbl2q_v: {
5234     Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
5235   case NEON::BI__builtin_neon_vqtbl3_v:
5236   case NEON::BI__builtin_neon_vqtbl3q_v:
5237     Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
5238   case NEON::BI__builtin_neon_vqtbl4_v:
5239   case NEON::BI__builtin_neon_vqtbl4q_v:
5240     Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
5241   case NEON::BI__builtin_neon_vqtbx1_v:
5242   case NEON::BI__builtin_neon_vqtbx1q_v:
5243     Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
5244   case NEON::BI__builtin_neon_vqtbx2_v:
5245   case NEON::BI__builtin_neon_vqtbx2q_v:
5246     Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
5247   case NEON::BI__builtin_neon_vqtbx3_v:
5248   case NEON::BI__builtin_neon_vqtbx3q_v:
5249     Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
5250   case NEON::BI__builtin_neon_vqtbx4_v:
5251   case NEON::BI__builtin_neon_vqtbx4q_v:
5252     Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
5253   }
5254   }
5255 
5256   if (!Int)
5257     return nullptr;
5258 
5259   Function *F = CGF.CGM.getIntrinsic(Int, Ty);
5260   return CGF.EmitNeonCall(F, Ops, s);
5261 }
5262 
5263 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
5264   llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
5265   Op = Builder.CreateBitCast(Op, Int16Ty);
5266   Value *V = UndefValue::get(VTy);
5267   llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
5268   Op = Builder.CreateInsertElement(V, Op, CI);
5269   return Op;
5270 }
5271 
5272 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
5273                                                const CallExpr *E) {
5274   unsigned HintID = static_cast<unsigned>(-1);
5275   switch (BuiltinID) {
5276   default: break;
5277   case AArch64::BI__builtin_arm_nop:
5278     HintID = 0;
5279     break;
5280   case AArch64::BI__builtin_arm_yield:
5281     HintID = 1;
5282     break;
5283   case AArch64::BI__builtin_arm_wfe:
5284     HintID = 2;
5285     break;
5286   case AArch64::BI__builtin_arm_wfi:
5287     HintID = 3;
5288     break;
5289   case AArch64::BI__builtin_arm_sev:
5290     HintID = 4;
5291     break;
5292   case AArch64::BI__builtin_arm_sevl:
5293     HintID = 5;
5294     break;
5295   }
5296 
5297   if (HintID != static_cast<unsigned>(-1)) {
5298     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
5299     return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
5300   }
5301 
5302   if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
5303     Value *Address         = EmitScalarExpr(E->getArg(0));
5304     Value *RW              = EmitScalarExpr(E->getArg(1));
5305     Value *CacheLevel      = EmitScalarExpr(E->getArg(2));
5306     Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
5307     Value *IsData          = EmitScalarExpr(E->getArg(4));
5308 
5309     Value *Locality = nullptr;
5310     if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
5311       // Temporal fetch, needs to convert cache level to locality.
5312       Locality = llvm::ConstantInt::get(Int32Ty,
5313         -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
5314     } else {
5315       // Streaming fetch.
5316       Locality = llvm::ConstantInt::get(Int32Ty, 0);
5317     }
5318 
5319     // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
5320     // PLDL3STRM or PLDL2STRM.
5321     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
5322     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
5323   }
5324 
5325   if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
5326     assert((getContext().getTypeSize(E->getType()) == 32) &&
5327            "rbit of unusual size!");
5328     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5329     return Builder.CreateCall(
5330         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5331   }
5332   if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
5333     assert((getContext().getTypeSize(E->getType()) == 64) &&
5334            "rbit of unusual size!");
5335     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5336     return Builder.CreateCall(
5337         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5338   }
5339 
5340   if (BuiltinID == AArch64::BI__clear_cache) {
5341     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
5342     const FunctionDecl *FD = E->getDirectCallee();
5343     Value *Ops[2];
5344     for (unsigned i = 0; i < 2; i++)
5345       Ops[i] = EmitScalarExpr(E->getArg(i));
5346     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
5347     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
5348     StringRef Name = FD->getName();
5349     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
5350   }
5351 
5352   if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5353       BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
5354       getContext().getTypeSize(E->getType()) == 128) {
5355     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5356                                        ? Intrinsic::aarch64_ldaxp
5357                                        : Intrinsic::aarch64_ldxp);
5358 
5359     Value *LdPtr = EmitScalarExpr(E->getArg(0));
5360     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
5361                                     "ldxp");
5362 
5363     Value *Val0 = Builder.CreateExtractValue(Val, 1);
5364     Value *Val1 = Builder.CreateExtractValue(Val, 0);
5365     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5366     Val0 = Builder.CreateZExt(Val0, Int128Ty);
5367     Val1 = Builder.CreateZExt(Val1, Int128Ty);
5368 
5369     Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
5370     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
5371     Val = Builder.CreateOr(Val, Val1);
5372     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
5373   } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5374              BuiltinID == AArch64::BI__builtin_arm_ldaex) {
5375     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
5376 
5377     QualType Ty = E->getType();
5378     llvm::Type *RealResTy = ConvertType(Ty);
5379     llvm::Type *PtrTy = llvm::IntegerType::get(
5380         getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
5381     LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
5382 
5383     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5384                                        ? Intrinsic::aarch64_ldaxr
5385                                        : Intrinsic::aarch64_ldxr,
5386                                    PtrTy);
5387     Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
5388 
5389     if (RealResTy->isPointerTy())
5390       return Builder.CreateIntToPtr(Val, RealResTy);
5391 
5392     llvm::Type *IntResTy = llvm::IntegerType::get(
5393         getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
5394     Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
5395     return Builder.CreateBitCast(Val, RealResTy);
5396   }
5397 
5398   if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
5399        BuiltinID == AArch64::BI__builtin_arm_stlex) &&
5400       getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
5401     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5402                                        ? Intrinsic::aarch64_stlxp
5403                                        : Intrinsic::aarch64_stxp);
5404     llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
5405 
5406     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
5407     EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
5408 
5409     Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
5410     llvm::Value *Val = Builder.CreateLoad(Tmp);
5411 
5412     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
5413     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
5414     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
5415                                          Int8PtrTy);
5416     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
5417   }
5418 
5419   if (BuiltinID == AArch64::BI__builtin_arm_strex ||
5420       BuiltinID == AArch64::BI__builtin_arm_stlex) {
5421     Value *StoreVal = EmitScalarExpr(E->getArg(0));
5422     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
5423 
5424     QualType Ty = E->getArg(0)->getType();
5425     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
5426                                                  getContext().getTypeSize(Ty));
5427     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
5428 
5429     if (StoreVal->getType()->isPointerTy())
5430       StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
5431     else {
5432       llvm::Type *IntTy = llvm::IntegerType::get(
5433           getLLVMContext(),
5434           CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
5435       StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
5436       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
5437     }
5438 
5439     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5440                                        ? Intrinsic::aarch64_stlxr
5441                                        : Intrinsic::aarch64_stxr,
5442                                    StoreAddr->getType());
5443     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
5444   }
5445 
5446   if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
5447     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
5448     return Builder.CreateCall(F);
5449   }
5450 
5451   // CRC32
5452   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5453   switch (BuiltinID) {
5454   case AArch64::BI__builtin_arm_crc32b:
5455     CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
5456   case AArch64::BI__builtin_arm_crc32cb:
5457     CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
5458   case AArch64::BI__builtin_arm_crc32h:
5459     CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
5460   case AArch64::BI__builtin_arm_crc32ch:
5461     CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
5462   case AArch64::BI__builtin_arm_crc32w:
5463     CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
5464   case AArch64::BI__builtin_arm_crc32cw:
5465     CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
5466   case AArch64::BI__builtin_arm_crc32d:
5467     CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
5468   case AArch64::BI__builtin_arm_crc32cd:
5469     CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
5470   }
5471 
5472   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5473     Value *Arg0 = EmitScalarExpr(E->getArg(0));
5474     Value *Arg1 = EmitScalarExpr(E->getArg(1));
5475     Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5476 
5477     llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
5478     Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
5479 
5480     return Builder.CreateCall(F, {Arg0, Arg1});
5481   }
5482 
5483   if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
5484       BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5485       BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5486       BuiltinID == AArch64::BI__builtin_arm_wsr ||
5487       BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
5488       BuiltinID == AArch64::BI__builtin_arm_wsrp) {
5489 
5490     bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
5491                   BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5492                   BuiltinID == AArch64::BI__builtin_arm_rsrp;
5493 
5494     bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5495                             BuiltinID == AArch64::BI__builtin_arm_wsrp;
5496 
5497     bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
5498                    BuiltinID != AArch64::BI__builtin_arm_wsr;
5499 
5500     llvm::Type *ValueType;
5501     llvm::Type *RegisterType = Int64Ty;
5502     if (IsPointerBuiltin) {
5503       ValueType = VoidPtrTy;
5504     } else if (Is64Bit) {
5505       ValueType = Int64Ty;
5506     } else {
5507       ValueType = Int32Ty;
5508     }
5509 
5510     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
5511   }
5512 
5513   // Find out if any arguments are required to be integer constant
5514   // expressions.
5515   unsigned ICEArguments = 0;
5516   ASTContext::GetBuiltinTypeError Error;
5517   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5518   assert(Error == ASTContext::GE_None && "Should not codegen an error");
5519 
5520   llvm::SmallVector<Value*, 4> Ops;
5521   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
5522     if ((ICEArguments & (1 << i)) == 0) {
5523       Ops.push_back(EmitScalarExpr(E->getArg(i)));
5524     } else {
5525       // If this is required to be a constant, constant fold it so that we know
5526       // that the generated intrinsic gets a ConstantInt.
5527       llvm::APSInt Result;
5528       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
5529       assert(IsConst && "Constant arg isn't actually constant?");
5530       (void)IsConst;
5531       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
5532     }
5533   }
5534 
5535   auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
5536   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
5537       SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5538 
5539   if (Builtin) {
5540     Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
5541     Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
5542     assert(Result && "SISD intrinsic should have been handled");
5543     return Result;
5544   }
5545 
5546   llvm::APSInt Result;
5547   const Expr *Arg = E->getArg(E->getNumArgs()-1);
5548   NeonTypeFlags Type(0);
5549   if (Arg->isIntegerConstantExpr(Result, getContext()))
5550     // Determine the type of this overloaded NEON intrinsic.
5551     Type = NeonTypeFlags(Result.getZExtValue());
5552 
5553   bool usgn = Type.isUnsigned();
5554   bool quad = Type.isQuad();
5555 
5556   // Handle non-overloaded intrinsics first.
5557   switch (BuiltinID) {
5558   default: break;
5559   case NEON::BI__builtin_neon_vldrq_p128: {
5560     llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5561     llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
5562     Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
5563     return Builder.CreateAlignedLoad(Int128Ty, Ptr,
5564                                      CharUnits::fromQuantity(16));
5565   }
5566   case NEON::BI__builtin_neon_vstrq_p128: {
5567     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
5568     Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
5569     return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
5570   }
5571   case NEON::BI__builtin_neon_vcvts_u32_f32:
5572   case NEON::BI__builtin_neon_vcvtd_u64_f64:
5573     usgn = true;
5574     // FALL THROUGH
5575   case NEON::BI__builtin_neon_vcvts_s32_f32:
5576   case NEON::BI__builtin_neon_vcvtd_s64_f64: {
5577     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5578     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5579     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5580     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5581     Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
5582     if (usgn)
5583       return Builder.CreateFPToUI(Ops[0], InTy);
5584     return Builder.CreateFPToSI(Ops[0], InTy);
5585   }
5586   case NEON::BI__builtin_neon_vcvts_f32_u32:
5587   case NEON::BI__builtin_neon_vcvtd_f64_u64:
5588     usgn = true;
5589     // FALL THROUGH
5590   case NEON::BI__builtin_neon_vcvts_f32_s32:
5591   case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5592     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5593     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5594     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5595     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5596     Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5597     if (usgn)
5598       return Builder.CreateUIToFP(Ops[0], FTy);
5599     return Builder.CreateSIToFP(Ops[0], FTy);
5600   }
5601   case NEON::BI__builtin_neon_vpaddd_s64: {
5602     llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
5603     Value *Vec = EmitScalarExpr(E->getArg(0));
5604     // The vector is v2i64, so make sure it's bitcast to that.
5605     Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
5606     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5607     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5608     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5609     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5610     // Pairwise addition of a v2i64 into a scalar i64.
5611     return Builder.CreateAdd(Op0, Op1, "vpaddd");
5612   }
5613   case NEON::BI__builtin_neon_vpaddd_f64: {
5614     llvm::Type *Ty =
5615       llvm::VectorType::get(DoubleTy, 2);
5616     Value *Vec = EmitScalarExpr(E->getArg(0));
5617     // The vector is v2f64, so make sure it's bitcast to that.
5618     Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
5619     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5620     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5621     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5622     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5623     // Pairwise addition of a v2f64 into a scalar f64.
5624     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5625   }
5626   case NEON::BI__builtin_neon_vpadds_f32: {
5627     llvm::Type *Ty =
5628       llvm::VectorType::get(FloatTy, 2);
5629     Value *Vec = EmitScalarExpr(E->getArg(0));
5630     // The vector is v2f32, so make sure it's bitcast to that.
5631     Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
5632     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5633     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5634     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5635     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5636     // Pairwise addition of a v2f32 into a scalar f32.
5637     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5638   }
5639   case NEON::BI__builtin_neon_vceqzd_s64:
5640   case NEON::BI__builtin_neon_vceqzd_f64:
5641   case NEON::BI__builtin_neon_vceqzs_f32:
5642     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5643     return EmitAArch64CompareBuiltinExpr(
5644         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5645         ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
5646   case NEON::BI__builtin_neon_vcgezd_s64:
5647   case NEON::BI__builtin_neon_vcgezd_f64:
5648   case NEON::BI__builtin_neon_vcgezs_f32:
5649     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5650     return EmitAArch64CompareBuiltinExpr(
5651         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5652         ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
5653   case NEON::BI__builtin_neon_vclezd_s64:
5654   case NEON::BI__builtin_neon_vclezd_f64:
5655   case NEON::BI__builtin_neon_vclezs_f32:
5656     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5657     return EmitAArch64CompareBuiltinExpr(
5658         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5659         ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
5660   case NEON::BI__builtin_neon_vcgtzd_s64:
5661   case NEON::BI__builtin_neon_vcgtzd_f64:
5662   case NEON::BI__builtin_neon_vcgtzs_f32:
5663     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5664     return EmitAArch64CompareBuiltinExpr(
5665         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5666         ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
5667   case NEON::BI__builtin_neon_vcltzd_s64:
5668   case NEON::BI__builtin_neon_vcltzd_f64:
5669   case NEON::BI__builtin_neon_vcltzs_f32:
5670     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5671     return EmitAArch64CompareBuiltinExpr(
5672         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5673         ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
5674 
5675   case NEON::BI__builtin_neon_vceqzd_u64: {
5676     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5677     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5678     Ops[0] =
5679         Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
5680     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
5681   }
5682   case NEON::BI__builtin_neon_vceqd_f64:
5683   case NEON::BI__builtin_neon_vcled_f64:
5684   case NEON::BI__builtin_neon_vcltd_f64:
5685   case NEON::BI__builtin_neon_vcged_f64:
5686   case NEON::BI__builtin_neon_vcgtd_f64: {
5687     llvm::CmpInst::Predicate P;
5688     switch (BuiltinID) {
5689     default: llvm_unreachable("missing builtin ID in switch!");
5690     case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
5691     case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
5692     case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
5693     case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
5694     case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
5695     }
5696     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5697     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5698     Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5699     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5700     return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
5701   }
5702   case NEON::BI__builtin_neon_vceqs_f32:
5703   case NEON::BI__builtin_neon_vcles_f32:
5704   case NEON::BI__builtin_neon_vclts_f32:
5705   case NEON::BI__builtin_neon_vcges_f32:
5706   case NEON::BI__builtin_neon_vcgts_f32: {
5707     llvm::CmpInst::Predicate P;
5708     switch (BuiltinID) {
5709     default: llvm_unreachable("missing builtin ID in switch!");
5710     case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
5711     case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
5712     case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
5713     case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
5714     case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
5715     }
5716     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5717     Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
5718     Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
5719     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5720     return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
5721   }
5722   case NEON::BI__builtin_neon_vceqd_s64:
5723   case NEON::BI__builtin_neon_vceqd_u64:
5724   case NEON::BI__builtin_neon_vcgtd_s64:
5725   case NEON::BI__builtin_neon_vcgtd_u64:
5726   case NEON::BI__builtin_neon_vcltd_s64:
5727   case NEON::BI__builtin_neon_vcltd_u64:
5728   case NEON::BI__builtin_neon_vcged_u64:
5729   case NEON::BI__builtin_neon_vcged_s64:
5730   case NEON::BI__builtin_neon_vcled_u64:
5731   case NEON::BI__builtin_neon_vcled_s64: {
5732     llvm::CmpInst::Predicate P;
5733     switch (BuiltinID) {
5734     default: llvm_unreachable("missing builtin ID in switch!");
5735     case NEON::BI__builtin_neon_vceqd_s64:
5736     case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
5737     case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
5738     case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
5739     case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
5740     case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
5741     case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
5742     case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
5743     case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
5744     case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
5745     }
5746     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5747     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5748     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5749     Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
5750     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
5751   }
5752   case NEON::BI__builtin_neon_vtstd_s64:
5753   case NEON::BI__builtin_neon_vtstd_u64: {
5754     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5755     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5756     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5757     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
5758     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
5759                                 llvm::Constant::getNullValue(Int64Ty));
5760     return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
5761   }
5762   case NEON::BI__builtin_neon_vset_lane_i8:
5763   case NEON::BI__builtin_neon_vset_lane_i16:
5764   case NEON::BI__builtin_neon_vset_lane_i32:
5765   case NEON::BI__builtin_neon_vset_lane_i64:
5766   case NEON::BI__builtin_neon_vset_lane_f32:
5767   case NEON::BI__builtin_neon_vsetq_lane_i8:
5768   case NEON::BI__builtin_neon_vsetq_lane_i16:
5769   case NEON::BI__builtin_neon_vsetq_lane_i32:
5770   case NEON::BI__builtin_neon_vsetq_lane_i64:
5771   case NEON::BI__builtin_neon_vsetq_lane_f32:
5772     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5773     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5774   case NEON::BI__builtin_neon_vset_lane_f64:
5775     // The vector type needs a cast for the v1f64 variant.
5776     Ops[1] = Builder.CreateBitCast(Ops[1],
5777                                    llvm::VectorType::get(DoubleTy, 1));
5778     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5779     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5780   case NEON::BI__builtin_neon_vsetq_lane_f64:
5781     // The vector type needs a cast for the v2f64 variant.
5782     Ops[1] = Builder.CreateBitCast(Ops[1],
5783         llvm::VectorType::get(DoubleTy, 2));
5784     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5785     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5786 
5787   case NEON::BI__builtin_neon_vget_lane_i8:
5788   case NEON::BI__builtin_neon_vdupb_lane_i8:
5789     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
5790     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5791                                         "vget_lane");
5792   case NEON::BI__builtin_neon_vgetq_lane_i8:
5793   case NEON::BI__builtin_neon_vdupb_laneq_i8:
5794     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
5795     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5796                                         "vgetq_lane");
5797   case NEON::BI__builtin_neon_vget_lane_i16:
5798   case NEON::BI__builtin_neon_vduph_lane_i16:
5799     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
5800     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5801                                         "vget_lane");
5802   case NEON::BI__builtin_neon_vgetq_lane_i16:
5803   case NEON::BI__builtin_neon_vduph_laneq_i16:
5804     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
5805     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5806                                         "vgetq_lane");
5807   case NEON::BI__builtin_neon_vget_lane_i32:
5808   case NEON::BI__builtin_neon_vdups_lane_i32:
5809     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
5810     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5811                                         "vget_lane");
5812   case NEON::BI__builtin_neon_vdups_lane_f32:
5813     Ops[0] = Builder.CreateBitCast(Ops[0],
5814         llvm::VectorType::get(FloatTy, 2));
5815     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5816                                         "vdups_lane");
5817   case NEON::BI__builtin_neon_vgetq_lane_i32:
5818   case NEON::BI__builtin_neon_vdups_laneq_i32:
5819     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
5820     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5821                                         "vgetq_lane");
5822   case NEON::BI__builtin_neon_vget_lane_i64:
5823   case NEON::BI__builtin_neon_vdupd_lane_i64:
5824     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
5825     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5826                                         "vget_lane");
5827   case NEON::BI__builtin_neon_vdupd_lane_f64:
5828     Ops[0] = Builder.CreateBitCast(Ops[0],
5829         llvm::VectorType::get(DoubleTy, 1));
5830     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5831                                         "vdupd_lane");
5832   case NEON::BI__builtin_neon_vgetq_lane_i64:
5833   case NEON::BI__builtin_neon_vdupd_laneq_i64:
5834     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
5835     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5836                                         "vgetq_lane");
5837   case NEON::BI__builtin_neon_vget_lane_f32:
5838     Ops[0] = Builder.CreateBitCast(Ops[0],
5839         llvm::VectorType::get(FloatTy, 2));
5840     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5841                                         "vget_lane");
5842   case NEON::BI__builtin_neon_vget_lane_f64:
5843     Ops[0] = Builder.CreateBitCast(Ops[0],
5844         llvm::VectorType::get(DoubleTy, 1));
5845     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5846                                         "vget_lane");
5847   case NEON::BI__builtin_neon_vgetq_lane_f32:
5848   case NEON::BI__builtin_neon_vdups_laneq_f32:
5849     Ops[0] = Builder.CreateBitCast(Ops[0],
5850         llvm::VectorType::get(FloatTy, 4));
5851     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5852                                         "vgetq_lane");
5853   case NEON::BI__builtin_neon_vgetq_lane_f64:
5854   case NEON::BI__builtin_neon_vdupd_laneq_f64:
5855     Ops[0] = Builder.CreateBitCast(Ops[0],
5856         llvm::VectorType::get(DoubleTy, 2));
5857     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5858                                         "vgetq_lane");
5859   case NEON::BI__builtin_neon_vaddd_s64:
5860   case NEON::BI__builtin_neon_vaddd_u64:
5861     return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
5862   case NEON::BI__builtin_neon_vsubd_s64:
5863   case NEON::BI__builtin_neon_vsubd_u64:
5864     return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
5865   case NEON::BI__builtin_neon_vqdmlalh_s16:
5866   case NEON::BI__builtin_neon_vqdmlslh_s16: {
5867     SmallVector<Value *, 2> ProductOps;
5868     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5869     ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
5870     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5871     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5872                           ProductOps, "vqdmlXl");
5873     Constant *CI = ConstantInt::get(SizeTy, 0);
5874     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5875 
5876     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
5877                                         ? Intrinsic::aarch64_neon_sqadd
5878                                         : Intrinsic::aarch64_neon_sqsub;
5879     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
5880   }
5881   case NEON::BI__builtin_neon_vqshlud_n_s64: {
5882     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5883     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5884     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
5885                         Ops, "vqshlu_n");
5886   }
5887   case NEON::BI__builtin_neon_vqshld_n_u64:
5888   case NEON::BI__builtin_neon_vqshld_n_s64: {
5889     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
5890                                    ? Intrinsic::aarch64_neon_uqshl
5891                                    : Intrinsic::aarch64_neon_sqshl;
5892     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5893     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5894     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
5895   }
5896   case NEON::BI__builtin_neon_vrshrd_n_u64:
5897   case NEON::BI__builtin_neon_vrshrd_n_s64: {
5898     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
5899                                    ? Intrinsic::aarch64_neon_urshl
5900                                    : Intrinsic::aarch64_neon_srshl;
5901     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5902     int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
5903     Ops[1] = ConstantInt::get(Int64Ty, -SV);
5904     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
5905   }
5906   case NEON::BI__builtin_neon_vrsrad_n_u64:
5907   case NEON::BI__builtin_neon_vrsrad_n_s64: {
5908     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
5909                                    ? Intrinsic::aarch64_neon_urshl
5910                                    : Intrinsic::aarch64_neon_srshl;
5911     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5912     Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
5913     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
5914                                 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
5915     return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
5916   }
5917   case NEON::BI__builtin_neon_vshld_n_s64:
5918   case NEON::BI__builtin_neon_vshld_n_u64: {
5919     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5920     return Builder.CreateShl(
5921         Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
5922   }
5923   case NEON::BI__builtin_neon_vshrd_n_s64: {
5924     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5925     return Builder.CreateAShr(
5926         Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5927                                                    Amt->getZExtValue())),
5928         "shrd_n");
5929   }
5930   case NEON::BI__builtin_neon_vshrd_n_u64: {
5931     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5932     uint64_t ShiftAmt = Amt->getZExtValue();
5933     // Right-shifting an unsigned value by its size yields 0.
5934     if (ShiftAmt == 64)
5935       return ConstantInt::get(Int64Ty, 0);
5936     return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
5937                               "shrd_n");
5938   }
5939   case NEON::BI__builtin_neon_vsrad_n_s64: {
5940     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
5941     Ops[1] = Builder.CreateAShr(
5942         Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5943                                                    Amt->getZExtValue())),
5944         "shrd_n");
5945     return Builder.CreateAdd(Ops[0], Ops[1]);
5946   }
5947   case NEON::BI__builtin_neon_vsrad_n_u64: {
5948     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
5949     uint64_t ShiftAmt = Amt->getZExtValue();
5950     // Right-shifting an unsigned value by its size yields 0.
5951     // As Op + 0 = Op, return Ops[0] directly.
5952     if (ShiftAmt == 64)
5953       return Ops[0];
5954     Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
5955                                 "shrd_n");
5956     return Builder.CreateAdd(Ops[0], Ops[1]);
5957   }
5958   case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
5959   case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
5960   case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
5961   case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
5962     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
5963                                           "lane");
5964     SmallVector<Value *, 2> ProductOps;
5965     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5966     ProductOps.push_back(vectorWrapScalar16(Ops[2]));
5967     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5968     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5969                           ProductOps, "vqdmlXl");
5970     Constant *CI = ConstantInt::get(SizeTy, 0);
5971     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5972     Ops.pop_back();
5973 
5974     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
5975                        BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
5976                           ? Intrinsic::aarch64_neon_sqadd
5977                           : Intrinsic::aarch64_neon_sqsub;
5978     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
5979   }
5980   case NEON::BI__builtin_neon_vqdmlals_s32:
5981   case NEON::BI__builtin_neon_vqdmlsls_s32: {
5982     SmallVector<Value *, 2> ProductOps;
5983     ProductOps.push_back(Ops[1]);
5984     ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
5985     Ops[1] =
5986         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5987                      ProductOps, "vqdmlXl");
5988 
5989     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
5990                                         ? Intrinsic::aarch64_neon_sqadd
5991                                         : Intrinsic::aarch64_neon_sqsub;
5992     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
5993   }
5994   case NEON::BI__builtin_neon_vqdmlals_lane_s32:
5995   case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
5996   case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
5997   case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
5998     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
5999                                           "lane");
6000     SmallVector<Value *, 2> ProductOps;
6001     ProductOps.push_back(Ops[1]);
6002     ProductOps.push_back(Ops[2]);
6003     Ops[1] =
6004         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6005                      ProductOps, "vqdmlXl");
6006     Ops.pop_back();
6007 
6008     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
6009                        BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
6010                           ? Intrinsic::aarch64_neon_sqadd
6011                           : Intrinsic::aarch64_neon_sqsub;
6012     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
6013   }
6014   }
6015 
6016   llvm::VectorType *VTy = GetNeonType(this, Type);
6017   llvm::Type *Ty = VTy;
6018   if (!Ty)
6019     return nullptr;
6020 
6021   // Not all intrinsics handled by the common case work for AArch64 yet, so only
6022   // defer to common code if it's been added to our special map.
6023   Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
6024                                    AArch64SIMDIntrinsicsProvenSorted);
6025 
6026   if (Builtin)
6027     return EmitCommonNeonBuiltinExpr(
6028         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
6029         Builtin->NameHint, Builtin->TypeModifier, E, Ops,
6030         /*never use addresses*/ Address::invalid(), Address::invalid());
6031 
6032   if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops))
6033     return V;
6034 
6035   unsigned Int;
6036   switch (BuiltinID) {
6037   default: return nullptr;
6038   case NEON::BI__builtin_neon_vbsl_v:
6039   case NEON::BI__builtin_neon_vbslq_v: {
6040     llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6041     Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
6042     Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
6043     Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
6044 
6045     Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
6046     Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
6047     Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
6048     return Builder.CreateBitCast(Ops[0], Ty);
6049   }
6050   case NEON::BI__builtin_neon_vfma_lane_v:
6051   case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
6052     // The ARM builtins (and instructions) have the addend as the first
6053     // operand, but the 'fma' intrinsics have it last. Swap it around here.
6054     Value *Addend = Ops[0];
6055     Value *Multiplicand = Ops[1];
6056     Value *LaneSource = Ops[2];
6057     Ops[0] = Multiplicand;
6058     Ops[1] = LaneSource;
6059     Ops[2] = Addend;
6060 
6061     // Now adjust things to handle the lane access.
6062     llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
6063       llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
6064       VTy;
6065     llvm::Constant *cst = cast<Constant>(Ops[3]);
6066     Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
6067     Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
6068     Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
6069 
6070     Ops.pop_back();
6071     Int = Intrinsic::fma;
6072     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
6073   }
6074   case NEON::BI__builtin_neon_vfma_laneq_v: {
6075     llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6076     // v1f64 fma should be mapped to Neon scalar f64 fma
6077     if (VTy && VTy->getElementType() == DoubleTy) {
6078       Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6079       Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6080       llvm::Type *VTy = GetNeonType(this,
6081         NeonTypeFlags(NeonTypeFlags::Float64, false, true));
6082       Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
6083       Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6084       Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
6085       Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
6086       return Builder.CreateBitCast(Result, Ty);
6087     }
6088     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6089     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6090     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6091 
6092     llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
6093                                             VTy->getNumElements() * 2);
6094     Ops[2] = Builder.CreateBitCast(Ops[2], STy);
6095     Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
6096                                                cast<ConstantInt>(Ops[3]));
6097     Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
6098 
6099     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
6100   }
6101   case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6102     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6103     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6104     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6105 
6106     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6107     Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
6108     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
6109   }
6110   case NEON::BI__builtin_neon_vfmas_lane_f32:
6111   case NEON::BI__builtin_neon_vfmas_laneq_f32:
6112   case NEON::BI__builtin_neon_vfmad_lane_f64:
6113   case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6114     Ops.push_back(EmitScalarExpr(E->getArg(3)));
6115     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
6116     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6117     Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6118     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
6119   }
6120   case NEON::BI__builtin_neon_vmull_v:
6121     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6122     Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6123     if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6124     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
6125   case NEON::BI__builtin_neon_vmax_v:
6126   case NEON::BI__builtin_neon_vmaxq_v:
6127     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6128     Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6129     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6130     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
6131   case NEON::BI__builtin_neon_vmin_v:
6132   case NEON::BI__builtin_neon_vminq_v:
6133     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6134     Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6135     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6136     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
6137   case NEON::BI__builtin_neon_vabd_v:
6138   case NEON::BI__builtin_neon_vabdq_v:
6139     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6140     Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6141     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6142     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
6143   case NEON::BI__builtin_neon_vpadal_v:
6144   case NEON::BI__builtin_neon_vpadalq_v: {
6145     unsigned ArgElts = VTy->getNumElements();
6146     llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
6147     unsigned BitWidth = EltTy->getBitWidth();
6148     llvm::Type *ArgTy = llvm::VectorType::get(
6149         llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
6150     llvm::Type* Tys[2] = { VTy, ArgTy };
6151     Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6152     SmallVector<llvm::Value*, 1> TmpOps;
6153     TmpOps.push_back(Ops[1]);
6154     Function *F = CGM.getIntrinsic(Int, Tys);
6155     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
6156     llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
6157     return Builder.CreateAdd(tmp, addend);
6158   }
6159   case NEON::BI__builtin_neon_vpmin_v:
6160   case NEON::BI__builtin_neon_vpminq_v:
6161     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6162     Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6163     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6164     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
6165   case NEON::BI__builtin_neon_vpmax_v:
6166   case NEON::BI__builtin_neon_vpmaxq_v:
6167     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6168     Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6169     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6170     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
6171   case NEON::BI__builtin_neon_vminnm_v:
6172   case NEON::BI__builtin_neon_vminnmq_v:
6173     Int = Intrinsic::aarch64_neon_fminnm;
6174     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
6175   case NEON::BI__builtin_neon_vmaxnm_v:
6176   case NEON::BI__builtin_neon_vmaxnmq_v:
6177     Int = Intrinsic::aarch64_neon_fmaxnm;
6178     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
6179   case NEON::BI__builtin_neon_vrecpss_f32: {
6180     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6181     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
6182                         Ops, "vrecps");
6183   }
6184   case NEON::BI__builtin_neon_vrecpsd_f64: {
6185     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6186     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
6187                         Ops, "vrecps");
6188   }
6189   case NEON::BI__builtin_neon_vqshrun_n_v:
6190     Int = Intrinsic::aarch64_neon_sqshrun;
6191     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
6192   case NEON::BI__builtin_neon_vqrshrun_n_v:
6193     Int = Intrinsic::aarch64_neon_sqrshrun;
6194     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
6195   case NEON::BI__builtin_neon_vqshrn_n_v:
6196     Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6197     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
6198   case NEON::BI__builtin_neon_vrshrn_n_v:
6199     Int = Intrinsic::aarch64_neon_rshrn;
6200     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
6201   case NEON::BI__builtin_neon_vqrshrn_n_v:
6202     Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6203     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
6204   case NEON::BI__builtin_neon_vrnda_v:
6205   case NEON::BI__builtin_neon_vrndaq_v: {
6206     Int = Intrinsic::round;
6207     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
6208   }
6209   case NEON::BI__builtin_neon_vrndi_v:
6210   case NEON::BI__builtin_neon_vrndiq_v: {
6211     Int = Intrinsic::nearbyint;
6212     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
6213   }
6214   case NEON::BI__builtin_neon_vrndm_v:
6215   case NEON::BI__builtin_neon_vrndmq_v: {
6216     Int = Intrinsic::floor;
6217     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
6218   }
6219   case NEON::BI__builtin_neon_vrndn_v:
6220   case NEON::BI__builtin_neon_vrndnq_v: {
6221     Int = Intrinsic::aarch64_neon_frintn;
6222     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
6223   }
6224   case NEON::BI__builtin_neon_vrndp_v:
6225   case NEON::BI__builtin_neon_vrndpq_v: {
6226     Int = Intrinsic::ceil;
6227     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
6228   }
6229   case NEON::BI__builtin_neon_vrndx_v:
6230   case NEON::BI__builtin_neon_vrndxq_v: {
6231     Int = Intrinsic::rint;
6232     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
6233   }
6234   case NEON::BI__builtin_neon_vrnd_v:
6235   case NEON::BI__builtin_neon_vrndq_v: {
6236     Int = Intrinsic::trunc;
6237     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
6238   }
6239   case NEON::BI__builtin_neon_vceqz_v:
6240   case NEON::BI__builtin_neon_vceqzq_v:
6241     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
6242                                          ICmpInst::ICMP_EQ, "vceqz");
6243   case NEON::BI__builtin_neon_vcgez_v:
6244   case NEON::BI__builtin_neon_vcgezq_v:
6245     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
6246                                          ICmpInst::ICMP_SGE, "vcgez");
6247   case NEON::BI__builtin_neon_vclez_v:
6248   case NEON::BI__builtin_neon_vclezq_v:
6249     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
6250                                          ICmpInst::ICMP_SLE, "vclez");
6251   case NEON::BI__builtin_neon_vcgtz_v:
6252   case NEON::BI__builtin_neon_vcgtzq_v:
6253     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
6254                                          ICmpInst::ICMP_SGT, "vcgtz");
6255   case NEON::BI__builtin_neon_vcltz_v:
6256   case NEON::BI__builtin_neon_vcltzq_v:
6257     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
6258                                          ICmpInst::ICMP_SLT, "vcltz");
6259   case NEON::BI__builtin_neon_vcvt_f64_v:
6260   case NEON::BI__builtin_neon_vcvtq_f64_v:
6261     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6262     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
6263     return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
6264                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
6265   case NEON::BI__builtin_neon_vcvt_f64_f32: {
6266     assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
6267            "unexpected vcvt_f64_f32 builtin");
6268     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
6269     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6270 
6271     return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
6272   }
6273   case NEON::BI__builtin_neon_vcvt_f32_f64: {
6274     assert(Type.getEltType() == NeonTypeFlags::Float32 &&
6275            "unexpected vcvt_f32_f64 builtin");
6276     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
6277     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6278 
6279     return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
6280   }
6281   case NEON::BI__builtin_neon_vcvt_s32_v:
6282   case NEON::BI__builtin_neon_vcvt_u32_v:
6283   case NEON::BI__builtin_neon_vcvt_s64_v:
6284   case NEON::BI__builtin_neon_vcvt_u64_v:
6285   case NEON::BI__builtin_neon_vcvtq_s32_v:
6286   case NEON::BI__builtin_neon_vcvtq_u32_v:
6287   case NEON::BI__builtin_neon_vcvtq_s64_v:
6288   case NEON::BI__builtin_neon_vcvtq_u64_v: {
6289     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
6290     if (usgn)
6291       return Builder.CreateFPToUI(Ops[0], Ty);
6292     return Builder.CreateFPToSI(Ops[0], Ty);
6293   }
6294   case NEON::BI__builtin_neon_vcvta_s32_v:
6295   case NEON::BI__builtin_neon_vcvtaq_s32_v:
6296   case NEON::BI__builtin_neon_vcvta_u32_v:
6297   case NEON::BI__builtin_neon_vcvtaq_u32_v:
6298   case NEON::BI__builtin_neon_vcvta_s64_v:
6299   case NEON::BI__builtin_neon_vcvtaq_s64_v:
6300   case NEON::BI__builtin_neon_vcvta_u64_v:
6301   case NEON::BI__builtin_neon_vcvtaq_u64_v: {
6302     Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6303     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6304     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
6305   }
6306   case NEON::BI__builtin_neon_vcvtm_s32_v:
6307   case NEON::BI__builtin_neon_vcvtmq_s32_v:
6308   case NEON::BI__builtin_neon_vcvtm_u32_v:
6309   case NEON::BI__builtin_neon_vcvtmq_u32_v:
6310   case NEON::BI__builtin_neon_vcvtm_s64_v:
6311   case NEON::BI__builtin_neon_vcvtmq_s64_v:
6312   case NEON::BI__builtin_neon_vcvtm_u64_v:
6313   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6314     Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6315     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6316     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
6317   }
6318   case NEON::BI__builtin_neon_vcvtn_s32_v:
6319   case NEON::BI__builtin_neon_vcvtnq_s32_v:
6320   case NEON::BI__builtin_neon_vcvtn_u32_v:
6321   case NEON::BI__builtin_neon_vcvtnq_u32_v:
6322   case NEON::BI__builtin_neon_vcvtn_s64_v:
6323   case NEON::BI__builtin_neon_vcvtnq_s64_v:
6324   case NEON::BI__builtin_neon_vcvtn_u64_v:
6325   case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6326     Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6327     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6328     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
6329   }
6330   case NEON::BI__builtin_neon_vcvtp_s32_v:
6331   case NEON::BI__builtin_neon_vcvtpq_s32_v:
6332   case NEON::BI__builtin_neon_vcvtp_u32_v:
6333   case NEON::BI__builtin_neon_vcvtpq_u32_v:
6334   case NEON::BI__builtin_neon_vcvtp_s64_v:
6335   case NEON::BI__builtin_neon_vcvtpq_s64_v:
6336   case NEON::BI__builtin_neon_vcvtp_u64_v:
6337   case NEON::BI__builtin_neon_vcvtpq_u64_v: {
6338     Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
6339     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6340     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
6341   }
6342   case NEON::BI__builtin_neon_vmulx_v:
6343   case NEON::BI__builtin_neon_vmulxq_v: {
6344     Int = Intrinsic::aarch64_neon_fmulx;
6345     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
6346   }
6347   case NEON::BI__builtin_neon_vmul_lane_v:
6348   case NEON::BI__builtin_neon_vmul_laneq_v: {
6349     // v1f64 vmul_lane should be mapped to Neon scalar mul lane
6350     bool Quad = false;
6351     if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
6352       Quad = true;
6353     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6354     llvm::Type *VTy = GetNeonType(this,
6355       NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
6356     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6357     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
6358     Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
6359     return Builder.CreateBitCast(Result, Ty);
6360   }
6361   case NEON::BI__builtin_neon_vnegd_s64:
6362     return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
6363   case NEON::BI__builtin_neon_vpmaxnm_v:
6364   case NEON::BI__builtin_neon_vpmaxnmq_v: {
6365     Int = Intrinsic::aarch64_neon_fmaxnmp;
6366     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
6367   }
6368   case NEON::BI__builtin_neon_vpminnm_v:
6369   case NEON::BI__builtin_neon_vpminnmq_v: {
6370     Int = Intrinsic::aarch64_neon_fminnmp;
6371     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
6372   }
6373   case NEON::BI__builtin_neon_vsqrt_v:
6374   case NEON::BI__builtin_neon_vsqrtq_v: {
6375     Int = Intrinsic::sqrt;
6376     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6377     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
6378   }
6379   case NEON::BI__builtin_neon_vrbit_v:
6380   case NEON::BI__builtin_neon_vrbitq_v: {
6381     Int = Intrinsic::aarch64_neon_rbit;
6382     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
6383   }
6384   case NEON::BI__builtin_neon_vaddv_u8:
6385     // FIXME: These are handled by the AArch64 scalar code.
6386     usgn = true;
6387     // FALLTHROUGH
6388   case NEON::BI__builtin_neon_vaddv_s8: {
6389     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6390     Ty = Int32Ty;
6391     VTy = llvm::VectorType::get(Int8Ty, 8);
6392     llvm::Type *Tys[2] = { Ty, VTy };
6393     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6394     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6395     return Builder.CreateTrunc(Ops[0], Int8Ty);
6396   }
6397   case NEON::BI__builtin_neon_vaddv_u16:
6398     usgn = true;
6399     // FALLTHROUGH
6400   case NEON::BI__builtin_neon_vaddv_s16: {
6401     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6402     Ty = Int32Ty;
6403     VTy = llvm::VectorType::get(Int16Ty, 4);
6404     llvm::Type *Tys[2] = { Ty, VTy };
6405     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6406     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6407     return Builder.CreateTrunc(Ops[0], Int16Ty);
6408   }
6409   case NEON::BI__builtin_neon_vaddvq_u8:
6410     usgn = true;
6411     // FALLTHROUGH
6412   case NEON::BI__builtin_neon_vaddvq_s8: {
6413     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6414     Ty = Int32Ty;
6415     VTy = llvm::VectorType::get(Int8Ty, 16);
6416     llvm::Type *Tys[2] = { Ty, VTy };
6417     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6418     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6419     return Builder.CreateTrunc(Ops[0], Int8Ty);
6420   }
6421   case NEON::BI__builtin_neon_vaddvq_u16:
6422     usgn = true;
6423     // FALLTHROUGH
6424   case NEON::BI__builtin_neon_vaddvq_s16: {
6425     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6426     Ty = Int32Ty;
6427     VTy = llvm::VectorType::get(Int16Ty, 8);
6428     llvm::Type *Tys[2] = { Ty, VTy };
6429     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6430     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6431     return Builder.CreateTrunc(Ops[0], Int16Ty);
6432   }
6433   case NEON::BI__builtin_neon_vmaxv_u8: {
6434     Int = Intrinsic::aarch64_neon_umaxv;
6435     Ty = Int32Ty;
6436     VTy = llvm::VectorType::get(Int8Ty, 8);
6437     llvm::Type *Tys[2] = { Ty, VTy };
6438     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6439     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6440     return Builder.CreateTrunc(Ops[0], Int8Ty);
6441   }
6442   case NEON::BI__builtin_neon_vmaxv_u16: {
6443     Int = Intrinsic::aarch64_neon_umaxv;
6444     Ty = Int32Ty;
6445     VTy = llvm::VectorType::get(Int16Ty, 4);
6446     llvm::Type *Tys[2] = { Ty, VTy };
6447     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6448     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6449     return Builder.CreateTrunc(Ops[0], Int16Ty);
6450   }
6451   case NEON::BI__builtin_neon_vmaxvq_u8: {
6452     Int = Intrinsic::aarch64_neon_umaxv;
6453     Ty = Int32Ty;
6454     VTy = llvm::VectorType::get(Int8Ty, 16);
6455     llvm::Type *Tys[2] = { Ty, VTy };
6456     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6457     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6458     return Builder.CreateTrunc(Ops[0], Int8Ty);
6459   }
6460   case NEON::BI__builtin_neon_vmaxvq_u16: {
6461     Int = Intrinsic::aarch64_neon_umaxv;
6462     Ty = Int32Ty;
6463     VTy = llvm::VectorType::get(Int16Ty, 8);
6464     llvm::Type *Tys[2] = { Ty, VTy };
6465     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6466     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6467     return Builder.CreateTrunc(Ops[0], Int16Ty);
6468   }
6469   case NEON::BI__builtin_neon_vmaxv_s8: {
6470     Int = Intrinsic::aarch64_neon_smaxv;
6471     Ty = Int32Ty;
6472     VTy = llvm::VectorType::get(Int8Ty, 8);
6473     llvm::Type *Tys[2] = { Ty, VTy };
6474     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6475     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6476     return Builder.CreateTrunc(Ops[0], Int8Ty);
6477   }
6478   case NEON::BI__builtin_neon_vmaxv_s16: {
6479     Int = Intrinsic::aarch64_neon_smaxv;
6480     Ty = Int32Ty;
6481     VTy = llvm::VectorType::get(Int16Ty, 4);
6482     llvm::Type *Tys[2] = { Ty, VTy };
6483     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6484     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6485     return Builder.CreateTrunc(Ops[0], Int16Ty);
6486   }
6487   case NEON::BI__builtin_neon_vmaxvq_s8: {
6488     Int = Intrinsic::aarch64_neon_smaxv;
6489     Ty = Int32Ty;
6490     VTy = llvm::VectorType::get(Int8Ty, 16);
6491     llvm::Type *Tys[2] = { Ty, VTy };
6492     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6493     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6494     return Builder.CreateTrunc(Ops[0], Int8Ty);
6495   }
6496   case NEON::BI__builtin_neon_vmaxvq_s16: {
6497     Int = Intrinsic::aarch64_neon_smaxv;
6498     Ty = Int32Ty;
6499     VTy = llvm::VectorType::get(Int16Ty, 8);
6500     llvm::Type *Tys[2] = { Ty, VTy };
6501     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6502     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6503     return Builder.CreateTrunc(Ops[0], Int16Ty);
6504   }
6505   case NEON::BI__builtin_neon_vminv_u8: {
6506     Int = Intrinsic::aarch64_neon_uminv;
6507     Ty = Int32Ty;
6508     VTy = llvm::VectorType::get(Int8Ty, 8);
6509     llvm::Type *Tys[2] = { Ty, VTy };
6510     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6511     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6512     return Builder.CreateTrunc(Ops[0], Int8Ty);
6513   }
6514   case NEON::BI__builtin_neon_vminv_u16: {
6515     Int = Intrinsic::aarch64_neon_uminv;
6516     Ty = Int32Ty;
6517     VTy = llvm::VectorType::get(Int16Ty, 4);
6518     llvm::Type *Tys[2] = { Ty, VTy };
6519     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6520     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6521     return Builder.CreateTrunc(Ops[0], Int16Ty);
6522   }
6523   case NEON::BI__builtin_neon_vminvq_u8: {
6524     Int = Intrinsic::aarch64_neon_uminv;
6525     Ty = Int32Ty;
6526     VTy = llvm::VectorType::get(Int8Ty, 16);
6527     llvm::Type *Tys[2] = { Ty, VTy };
6528     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6529     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6530     return Builder.CreateTrunc(Ops[0], Int8Ty);
6531   }
6532   case NEON::BI__builtin_neon_vminvq_u16: {
6533     Int = Intrinsic::aarch64_neon_uminv;
6534     Ty = Int32Ty;
6535     VTy = llvm::VectorType::get(Int16Ty, 8);
6536     llvm::Type *Tys[2] = { Ty, VTy };
6537     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6538     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6539     return Builder.CreateTrunc(Ops[0], Int16Ty);
6540   }
6541   case NEON::BI__builtin_neon_vminv_s8: {
6542     Int = Intrinsic::aarch64_neon_sminv;
6543     Ty = Int32Ty;
6544     VTy = llvm::VectorType::get(Int8Ty, 8);
6545     llvm::Type *Tys[2] = { Ty, VTy };
6546     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6547     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6548     return Builder.CreateTrunc(Ops[0], Int8Ty);
6549   }
6550   case NEON::BI__builtin_neon_vminv_s16: {
6551     Int = Intrinsic::aarch64_neon_sminv;
6552     Ty = Int32Ty;
6553     VTy = llvm::VectorType::get(Int16Ty, 4);
6554     llvm::Type *Tys[2] = { Ty, VTy };
6555     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6556     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6557     return Builder.CreateTrunc(Ops[0], Int16Ty);
6558   }
6559   case NEON::BI__builtin_neon_vminvq_s8: {
6560     Int = Intrinsic::aarch64_neon_sminv;
6561     Ty = Int32Ty;
6562     VTy = llvm::VectorType::get(Int8Ty, 16);
6563     llvm::Type *Tys[2] = { Ty, VTy };
6564     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6565     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6566     return Builder.CreateTrunc(Ops[0], Int8Ty);
6567   }
6568   case NEON::BI__builtin_neon_vminvq_s16: {
6569     Int = Intrinsic::aarch64_neon_sminv;
6570     Ty = Int32Ty;
6571     VTy = llvm::VectorType::get(Int16Ty, 8);
6572     llvm::Type *Tys[2] = { Ty, VTy };
6573     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6574     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6575     return Builder.CreateTrunc(Ops[0], Int16Ty);
6576   }
6577   case NEON::BI__builtin_neon_vmul_n_f64: {
6578     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6579     Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
6580     return Builder.CreateFMul(Ops[0], RHS);
6581   }
6582   case NEON::BI__builtin_neon_vaddlv_u8: {
6583     Int = Intrinsic::aarch64_neon_uaddlv;
6584     Ty = Int32Ty;
6585     VTy = llvm::VectorType::get(Int8Ty, 8);
6586     llvm::Type *Tys[2] = { Ty, VTy };
6587     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6588     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6589     return Builder.CreateTrunc(Ops[0], Int16Ty);
6590   }
6591   case NEON::BI__builtin_neon_vaddlv_u16: {
6592     Int = Intrinsic::aarch64_neon_uaddlv;
6593     Ty = Int32Ty;
6594     VTy = llvm::VectorType::get(Int16Ty, 4);
6595     llvm::Type *Tys[2] = { Ty, VTy };
6596     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6597     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6598   }
6599   case NEON::BI__builtin_neon_vaddlvq_u8: {
6600     Int = Intrinsic::aarch64_neon_uaddlv;
6601     Ty = Int32Ty;
6602     VTy = llvm::VectorType::get(Int8Ty, 16);
6603     llvm::Type *Tys[2] = { Ty, VTy };
6604     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6605     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6606     return Builder.CreateTrunc(Ops[0], Int16Ty);
6607   }
6608   case NEON::BI__builtin_neon_vaddlvq_u16: {
6609     Int = Intrinsic::aarch64_neon_uaddlv;
6610     Ty = Int32Ty;
6611     VTy = llvm::VectorType::get(Int16Ty, 8);
6612     llvm::Type *Tys[2] = { Ty, VTy };
6613     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6614     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6615   }
6616   case NEON::BI__builtin_neon_vaddlv_s8: {
6617     Int = Intrinsic::aarch64_neon_saddlv;
6618     Ty = Int32Ty;
6619     VTy = llvm::VectorType::get(Int8Ty, 8);
6620     llvm::Type *Tys[2] = { Ty, VTy };
6621     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6622     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6623     return Builder.CreateTrunc(Ops[0], Int16Ty);
6624   }
6625   case NEON::BI__builtin_neon_vaddlv_s16: {
6626     Int = Intrinsic::aarch64_neon_saddlv;
6627     Ty = Int32Ty;
6628     VTy = llvm::VectorType::get(Int16Ty, 4);
6629     llvm::Type *Tys[2] = { Ty, VTy };
6630     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6631     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6632   }
6633   case NEON::BI__builtin_neon_vaddlvq_s8: {
6634     Int = Intrinsic::aarch64_neon_saddlv;
6635     Ty = Int32Ty;
6636     VTy = llvm::VectorType::get(Int8Ty, 16);
6637     llvm::Type *Tys[2] = { Ty, VTy };
6638     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6639     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6640     return Builder.CreateTrunc(Ops[0], Int16Ty);
6641   }
6642   case NEON::BI__builtin_neon_vaddlvq_s16: {
6643     Int = Intrinsic::aarch64_neon_saddlv;
6644     Ty = Int32Ty;
6645     VTy = llvm::VectorType::get(Int16Ty, 8);
6646     llvm::Type *Tys[2] = { Ty, VTy };
6647     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6648     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6649   }
6650   case NEON::BI__builtin_neon_vsri_n_v:
6651   case NEON::BI__builtin_neon_vsriq_n_v: {
6652     Int = Intrinsic::aarch64_neon_vsri;
6653     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6654     return EmitNeonCall(Intrin, Ops, "vsri_n");
6655   }
6656   case NEON::BI__builtin_neon_vsli_n_v:
6657   case NEON::BI__builtin_neon_vsliq_n_v: {
6658     Int = Intrinsic::aarch64_neon_vsli;
6659     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6660     return EmitNeonCall(Intrin, Ops, "vsli_n");
6661   }
6662   case NEON::BI__builtin_neon_vsra_n_v:
6663   case NEON::BI__builtin_neon_vsraq_n_v:
6664     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6665     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
6666     return Builder.CreateAdd(Ops[0], Ops[1]);
6667   case NEON::BI__builtin_neon_vrsra_n_v:
6668   case NEON::BI__builtin_neon_vrsraq_n_v: {
6669     Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
6670     SmallVector<llvm::Value*,2> TmpOps;
6671     TmpOps.push_back(Ops[1]);
6672     TmpOps.push_back(Ops[2]);
6673     Function* F = CGM.getIntrinsic(Int, Ty);
6674     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
6675     Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
6676     return Builder.CreateAdd(Ops[0], tmp);
6677   }
6678     // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
6679     // of an Align parameter here.
6680   case NEON::BI__builtin_neon_vld1_x2_v:
6681   case NEON::BI__builtin_neon_vld1q_x2_v:
6682   case NEON::BI__builtin_neon_vld1_x3_v:
6683   case NEON::BI__builtin_neon_vld1q_x3_v:
6684   case NEON::BI__builtin_neon_vld1_x4_v:
6685   case NEON::BI__builtin_neon_vld1q_x4_v: {
6686     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6687     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6688     llvm::Type *Tys[2] = { VTy, PTy };
6689     unsigned Int;
6690     switch (BuiltinID) {
6691     case NEON::BI__builtin_neon_vld1_x2_v:
6692     case NEON::BI__builtin_neon_vld1q_x2_v:
6693       Int = Intrinsic::aarch64_neon_ld1x2;
6694       break;
6695     case NEON::BI__builtin_neon_vld1_x3_v:
6696     case NEON::BI__builtin_neon_vld1q_x3_v:
6697       Int = Intrinsic::aarch64_neon_ld1x3;
6698       break;
6699     case NEON::BI__builtin_neon_vld1_x4_v:
6700     case NEON::BI__builtin_neon_vld1q_x4_v:
6701       Int = Intrinsic::aarch64_neon_ld1x4;
6702       break;
6703     }
6704     Function *F = CGM.getIntrinsic(Int, Tys);
6705     Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
6706     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6707     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6708     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6709   }
6710   case NEON::BI__builtin_neon_vst1_x2_v:
6711   case NEON::BI__builtin_neon_vst1q_x2_v:
6712   case NEON::BI__builtin_neon_vst1_x3_v:
6713   case NEON::BI__builtin_neon_vst1q_x3_v:
6714   case NEON::BI__builtin_neon_vst1_x4_v:
6715   case NEON::BI__builtin_neon_vst1q_x4_v: {
6716     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6717     llvm::Type *Tys[2] = { VTy, PTy };
6718     unsigned Int;
6719     switch (BuiltinID) {
6720     case NEON::BI__builtin_neon_vst1_x2_v:
6721     case NEON::BI__builtin_neon_vst1q_x2_v:
6722       Int = Intrinsic::aarch64_neon_st1x2;
6723       break;
6724     case NEON::BI__builtin_neon_vst1_x3_v:
6725     case NEON::BI__builtin_neon_vst1q_x3_v:
6726       Int = Intrinsic::aarch64_neon_st1x3;
6727       break;
6728     case NEON::BI__builtin_neon_vst1_x4_v:
6729     case NEON::BI__builtin_neon_vst1q_x4_v:
6730       Int = Intrinsic::aarch64_neon_st1x4;
6731       break;
6732     }
6733     std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6734     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
6735   }
6736   case NEON::BI__builtin_neon_vld1_v:
6737   case NEON::BI__builtin_neon_vld1q_v: {
6738     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6739     auto Alignment = CharUnits::fromQuantity(
6740         BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16);
6741     return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment);
6742   }
6743   case NEON::BI__builtin_neon_vst1_v:
6744   case NEON::BI__builtin_neon_vst1q_v:
6745     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6746     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6747     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6748   case NEON::BI__builtin_neon_vld1_lane_v:
6749   case NEON::BI__builtin_neon_vld1q_lane_v: {
6750     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6751     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6752     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6753     auto Alignment = CharUnits::fromQuantity(
6754         BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16);
6755     Ops[0] =
6756         Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
6757     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
6758   }
6759   case NEON::BI__builtin_neon_vld1_dup_v:
6760   case NEON::BI__builtin_neon_vld1q_dup_v: {
6761     Value *V = UndefValue::get(Ty);
6762     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6763     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6764     auto Alignment = CharUnits::fromQuantity(
6765         BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 8 : 16);
6766     Ops[0] =
6767         Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
6768     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
6769     Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
6770     return EmitNeonSplat(Ops[0], CI);
6771   }
6772   case NEON::BI__builtin_neon_vst1_lane_v:
6773   case NEON::BI__builtin_neon_vst1q_lane_v:
6774     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6775     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6776     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6777     return Builder.CreateDefaultAlignedStore(Ops[1],
6778                                              Builder.CreateBitCast(Ops[0], Ty));
6779   case NEON::BI__builtin_neon_vld2_v:
6780   case NEON::BI__builtin_neon_vld2q_v: {
6781     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6782     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6783     llvm::Type *Tys[2] = { VTy, PTy };
6784     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
6785     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6786     Ops[0] = Builder.CreateBitCast(Ops[0],
6787                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6788     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6789   }
6790   case NEON::BI__builtin_neon_vld3_v:
6791   case NEON::BI__builtin_neon_vld3q_v: {
6792     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6793     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6794     llvm::Type *Tys[2] = { VTy, PTy };
6795     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
6796     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6797     Ops[0] = Builder.CreateBitCast(Ops[0],
6798                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6799     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6800   }
6801   case NEON::BI__builtin_neon_vld4_v:
6802   case NEON::BI__builtin_neon_vld4q_v: {
6803     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6804     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6805     llvm::Type *Tys[2] = { VTy, PTy };
6806     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
6807     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6808     Ops[0] = Builder.CreateBitCast(Ops[0],
6809                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6810     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6811   }
6812   case NEON::BI__builtin_neon_vld2_dup_v:
6813   case NEON::BI__builtin_neon_vld2q_dup_v: {
6814     llvm::Type *PTy =
6815       llvm::PointerType::getUnqual(VTy->getElementType());
6816     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6817     llvm::Type *Tys[2] = { VTy, PTy };
6818     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
6819     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6820     Ops[0] = Builder.CreateBitCast(Ops[0],
6821                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6822     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6823   }
6824   case NEON::BI__builtin_neon_vld3_dup_v:
6825   case NEON::BI__builtin_neon_vld3q_dup_v: {
6826     llvm::Type *PTy =
6827       llvm::PointerType::getUnqual(VTy->getElementType());
6828     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6829     llvm::Type *Tys[2] = { VTy, PTy };
6830     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
6831     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6832     Ops[0] = Builder.CreateBitCast(Ops[0],
6833                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6834     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6835   }
6836   case NEON::BI__builtin_neon_vld4_dup_v:
6837   case NEON::BI__builtin_neon_vld4q_dup_v: {
6838     llvm::Type *PTy =
6839       llvm::PointerType::getUnqual(VTy->getElementType());
6840     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6841     llvm::Type *Tys[2] = { VTy, PTy };
6842     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
6843     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6844     Ops[0] = Builder.CreateBitCast(Ops[0],
6845                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6846     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6847   }
6848   case NEON::BI__builtin_neon_vld2_lane_v:
6849   case NEON::BI__builtin_neon_vld2q_lane_v: {
6850     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6851     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
6852     Ops.push_back(Ops[1]);
6853     Ops.erase(Ops.begin()+1);
6854     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6855     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6856     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6857     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
6858     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6859     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6860     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6861   }
6862   case NEON::BI__builtin_neon_vld3_lane_v:
6863   case NEON::BI__builtin_neon_vld3q_lane_v: {
6864     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6865     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
6866     Ops.push_back(Ops[1]);
6867     Ops.erase(Ops.begin()+1);
6868     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6869     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6870     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6871     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6872     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
6873     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6874     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6875     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6876   }
6877   case NEON::BI__builtin_neon_vld4_lane_v:
6878   case NEON::BI__builtin_neon_vld4q_lane_v: {
6879     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6880     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
6881     Ops.push_back(Ops[1]);
6882     Ops.erase(Ops.begin()+1);
6883     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6884     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6885     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6886     Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
6887     Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
6888     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
6889     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6890     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6891     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6892   }
6893   case NEON::BI__builtin_neon_vst2_v:
6894   case NEON::BI__builtin_neon_vst2q_v: {
6895     Ops.push_back(Ops[0]);
6896     Ops.erase(Ops.begin());
6897     llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
6898     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
6899                         Ops, "");
6900   }
6901   case NEON::BI__builtin_neon_vst2_lane_v:
6902   case NEON::BI__builtin_neon_vst2q_lane_v: {
6903     Ops.push_back(Ops[0]);
6904     Ops.erase(Ops.begin());
6905     Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
6906     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6907     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
6908                         Ops, "");
6909   }
6910   case NEON::BI__builtin_neon_vst3_v:
6911   case NEON::BI__builtin_neon_vst3q_v: {
6912     Ops.push_back(Ops[0]);
6913     Ops.erase(Ops.begin());
6914     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6915     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
6916                         Ops, "");
6917   }
6918   case NEON::BI__builtin_neon_vst3_lane_v:
6919   case NEON::BI__builtin_neon_vst3q_lane_v: {
6920     Ops.push_back(Ops[0]);
6921     Ops.erase(Ops.begin());
6922     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6923     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6924     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
6925                         Ops, "");
6926   }
6927   case NEON::BI__builtin_neon_vst4_v:
6928   case NEON::BI__builtin_neon_vst4q_v: {
6929     Ops.push_back(Ops[0]);
6930     Ops.erase(Ops.begin());
6931     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6932     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
6933                         Ops, "");
6934   }
6935   case NEON::BI__builtin_neon_vst4_lane_v:
6936   case NEON::BI__builtin_neon_vst4q_lane_v: {
6937     Ops.push_back(Ops[0]);
6938     Ops.erase(Ops.begin());
6939     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6940     llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
6941     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
6942                         Ops, "");
6943   }
6944   case NEON::BI__builtin_neon_vtrn_v:
6945   case NEON::BI__builtin_neon_vtrnq_v: {
6946     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6947     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6948     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6949     Value *SV = nullptr;
6950 
6951     for (unsigned vi = 0; vi != 2; ++vi) {
6952       SmallVector<uint32_t, 16> Indices;
6953       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6954         Indices.push_back(i+vi);
6955         Indices.push_back(i+e+vi);
6956       }
6957       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6958       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
6959       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6960     }
6961     return SV;
6962   }
6963   case NEON::BI__builtin_neon_vuzp_v:
6964   case NEON::BI__builtin_neon_vuzpq_v: {
6965     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6966     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6967     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6968     Value *SV = nullptr;
6969 
6970     for (unsigned vi = 0; vi != 2; ++vi) {
6971       SmallVector<uint32_t, 16> Indices;
6972       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
6973         Indices.push_back(2*i+vi);
6974 
6975       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6976       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
6977       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6978     }
6979     return SV;
6980   }
6981   case NEON::BI__builtin_neon_vzip_v:
6982   case NEON::BI__builtin_neon_vzipq_v: {
6983     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6984     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6985     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6986     Value *SV = nullptr;
6987 
6988     for (unsigned vi = 0; vi != 2; ++vi) {
6989       SmallVector<uint32_t, 16> Indices;
6990       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6991         Indices.push_back((i + vi*e) >> 1);
6992         Indices.push_back(((i + vi*e) >> 1)+e);
6993       }
6994       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6995       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
6996       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6997     }
6998     return SV;
6999   }
7000   case NEON::BI__builtin_neon_vqtbl1q_v: {
7001     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
7002                         Ops, "vtbl1");
7003   }
7004   case NEON::BI__builtin_neon_vqtbl2q_v: {
7005     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
7006                         Ops, "vtbl2");
7007   }
7008   case NEON::BI__builtin_neon_vqtbl3q_v: {
7009     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
7010                         Ops, "vtbl3");
7011   }
7012   case NEON::BI__builtin_neon_vqtbl4q_v: {
7013     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
7014                         Ops, "vtbl4");
7015   }
7016   case NEON::BI__builtin_neon_vqtbx1q_v: {
7017     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
7018                         Ops, "vtbx1");
7019   }
7020   case NEON::BI__builtin_neon_vqtbx2q_v: {
7021     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
7022                         Ops, "vtbx2");
7023   }
7024   case NEON::BI__builtin_neon_vqtbx3q_v: {
7025     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
7026                         Ops, "vtbx3");
7027   }
7028   case NEON::BI__builtin_neon_vqtbx4q_v: {
7029     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
7030                         Ops, "vtbx4");
7031   }
7032   case NEON::BI__builtin_neon_vsqadd_v:
7033   case NEON::BI__builtin_neon_vsqaddq_v: {
7034     Int = Intrinsic::aarch64_neon_usqadd;
7035     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
7036   }
7037   case NEON::BI__builtin_neon_vuqadd_v:
7038   case NEON::BI__builtin_neon_vuqaddq_v: {
7039     Int = Intrinsic::aarch64_neon_suqadd;
7040     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
7041   }
7042   }
7043 }
7044 
7045 llvm::Value *CodeGenFunction::
7046 BuildVector(ArrayRef<llvm::Value*> Ops) {
7047   assert((Ops.size() & (Ops.size() - 1)) == 0 &&
7048          "Not a power-of-two sized vector!");
7049   bool AllConstants = true;
7050   for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
7051     AllConstants &= isa<Constant>(Ops[i]);
7052 
7053   // If this is a constant vector, create a ConstantVector.
7054   if (AllConstants) {
7055     SmallVector<llvm::Constant*, 16> CstOps;
7056     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7057       CstOps.push_back(cast<Constant>(Ops[i]));
7058     return llvm::ConstantVector::get(CstOps);
7059   }
7060 
7061   // Otherwise, insertelement the values to build the vector.
7062   Value *Result =
7063     llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
7064 
7065   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7066     Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
7067 
7068   return Result;
7069 }
7070 
7071 // Convert the mask from an integer type to a vector of i1.
7072 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
7073                               unsigned NumElts) {
7074 
7075   llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
7076                          cast<IntegerType>(Mask->getType())->getBitWidth());
7077   Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
7078 
7079   // If we have less than 8 elements, then the starting mask was an i8 and
7080   // we need to extract down to the right number of elements.
7081   if (NumElts < 8) {
7082     uint32_t Indices[4];
7083     for (unsigned i = 0; i != NumElts; ++i)
7084       Indices[i] = i;
7085     MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
7086                                              makeArrayRef(Indices, NumElts),
7087                                              "extract");
7088   }
7089   return MaskVec;
7090 }
7091 
7092 static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
7093                                  SmallVectorImpl<Value *> &Ops,
7094                                  unsigned Align) {
7095   // Cast the pointer to right type.
7096   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
7097                                llvm::PointerType::getUnqual(Ops[1]->getType()));
7098 
7099   // If the mask is all ones just emit a regular store.
7100   if (const auto *C = dyn_cast<Constant>(Ops[2]))
7101     if (C->isAllOnesValue())
7102       return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align);
7103 
7104   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
7105                                    Ops[1]->getType()->getVectorNumElements());
7106 
7107   return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec);
7108 }
7109 
7110 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
7111                                 SmallVectorImpl<Value *> &Ops, unsigned Align) {
7112   // Cast the pointer to right type.
7113   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
7114                                llvm::PointerType::getUnqual(Ops[1]->getType()));
7115 
7116   // If the mask is all ones just emit a regular store.
7117   if (const auto *C = dyn_cast<Constant>(Ops[2]))
7118     if (C->isAllOnesValue())
7119       return CGF.Builder.CreateAlignedLoad(Ops[0], Align);
7120 
7121   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
7122                                    Ops[1]->getType()->getVectorNumElements());
7123 
7124   return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
7125 }
7126 
7127 static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF,
7128                                         SmallVectorImpl<Value *> &Ops,
7129                                         llvm::Type *DstTy,
7130                                         unsigned SrcSizeInBits,
7131                                         unsigned Align) {
7132   // Load the subvector.
7133   Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align);
7134 
7135   // Create broadcast mask.
7136   unsigned NumDstElts = DstTy->getVectorNumElements();
7137   unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits();
7138 
7139   SmallVector<uint32_t, 8> Mask;
7140   for (unsigned i = 0; i != NumDstElts; i += NumSrcElts)
7141     for (unsigned j = 0; j != NumSrcElts; ++j)
7142       Mask.push_back(j);
7143 
7144   return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst");
7145 }
7146 
7147 static Value *EmitX86Select(CodeGenFunction &CGF,
7148                             Value *Mask, Value *Op0, Value *Op1) {
7149 
7150   // If the mask is all ones just return first argument.
7151   if (const auto *C = dyn_cast<Constant>(Mask))
7152     if (C->isAllOnesValue())
7153       return Op0;
7154 
7155   Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());
7156 
7157   return CGF.Builder.CreateSelect(Mask, Op0, Op1);
7158 }
7159 
7160 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
7161                                    bool Signed, SmallVectorImpl<Value *> &Ops) {
7162   unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7163   Value *Cmp;
7164 
7165   if (CC == 3) {
7166     Cmp = Constant::getNullValue(
7167                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
7168   } else if (CC == 7) {
7169     Cmp = Constant::getAllOnesValue(
7170                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
7171   } else {
7172     ICmpInst::Predicate Pred;
7173     switch (CC) {
7174     default: llvm_unreachable("Unknown condition code");
7175     case 0: Pred = ICmpInst::ICMP_EQ;  break;
7176     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
7177     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
7178     case 4: Pred = ICmpInst::ICMP_NE;  break;
7179     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
7180     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
7181     }
7182     Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
7183   }
7184 
7185   const auto *C = dyn_cast<Constant>(Ops.back());
7186   if (!C || !C->isAllOnesValue())
7187     Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts));
7188 
7189   if (NumElts < 8) {
7190     uint32_t Indices[8];
7191     for (unsigned i = 0; i != NumElts; ++i)
7192       Indices[i] = i;
7193     for (unsigned i = NumElts; i != 8; ++i)
7194       Indices[i] = i % NumElts + NumElts;
7195     Cmp = CGF.Builder.CreateShuffleVector(
7196         Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
7197   }
7198   return CGF.Builder.CreateBitCast(Cmp,
7199                                    IntegerType::get(CGF.getLLVMContext(),
7200                                                     std::max(NumElts, 8U)));
7201 }
7202 
7203 static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
7204                             ArrayRef<Value *> Ops) {
7205   Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
7206   Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7207 
7208   if (Ops.size() == 2)
7209     return Res;
7210 
7211   assert(Ops.size() == 4);
7212   return EmitX86Select(CGF, Ops[3], Res, Ops[2]);
7213 }
7214 
7215 static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
7216                               llvm::Type *DstTy) {
7217   unsigned NumberOfElements = DstTy->getVectorNumElements();
7218   Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
7219   return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
7220 }
7221 
7222 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
7223                                            const CallExpr *E) {
7224   if (BuiltinID == X86::BI__builtin_ms_va_start ||
7225       BuiltinID == X86::BI__builtin_ms_va_end)
7226     return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
7227                           BuiltinID == X86::BI__builtin_ms_va_start);
7228   if (BuiltinID == X86::BI__builtin_ms_va_copy) {
7229     // Lower this manually. We can't reliably determine whether or not any
7230     // given va_copy() is for a Win64 va_list from the calling convention
7231     // alone, because it's legal to do this from a System V ABI function.
7232     // With opaque pointer types, we won't have enough information in LLVM
7233     // IR to determine this from the argument types, either. Best to do it
7234     // now, while we have enough information.
7235     Address DestAddr = EmitMSVAListRef(E->getArg(0));
7236     Address SrcAddr = EmitMSVAListRef(E->getArg(1));
7237 
7238     llvm::Type *BPP = Int8PtrPtrTy;
7239 
7240     DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
7241                        DestAddr.getAlignment());
7242     SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
7243                       SrcAddr.getAlignment());
7244 
7245     Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
7246     return Builder.CreateStore(ArgPtr, DestAddr);
7247   }
7248 
7249   SmallVector<Value*, 4> Ops;
7250 
7251   // Find out if any arguments are required to be integer constant expressions.
7252   unsigned ICEArguments = 0;
7253   ASTContext::GetBuiltinTypeError Error;
7254   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
7255   assert(Error == ASTContext::GE_None && "Should not codegen an error");
7256 
7257   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
7258     // If this is a normal argument, just emit it as a scalar.
7259     if ((ICEArguments & (1 << i)) == 0) {
7260       Ops.push_back(EmitScalarExpr(E->getArg(i)));
7261       continue;
7262     }
7263 
7264     // If this is required to be a constant, constant fold it so that we know
7265     // that the generated intrinsic gets a ConstantInt.
7266     llvm::APSInt Result;
7267     bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
7268     assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
7269     Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
7270   }
7271 
7272   // These exist so that the builtin that takes an immediate can be bounds
7273   // checked by clang to avoid passing bad immediates to the backend. Since
7274   // AVX has a larger immediate than SSE we would need separate builtins to
7275   // do the different bounds checking. Rather than create a clang specific
7276   // SSE only builtin, this implements eight separate builtins to match gcc
7277   // implementation.
7278   auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
7279     Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
7280     llvm::Function *F = CGM.getIntrinsic(ID);
7281     return Builder.CreateCall(F, Ops);
7282   };
7283 
7284   // For the vector forms of FP comparisons, translate the builtins directly to
7285   // IR.
7286   // TODO: The builtins could be removed if the SSE header files used vector
7287   // extension comparisons directly (vector ordered/unordered may need
7288   // additional support via __builtin_isnan()).
7289   auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
7290     Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
7291     llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
7292     llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
7293     Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
7294     return Builder.CreateBitCast(Sext, FPVecTy);
7295   };
7296 
7297   switch (BuiltinID) {
7298   default: return nullptr;
7299   case X86::BI__builtin_cpu_supports: {
7300     const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
7301     StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
7302 
7303     // TODO: When/if this becomes more than x86 specific then use a TargetInfo
7304     // based mapping.
7305     // Processor features and mapping to processor feature value.
7306     enum X86Features {
7307       CMOV = 0,
7308       MMX,
7309       POPCNT,
7310       SSE,
7311       SSE2,
7312       SSE3,
7313       SSSE3,
7314       SSE4_1,
7315       SSE4_2,
7316       AVX,
7317       AVX2,
7318       SSE4_A,
7319       FMA4,
7320       XOP,
7321       FMA,
7322       AVX512F,
7323       BMI,
7324       BMI2,
7325       AES,
7326       PCLMUL,
7327       AVX512VL,
7328       AVX512BW,
7329       AVX512DQ,
7330       AVX512CD,
7331       AVX512ER,
7332       AVX512PF,
7333       AVX512VBMI,
7334       AVX512IFMA,
7335       MAX
7336     };
7337 
7338     X86Features Feature = StringSwitch<X86Features>(FeatureStr)
7339                               .Case("cmov", X86Features::CMOV)
7340                               .Case("mmx", X86Features::MMX)
7341                               .Case("popcnt", X86Features::POPCNT)
7342                               .Case("sse", X86Features::SSE)
7343                               .Case("sse2", X86Features::SSE2)
7344                               .Case("sse3", X86Features::SSE3)
7345                               .Case("ssse3", X86Features::SSSE3)
7346                               .Case("sse4.1", X86Features::SSE4_1)
7347                               .Case("sse4.2", X86Features::SSE4_2)
7348                               .Case("avx", X86Features::AVX)
7349                               .Case("avx2", X86Features::AVX2)
7350                               .Case("sse4a", X86Features::SSE4_A)
7351                               .Case("fma4", X86Features::FMA4)
7352                               .Case("xop", X86Features::XOP)
7353                               .Case("fma", X86Features::FMA)
7354                               .Case("avx512f", X86Features::AVX512F)
7355                               .Case("bmi", X86Features::BMI)
7356                               .Case("bmi2", X86Features::BMI2)
7357                               .Case("aes", X86Features::AES)
7358                               .Case("pclmul", X86Features::PCLMUL)
7359                               .Case("avx512vl", X86Features::AVX512VL)
7360                               .Case("avx512bw", X86Features::AVX512BW)
7361                               .Case("avx512dq", X86Features::AVX512DQ)
7362                               .Case("avx512cd", X86Features::AVX512CD)
7363                               .Case("avx512er", X86Features::AVX512ER)
7364                               .Case("avx512pf", X86Features::AVX512PF)
7365                               .Case("avx512vbmi", X86Features::AVX512VBMI)
7366                               .Case("avx512ifma", X86Features::AVX512IFMA)
7367                               .Default(X86Features::MAX);
7368     assert(Feature != X86Features::MAX && "Invalid feature!");
7369 
7370     // Matching the struct layout from the compiler-rt/libgcc structure that is
7371     // filled in:
7372     // unsigned int __cpu_vendor;
7373     // unsigned int __cpu_type;
7374     // unsigned int __cpu_subtype;
7375     // unsigned int __cpu_features[1];
7376     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
7377                                             llvm::ArrayType::get(Int32Ty, 1));
7378 
7379     // Grab the global __cpu_model.
7380     llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
7381 
7382     // Grab the first (0th) element from the field __cpu_features off of the
7383     // global in the struct STy.
7384     Value *Idxs[] = {
7385       ConstantInt::get(Int32Ty, 0),
7386       ConstantInt::get(Int32Ty, 3),
7387       ConstantInt::get(Int32Ty, 0)
7388     };
7389     Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
7390     Value *Features = Builder.CreateAlignedLoad(CpuFeatures,
7391                                                 CharUnits::fromQuantity(4));
7392 
7393     // Check the value of the bit corresponding to the feature requested.
7394     Value *Bitset = Builder.CreateAnd(
7395         Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature));
7396     return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
7397   }
7398   case X86::BI_mm_prefetch: {
         // Lower _mm_prefetch to the target-independent llvm.prefetch intrinsic.
         // The address and the locality hint are taken from the call operands;
         // the read/write argument is hard-wired to 0 and the cache-type
         // argument to 1.
7399     Value *Address = Ops[0];
7400     Value *RW = ConstantInt::get(Int32Ty, 0);
7401     Value *Locality = Ops[1];
7402     Value *Data = ConstantInt::get(Int32Ty, 1);
7403     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
7404     return Builder.CreateCall(F, {Address, RW, Locality, Data});
7405   }
7406   case X86::BI_mm_clflush: {
         // This arm and the five that follow each forward to a single target
         // intrinsic. Only clflush passes an operand (Ops[0]); the others are
         // emitted as calls with no arguments.
7407     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
7408                               Ops[0]);
7409   }
7410   case X86::BI_mm_lfence: {
7411     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
7412   }
7413   case X86::BI_mm_mfence: {
7414     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
7415   }
7416   case X86::BI_mm_sfence: {
7417     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
7418   }
7419   case X86::BI_mm_pause: {
7420     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
7421   }
7422   case X86::BI__rdtsc: {
7423     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
7424   }
7425   case X86::BI__builtin_ia32_undef128:
7426   case X86::BI__builtin_ia32_undef256:
7427   case X86::BI__builtin_ia32_undef512:
7428     // The x86 definition of "undef" is not the same as the LLVM definition
7429     // (PR32176). We leave optimizing away an unnecessary zero constant to the
7430     // IR optimizer and backend.
7431     // TODO: If we had a "freeze" IR instruction to generate a fixed undef
7432     // value, we should use that here instead of a zero.
         // Until then, the result is the all-zero constant of the call's result
         // type.
7433     return llvm::Constant::getNullValue(ConvertType(E->getType()));
7434   case X86::BI__builtin_ia32_vec_init_v8qi:
7435   case X86::BI__builtin_ia32_vec_init_v4hi:
7436   case X86::BI__builtin_ia32_vec_init_v2si:
         // Assemble the operands into a vector with BuildVector, then bitcast
         // that vector to the x86_mmx type.
7437     return Builder.CreateBitCast(BuildVector(Ops),
7438                                  llvm::Type::getX86_MMXTy(getLLVMContext()));
7439   case X86::BI__builtin_ia32_vec_ext_v2si:
         // Extract element 0 of Ops[0]. Only the *type* of Ops[1] is used (to
         // build the constant index); its value is ignored.
         // NOTE(review): a caller passing a non-zero index would still get
         // element 0 - confirm that every user of this builtin passes 0.
7440     return Builder.CreateExtractElement(Ops[0],
7441                                   llvm::ConstantInt::get(Ops[1]->getType(), 0));
7442   case X86::BI_mm_setcsr:
7443   case X86::BI__builtin_ia32_ldmxcsr: {
         // The intrinsic is called with a pointer, so spill the argument value to
         // a fresh temporary and pass that temporary's address as an i8*.
7444     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
7445     Builder.CreateStore(Ops[0], Tmp);
7446     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
7447                           Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
7448   }
7449   case X86::BI_mm_getcsr:
7450   case X86::BI__builtin_ia32_stmxcsr: {
         // Mirror image of the ldmxcsr arm: hand the intrinsic the address of a
         // temporary of the result type (as an i8*), then load the temporary and
         // return the loaded value.
7451     Address Tmp = CreateMemTemp(E->getType());
7452     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
7453                        Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
7454     return Builder.CreateLoad(Tmp, "stmxcsr");
7455   }
7456   case X86::BI__builtin_ia32_xsave:
7457   case X86::BI__builtin_ia32_xsave64:
7458   case X86::BI__builtin_ia32_xrstor:
7459   case X86::BI__builtin_ia32_xrstor64:
7460   case X86::BI__builtin_ia32_xsaveopt:
7461   case X86::BI__builtin_ia32_xsaveopt64:
7462   case X86::BI__builtin_ia32_xrstors:
7463   case X86::BI__builtin_ia32_xrstors64:
7464   case X86::BI__builtin_ia32_xsavec:
7465   case X86::BI__builtin_ia32_xsavec64:
7466   case X86::BI__builtin_ia32_xsaves:
7467   case X86::BI__builtin_ia32_xsaves64: {
         // Step 1: map the builtin ID onto the identically named x86 intrinsic.
         // The helper macro expands to one "case ...: ID = ...; break" per name
         // and is undefined again right after the switch.
7468     Intrinsic::ID ID;
7469 #define INTRINSIC_X86_XSAVE_ID(NAME) \
7470     case X86::BI__builtin_ia32_##NAME: \
7471       ID = Intrinsic::x86_##NAME; \
7472       break
7473     switch (BuiltinID) {
7474     default: llvm_unreachable("Unsupported intrinsic!");
7475     INTRINSIC_X86_XSAVE_ID(xsave);
7476     INTRINSIC_X86_XSAVE_ID(xsave64);
7477     INTRINSIC_X86_XSAVE_ID(xrstor);
7478     INTRINSIC_X86_XSAVE_ID(xrstor64);
7479     INTRINSIC_X86_XSAVE_ID(xsaveopt);
7480     INTRINSIC_X86_XSAVE_ID(xsaveopt64);
7481     INTRINSIC_X86_XSAVE_ID(xrstors);
7482     INTRINSIC_X86_XSAVE_ID(xrstors64);
7483     INTRINSIC_X86_XSAVE_ID(xsavec);
7484     INTRINSIC_X86_XSAVE_ID(xsavec64);
7485     INTRINSIC_X86_XSAVE_ID(xsaves);
7486     INTRINSIC_X86_XSAVE_ID(xsaves64);
7487     }
7488 #undef INTRINSIC_X86_XSAVE_ID
         // Step 2: split the mask in Ops[1] into two i32 halves. The high half
         // (bits 32 and up, shifted down) replaces Ops[1] and the low half is
         // appended, so the call receives Ops[0], high half, low half - assuming
         // Ops held exactly the pointer and the mask on entry.
7489     Value *Mhi = Builder.CreateTrunc(
7490       Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
7491     Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
7492     Ops[1] = Mhi;
7493     Ops.push_back(Mlo);
7494     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
7495   }
7496   case X86::BI__builtin_ia32_storedqudi128_mask:
7497   case X86::BI__builtin_ia32_storedqusi128_mask:
7498   case X86::BI__builtin_ia32_storedquhi128_mask:
7499   case X86::BI__builtin_ia32_storedquqi128_mask:
7500   case X86::BI__builtin_ia32_storeupd128_mask:
7501   case X86::BI__builtin_ia32_storeups128_mask:
7502   case X86::BI__builtin_ia32_storedqudi256_mask:
7503   case X86::BI__builtin_ia32_storedqusi256_mask:
7504   case X86::BI__builtin_ia32_storedquhi256_mask:
7505   case X86::BI__builtin_ia32_storedquqi256_mask:
7506   case X86::BI__builtin_ia32_storeupd256_mask:
7507   case X86::BI__builtin_ia32_storeups256_mask:
7508   case X86::BI__builtin_ia32_storedqudi512_mask:
7509   case X86::BI__builtin_ia32_storedqusi512_mask:
7510   case X86::BI__builtin_ia32_storedquhi512_mask:
7511   case X86::BI__builtin_ia32_storedquqi512_mask:
7512   case X86::BI__builtin_ia32_storeupd512_mask:
7513   case X86::BI__builtin_ia32_storeups512_mask:
         // Unaligned ("u") masked stores of every width share one helper call.
         // The trailing 1 is presumably the alignment in bytes -
         // EmitX86MaskedStore is defined outside this view.
7514     return EmitX86MaskedStore(*this, Ops, 1);
7515 
7516   case X86::BI__builtin_ia32_storess128_mask:
7517   case X86::BI__builtin_ia32_storesd128_mask: {
         // Masked scalar (ss/sd) stores go through the same helper as the vector
         // forms, but with 16 as the trailing argument.
         // NOTE(review): the unaligned vector stores pass 1 here. If this
         // argument is an alignment, confirm that a 16-byte guarantee really
         // holds for the pointer given to these scalar stores.
7518     return EmitX86MaskedStore(*this, Ops, 16);
7519   }
7520 
7521   case X86::BI__builtin_ia32_cvtmask2b128:
7522   case X86::BI__builtin_ia32_cvtmask2b256:
7523   case X86::BI__builtin_ia32_cvtmask2b512:
7524   case X86::BI__builtin_ia32_cvtmask2w128:
7525   case X86::BI__builtin_ia32_cvtmask2w256:
7526   case X86::BI__builtin_ia32_cvtmask2w512:
7527   case X86::BI__builtin_ia32_cvtmask2d128:
7528   case X86::BI__builtin_ia32_cvtmask2d256:
7529   case X86::BI__builtin_ia32_cvtmask2d512:
7530   case X86::BI__builtin_ia32_cvtmask2q128:
7531   case X86::BI__builtin_ia32_cvtmask2q256:
7532   case X86::BI__builtin_ia32_cvtmask2q512:
         // Mask-to-vector conversions for all element widths: delegate to
         // EmitX86SExtMask with the mask operand and the call's result type.
7533     return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
7534 
7535   case X86::BI__builtin_ia32_movdqa32store128_mask:
7536   case X86::BI__builtin_ia32_movdqa64store128_mask:
7537   case X86::BI__builtin_ia32_storeaps128_mask:
7538   case X86::BI__builtin_ia32_storeapd128_mask:
7539   case X86::BI__builtin_ia32_movdqa32store256_mask:
7540   case X86::BI__builtin_ia32_movdqa64store256_mask:
7541   case X86::BI__builtin_ia32_storeaps256_mask:
7542   case X86::BI__builtin_ia32_storeapd256_mask:
7543   case X86::BI__builtin_ia32_movdqa32store512_mask:
7544   case X86::BI__builtin_ia32_movdqa64store512_mask:
7545   case X86::BI__builtin_ia32_storeaps512_mask:
7546   case X86::BI__builtin_ia32_storeapd512_mask: {
         // Aligned masked stores: rather than a fixed constant, the alignment
         // handed to the helper is the natural alignment (in bytes) of the type
         // of the call's second argument.
7547     unsigned Align =
7548       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
7549     return EmitX86MaskedStore(*this, Ops, Align);
7550   }
7551   case X86::BI__builtin_ia32_loadups128_mask:
7552   case X86::BI__builtin_ia32_loadups256_mask:
7553   case X86::BI__builtin_ia32_loadups512_mask:
7554   case X86::BI__builtin_ia32_loadupd128_mask:
7555   case X86::BI__builtin_ia32_loadupd256_mask:
7556   case X86::BI__builtin_ia32_loadupd512_mask:
7557   case X86::BI__builtin_ia32_loaddquqi128_mask:
7558   case X86::BI__builtin_ia32_loaddquqi256_mask:
7559   case X86::BI__builtin_ia32_loaddquqi512_mask:
7560   case X86::BI__builtin_ia32_loaddquhi128_mask:
7561   case X86::BI__builtin_ia32_loaddquhi256_mask:
7562   case X86::BI__builtin_ia32_loaddquhi512_mask:
7563   case X86::BI__builtin_ia32_loaddqusi128_mask:
7564   case X86::BI__builtin_ia32_loaddqusi256_mask:
7565   case X86::BI__builtin_ia32_loaddqusi512_mask:
7566   case X86::BI__builtin_ia32_loaddqudi128_mask:
7567   case X86::BI__builtin_ia32_loaddqudi256_mask:
7568   case X86::BI__builtin_ia32_loaddqudi512_mask:
         // Unaligned ("u") masked loads of every width share one helper call.
         // The trailing 1 is presumably the alignment in bytes -
         // EmitX86MaskedLoad is defined outside this view.
7569     return EmitX86MaskedLoad(*this, Ops, 1);
7570 
7571   case X86::BI__builtin_ia32_loadss128_mask:
7572   case X86::BI__builtin_ia32_loadsd128_mask:
         // Masked scalar (ss/sd) loads go through the same helper as the vector
         // forms, but with 16 as the trailing argument.
         // NOTE(review): the unaligned vector loads pass 1 here. If this argument
         // is an alignment, confirm that a 16-byte guarantee really holds for
         // the pointer given to these scalar loads.
7573     return EmitX86MaskedLoad(*this, Ops, 16);
7574 
7575   case X86::BI__builtin_ia32_loadaps128_mask:
7576   case X86::BI__builtin_ia32_loadaps256_mask:
7577   case X86::BI__builtin_ia32_loadaps512_mask:
7578   case X86::BI__builtin_ia32_loadapd128_mask:
7579   case X86::BI__builtin_ia32_loadapd256_mask:
7580   case X86::BI__builtin_ia32_loadapd512_mask:
7581   case X86::BI__builtin_ia32_movdqa32load128_mask:
7582   case X86::BI__builtin_ia32_movdqa32load256_mask:
7583   case X86::BI__builtin_ia32_movdqa32load512_mask:
7584   case X86::BI__builtin_ia32_movdqa64load128_mask:
7585   case X86::BI__builtin_ia32_movdqa64load256_mask:
7586   case X86::BI__builtin_ia32_movdqa64load512_mask: {
         // Aligned masked loads: rather than a fixed constant, the alignment
         // handed to the helper is the natural alignment (in bytes) of the type
         // of the call's second argument.
7587     unsigned Align =
7588       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
7589     return EmitX86MaskedLoad(*this, Ops, Align);
7590   }
7591 
7592   case X86::BI__builtin_ia32_vbroadcastf128_pd256:
7593   case X86::BI__builtin_ia32_vbroadcastf128_ps256: {
         // Delegate to EmitX86SubVectorBroadcast with the call's result type as
         // the destination. The literals 128 and 1 are passed through verbatim;
         // presumably the source width in bits and an alignment - the helper is
         // defined outside this view, TODO confirm.
7594     llvm::Type *DstTy = ConvertType(E->getType());
7595     return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1);
7596   }
7597 
7598   case X86::BI__builtin_ia32_storehps:
7599   case X86::BI__builtin_ia32_storelps: {
         // Store one 64-bit half of the vector operand (Ops[1]) through the
         // pointer operand (Ops[0]): storelps writes element 0 of the value
         // viewed as <2 x i64>, storehps writes element 1.
7600     llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
7601     llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
7602 
7603     // cast val v2i64
7604     Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
7605 
7606     // extract (0, 1)
7607     unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
7608     llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
7609     Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
7610 
7611     // cast pointer to i64 & store
7612     Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
7613     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7614   }
7615   case X86::BI__builtin_ia32_palignr128:
7616   case X86::BI__builtin_ia32_palignr256:
7617   case X86::BI__builtin_ia32_palignr512_mask: {
         // Lower palignr to a shufflevector. Ops[2] is the byte shift amount and
         // must be a ConstantInt; the vector operands are expected to have a
         // multiple of 16 elements (one 16-element group per lane).
7618     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7619 
7620     unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7621     assert(NumElts % 16 == 0);
7622 
7623     // If palignr is shifting the pair of vectors more than the size of two
7624     // lanes, emit zero.
7625     if (ShiftVal >= 32)
7626       return llvm::Constant::getNullValue(ConvertType(E->getType()));
7627 
7628     // If palignr is shifting the pair of input vectors more than one lane,
7629     // but less than two lanes, convert to shifting in zeroes.
7630     if (ShiftVal > 16) {
7631       ShiftVal -= 16;
7632       Ops[1] = Ops[0];
7633       Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
7634     }
7635 
         // Build the shuffle mask lane by lane. Within a lane, result element i
         // takes element ShiftVal + i of the lane-wise concatenation: indices
         // below 16 select from Ops[1] (the first shuffle operand), and indices
         // that run off the end of the lane are bumped by NumElts - 16 so that
         // they select the same lane of Ops[0] (the second shuffle operand).
7636     uint32_t Indices[64];
7637     // 256-bit palignr operates on 128-bit lanes so we need to handle that
7638     for (unsigned l = 0; l != NumElts; l += 16) {
7639       for (unsigned i = 0; i != 16; ++i) {
7640         unsigned Idx = ShiftVal + i;
7641         if (Idx >= 16)
7642           Idx += NumElts - 16; // End of lane, switch operand.
7643         Indices[l + i] = Idx + l;
7644       }
7645     }
7646 
7647     Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
7648                                                makeArrayRef(Indices, NumElts),
7649                                                "palignr");
7650 
7651     // If this isn't a masked builtin, just return the align operation.
7652     if (Ops.size() == 3)
7653       return Align;
7654 
         // Masked form: combine the shuffle result with Ops[3] under Ops[4]
         // (presumably pass-through value and mask - EmitX86Select is defined
         // outside this view).
7655     return EmitX86Select(*this, Ops[4], Align, Ops[3]);
7656   }
7657 
7658   case X86::BI__builtin_ia32_movnti:
7659   case X86::BI__builtin_ia32_movnti64:
7660   case X86::BI__builtin_ia32_movntsd:
7661   case X86::BI__builtin_ia32_movntss: {
         // Emit a scalar store tagged with "nontemporal" metadata. The metadata
         // node wraps the i32 constant 1 and is attached to the store below.
7662     llvm::MDNode *Node = llvm::MDNode::get(
7663         getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
7664 
7665     Value *Ptr = Ops[0];
7666     Value *Src = Ops[1];
7667 
7668     // Extract the 0'th element of the source vector.
7669     if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
7670         BuiltinID == X86::BI__builtin_ia32_movntss)
7671       Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
7672 
7673     // Convert the type of the pointer to a pointer to the stored type.
7674     Value *BC = Builder.CreateBitCast(
7675         Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");
7676 
7677     // Unaligned nontemporal store of the scalar value.
         // The store is first created with the default alignment and then
         // explicitly reset to alignment 1.
7678     StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
7679     SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
7680     SI->setAlignment(1);
7681     return SI;
7682   }
7683 
7684   case X86::BI__builtin_ia32_selectb_128:
7685   case X86::BI__builtin_ia32_selectb_256:
7686   case X86::BI__builtin_ia32_selectb_512:
7687   case X86::BI__builtin_ia32_selectw_128:
7688   case X86::BI__builtin_ia32_selectw_256:
7689   case X86::BI__builtin_ia32_selectw_512:
7690   case X86::BI__builtin_ia32_selectd_128:
7691   case X86::BI__builtin_ia32_selectd_256:
7692   case X86::BI__builtin_ia32_selectd_512:
7693   case X86::BI__builtin_ia32_selectq_128:
7694   case X86::BI__builtin_ia32_selectq_256:
7695   case X86::BI__builtin_ia32_selectq_512:
7696   case X86::BI__builtin_ia32_selectps_128:
7697   case X86::BI__builtin_ia32_selectps_256:
7698   case X86::BI__builtin_ia32_selectps_512:
7699   case X86::BI__builtin_ia32_selectpd_128:
7700   case X86::BI__builtin_ia32_selectpd_256:
7701   case X86::BI__builtin_ia32_selectpd_512:
         // Every element type and width forwards the three operands, in order,
         // to EmitX86Select (defined outside this view).
7702     return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
7703   case X86::BI__builtin_ia32_pcmpeqb128_mask:
7704   case X86::BI__builtin_ia32_pcmpeqb256_mask:
7705   case X86::BI__builtin_ia32_pcmpeqb512_mask:
7706   case X86::BI__builtin_ia32_pcmpeqw128_mask:
7707   case X86::BI__builtin_ia32_pcmpeqw256_mask:
7708   case X86::BI__builtin_ia32_pcmpeqw512_mask:
7709   case X86::BI__builtin_ia32_pcmpeqd128_mask:
7710   case X86::BI__builtin_ia32_pcmpeqd256_mask:
7711   case X86::BI__builtin_ia32_pcmpeqd512_mask:
7712   case X86::BI__builtin_ia32_pcmpeqq128_mask:
7713   case X86::BI__builtin_ia32_pcmpeqq256_mask:
7714   case X86::BI__builtin_ia32_pcmpeqq512_mask:
         // Masked integer equality compare: fixed condition code 0. The boolean
         // is presumably a signedness flag (the ucmp* arms pass false, the cmp*
         // arms true) - EmitX86MaskedCompare is defined outside this view.
7715     return EmitX86MaskedCompare(*this, 0, false, Ops);
7716   case X86::BI__builtin_ia32_pcmpgtb128_mask:
7717   case X86::BI__builtin_ia32_pcmpgtb256_mask:
7718   case X86::BI__builtin_ia32_pcmpgtb512_mask:
7719   case X86::BI__builtin_ia32_pcmpgtw128_mask:
7720   case X86::BI__builtin_ia32_pcmpgtw256_mask:
7721   case X86::BI__builtin_ia32_pcmpgtw512_mask:
7722   case X86::BI__builtin_ia32_pcmpgtd128_mask:
7723   case X86::BI__builtin_ia32_pcmpgtd256_mask:
7724   case X86::BI__builtin_ia32_pcmpgtd512_mask:
7725   case X86::BI__builtin_ia32_pcmpgtq128_mask:
7726   case X86::BI__builtin_ia32_pcmpgtq256_mask:
7727   case X86::BI__builtin_ia32_pcmpgtq512_mask:
         // Masked integer greater-than compare: fixed condition code 6 with the
         // boolean set to true.
7728     return EmitX86MaskedCompare(*this, 6, true, Ops);
7729   case X86::BI__builtin_ia32_cmpb128_mask:
7730   case X86::BI__builtin_ia32_cmpb256_mask:
7731   case X86::BI__builtin_ia32_cmpb512_mask:
7732   case X86::BI__builtin_ia32_cmpw128_mask:
7733   case X86::BI__builtin_ia32_cmpw256_mask:
7734   case X86::BI__builtin_ia32_cmpw512_mask:
7735   case X86::BI__builtin_ia32_cmpd128_mask:
7736   case X86::BI__builtin_ia32_cmpd256_mask:
7737   case X86::BI__builtin_ia32_cmpd512_mask:
7738   case X86::BI__builtin_ia32_cmpq128_mask:
7739   case X86::BI__builtin_ia32_cmpq256_mask:
7740   case X86::BI__builtin_ia32_cmpq512_mask: {
         // The condition code is the low three bits of the constant in Ops[2]
         // (which must be a ConstantInt). The boolean passed to the helper is
         // true here and false in the ucmp* arm; presumably "signed".
7741     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7742     return EmitX86MaskedCompare(*this, CC, true, Ops);
7743   }
7744   case X86::BI__builtin_ia32_ucmpb128_mask:
7745   case X86::BI__builtin_ia32_ucmpb256_mask:
7746   case X86::BI__builtin_ia32_ucmpb512_mask:
7747   case X86::BI__builtin_ia32_ucmpw128_mask:
7748   case X86::BI__builtin_ia32_ucmpw256_mask:
7749   case X86::BI__builtin_ia32_ucmpw512_mask:
7750   case X86::BI__builtin_ia32_ucmpd128_mask:
7751   case X86::BI__builtin_ia32_ucmpd256_mask:
7752   case X86::BI__builtin_ia32_ucmpd512_mask:
7753   case X86::BI__builtin_ia32_ucmpq128_mask:
7754   case X86::BI__builtin_ia32_ucmpq256_mask:
7755   case X86::BI__builtin_ia32_ucmpq512_mask: {
         // Same as the cmp* arm, except that the boolean is false.
7756     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7757     return EmitX86MaskedCompare(*this, CC, false, Ops);
7758   }
7759 
7760   case X86::BI__builtin_ia32_vplzcntd_128_mask:
7761   case X86::BI__builtin_ia32_vplzcntd_256_mask:
7762   case X86::BI__builtin_ia32_vplzcntd_512_mask:
7763   case X86::BI__builtin_ia32_vplzcntq_128_mask:
7764   case X86::BI__builtin_ia32_vplzcntq_256_mask:
7765   case X86::BI__builtin_ia32_vplzcntq_512_mask: {
         // Count leading zeros with the generic llvm.ctlz intrinsic, overloaded
         // on the type of Ops[0] and called with a false second argument, then
         // combine the result with Ops[1] under Ops[2] via EmitX86Select.
7766     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
7767     return EmitX86Select(*this, Ops[2],
7768                          Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}),
7769                          Ops[1]);
7770   }
7771 
7772   case X86::BI__builtin_ia32_pmaxsb128:
7773   case X86::BI__builtin_ia32_pmaxsw128:
7774   case X86::BI__builtin_ia32_pmaxsd128:
7775   case X86::BI__builtin_ia32_pmaxsq128_mask:
7776   case X86::BI__builtin_ia32_pmaxsb256:
7777   case X86::BI__builtin_ia32_pmaxsw256:
7778   case X86::BI__builtin_ia32_pmaxsd256:
7779   case X86::BI__builtin_ia32_pmaxsq256_mask:
7780   case X86::BI__builtin_ia32_pmaxsb512_mask:
7781   case X86::BI__builtin_ia32_pmaxsw512_mask:
7782   case X86::BI__builtin_ia32_pmaxsd512_mask:
7783   case X86::BI__builtin_ia32_pmaxsq512_mask:
         // The four min/max families differ only in the integer predicate given
         // to EmitX86MinMax. Signed max: signed greater-than.
7784     return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops);
7785   case X86::BI__builtin_ia32_pmaxub128:
7786   case X86::BI__builtin_ia32_pmaxuw128:
7787   case X86::BI__builtin_ia32_pmaxud128:
7788   case X86::BI__builtin_ia32_pmaxuq128_mask:
7789   case X86::BI__builtin_ia32_pmaxub256:
7790   case X86::BI__builtin_ia32_pmaxuw256:
7791   case X86::BI__builtin_ia32_pmaxud256:
7792   case X86::BI__builtin_ia32_pmaxuq256_mask:
7793   case X86::BI__builtin_ia32_pmaxub512_mask:
7794   case X86::BI__builtin_ia32_pmaxuw512_mask:
7795   case X86::BI__builtin_ia32_pmaxud512_mask:
7796   case X86::BI__builtin_ia32_pmaxuq512_mask:
         // Unsigned max: unsigned greater-than.
7797     return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops);
7798   case X86::BI__builtin_ia32_pminsb128:
7799   case X86::BI__builtin_ia32_pminsw128:
7800   case X86::BI__builtin_ia32_pminsd128:
7801   case X86::BI__builtin_ia32_pminsq128_mask:
7802   case X86::BI__builtin_ia32_pminsb256:
7803   case X86::BI__builtin_ia32_pminsw256:
7804   case X86::BI__builtin_ia32_pminsd256:
7805   case X86::BI__builtin_ia32_pminsq256_mask:
7806   case X86::BI__builtin_ia32_pminsb512_mask:
7807   case X86::BI__builtin_ia32_pminsw512_mask:
7808   case X86::BI__builtin_ia32_pminsd512_mask:
7809   case X86::BI__builtin_ia32_pminsq512_mask:
         // Signed min: signed less-than.
7810     return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops);
7811   case X86::BI__builtin_ia32_pminub128:
7812   case X86::BI__builtin_ia32_pminuw128:
7813   case X86::BI__builtin_ia32_pminud128:
7814   case X86::BI__builtin_ia32_pminuq128_mask:
7815   case X86::BI__builtin_ia32_pminub256:
7816   case X86::BI__builtin_ia32_pminuw256:
7817   case X86::BI__builtin_ia32_pminud256:
7818   case X86::BI__builtin_ia32_pminuq256_mask:
7819   case X86::BI__builtin_ia32_pminub512_mask:
7820   case X86::BI__builtin_ia32_pminuw512_mask:
7821   case X86::BI__builtin_ia32_pminud512_mask:
7822   case X86::BI__builtin_ia32_pminuq512_mask:
         // Unsigned min: unsigned less-than.
7823     return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops);
7824 
7825   // 3DNow!
7826   case X86::BI__builtin_ia32_pswapdsf:
7827   case X86::BI__builtin_ia32_pswapdsi: {
         // Both the float and the integer flavour bitcast the operand to the
         // x86_mmx type and call the same pswapd intrinsic.
7828     llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
7829     Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
7830     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
7831     return Builder.CreateCall(F, Ops, "pswapd");
7832   }
7833   case X86::BI__builtin_ia32_rdrand16_step:
7834   case X86::BI__builtin_ia32_rdrand32_step:
7835   case X86::BI__builtin_ia32_rdrand64_step:
7836   case X86::BI__builtin_ia32_rdseed16_step:
7837   case X86::BI__builtin_ia32_rdseed32_step:
7838   case X86::BI__builtin_ia32_rdseed64_step: {
         // Pick the rdrand/rdseed intrinsic of the matching width.
7839     Intrinsic::ID ID;
7840     switch (BuiltinID) {
7841     default: llvm_unreachable("Unsupported intrinsic!");
7842     case X86::BI__builtin_ia32_rdrand16_step:
7843       ID = Intrinsic::x86_rdrand_16;
7844       break;
7845     case X86::BI__builtin_ia32_rdrand32_step:
7846       ID = Intrinsic::x86_rdrand_32;
7847       break;
7848     case X86::BI__builtin_ia32_rdrand64_step:
7849       ID = Intrinsic::x86_rdrand_64;
7850       break;
7851     case X86::BI__builtin_ia32_rdseed16_step:
7852       ID = Intrinsic::x86_rdseed_16;
7853       break;
7854     case X86::BI__builtin_ia32_rdseed32_step:
7855       ID = Intrinsic::x86_rdseed_32;
7856       break;
7857     case X86::BI__builtin_ia32_rdseed64_step:
7858       ID = Intrinsic::x86_rdseed_64;
7859       break;
7860     }
7861 
         // The intrinsic takes no arguments and yields an aggregate: member 0 is
         // stored through the pointer in Ops[0] and member 1 becomes the
         // builtin's result (presumably the generated value and a success flag).
7862     Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
7863     Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
7864                                       Ops[0]);
7865     return Builder.CreateExtractValue(Call, 1);
7866   }
7867 
7868   // SSE packed comparison intrinsics
       // Each named packed compare maps to one fcmp predicate, which is handed to
       // getVectorFCmpIR (defined outside this view). The plain forms (eq, lt,
       // le, ord) use ordered predicates; the negated forms (neq, nlt, nle) and
       // unord use unordered predicates.
7869   case X86::BI__builtin_ia32_cmpeqps:
7870   case X86::BI__builtin_ia32_cmpeqpd:
7871     return getVectorFCmpIR(CmpInst::FCMP_OEQ);
7872   case X86::BI__builtin_ia32_cmpltps:
7873   case X86::BI__builtin_ia32_cmpltpd:
7874     return getVectorFCmpIR(CmpInst::FCMP_OLT);
7875   case X86::BI__builtin_ia32_cmpleps:
7876   case X86::BI__builtin_ia32_cmplepd:
7877     return getVectorFCmpIR(CmpInst::FCMP_OLE);
7878   case X86::BI__builtin_ia32_cmpunordps:
7879   case X86::BI__builtin_ia32_cmpunordpd:
7880     return getVectorFCmpIR(CmpInst::FCMP_UNO);
7881   case X86::BI__builtin_ia32_cmpneqps:
7882   case X86::BI__builtin_ia32_cmpneqpd:
7883     return getVectorFCmpIR(CmpInst::FCMP_UNE);
7884   case X86::BI__builtin_ia32_cmpnltps:
7885   case X86::BI__builtin_ia32_cmpnltpd:
7886     return getVectorFCmpIR(CmpInst::FCMP_UGE);
7887   case X86::BI__builtin_ia32_cmpnleps:
7888   case X86::BI__builtin_ia32_cmpnlepd:
7889     return getVectorFCmpIR(CmpInst::FCMP_UGT);
7890   case X86::BI__builtin_ia32_cmpordps:
7891   case X86::BI__builtin_ia32_cmpordpd:
7892     return getVectorFCmpIR(CmpInst::FCMP_ORD);
7893   case X86::BI__builtin_ia32_cmpps:
7894   case X86::BI__builtin_ia32_cmpps256:
7895   case X86::BI__builtin_ia32_cmppd:
7896   case X86::BI__builtin_ia32_cmppd256: {
         // Generic packed FP compare: the comparison is chosen by the constant
         // immediate in Ops[2], which must be a ConstantInt.
7897     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7898     // If this is one of the SSE immediates, we can use native IR.
7899     if (CC < 8) {
           // Immediates 0..7 use the same predicates as the named packed
           // compares: eq, lt, le, unord, neq, nlt, nle, ord. The switch has no
           // default because the guard above restricts CC to 0..7.
7900       FCmpInst::Predicate Pred;
7901       switch (CC) {
7902       case 0: Pred = FCmpInst::FCMP_OEQ; break;
7903       case 1: Pred = FCmpInst::FCMP_OLT; break;
7904       case 2: Pred = FCmpInst::FCMP_OLE; break;
7905       case 3: Pred = FCmpInst::FCMP_UNO; break;
7906       case 4: Pred = FCmpInst::FCMP_UNE; break;
7907       case 5: Pred = FCmpInst::FCMP_UGE; break;
7908       case 6: Pred = FCmpInst::FCMP_UGT; break;
7909       case 7: Pred = FCmpInst::FCMP_ORD; break;
7910       }
7911       return getVectorFCmpIR(Pred);
7912     }
7913 
7914     // We can't handle 8-31 immediates with native IR, use the intrinsic.
         // The intrinsic is picked by element type and vector width, and the
         // operands (including the immediate) are passed through unchanged.
7915     Intrinsic::ID ID;
7916     switch (BuiltinID) {
7917     default: llvm_unreachable("Unsupported intrinsic!");
7918     case X86::BI__builtin_ia32_cmpps:
7919       ID = Intrinsic::x86_sse_cmp_ps;
7920       break;
7921     case X86::BI__builtin_ia32_cmpps256:
7922       ID = Intrinsic::x86_avx_cmp_ps_256;
7923       break;
7924     case X86::BI__builtin_ia32_cmppd:
7925       ID = Intrinsic::x86_sse2_cmp_pd;
7926       break;
7927     case X86::BI__builtin_ia32_cmppd256:
7928       ID = Intrinsic::x86_avx_cmp_pd_256;
7929       break;
7930     }
7931 
7932     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
7933   }
7934 
7935   // SSE scalar comparison intrinsics
       // Unlike the packed forms, the scalar compares always go through the
       // target intrinsic (cmp_ss for float, cmp_sd for double). The integer
       // passed to getCmpIntrinsicCall is presumably the comparison immediate;
       // the helper is defined outside this view. The values run 0..7 in the
       // order eq, lt, le, unord, neq, nlt, nle, ord.
7936   case X86::BI__builtin_ia32_cmpeqss:
7937     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
7938   case X86::BI__builtin_ia32_cmpltss:
7939     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
7940   case X86::BI__builtin_ia32_cmpless:
7941     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
7942   case X86::BI__builtin_ia32_cmpunordss:
7943     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
7944   case X86::BI__builtin_ia32_cmpneqss:
7945     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
7946   case X86::BI__builtin_ia32_cmpnltss:
7947     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
7948   case X86::BI__builtin_ia32_cmpnless:
7949     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
7950   case X86::BI__builtin_ia32_cmpordss:
7951     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
7952   case X86::BI__builtin_ia32_cmpeqsd:
7953     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
7954   case X86::BI__builtin_ia32_cmpltsd:
7955     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
7956   case X86::BI__builtin_ia32_cmplesd:
7957     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
7958   case X86::BI__builtin_ia32_cmpunordsd:
7959     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
7960   case X86::BI__builtin_ia32_cmpneqsd:
7961     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
7962   case X86::BI__builtin_ia32_cmpnltsd:
7963     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
7964   case X86::BI__builtin_ia32_cmpnlesd:
7965     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
7966   case X86::BI__builtin_ia32_cmpordsd:
7967     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
7968 
7969   case X86::BI__emul:
7970   case X86::BI__emulu: {
         // Widening multiply: cast both operands to i64 (sign-extending for
         // __emul, zero-extending for __emulu) and multiply. The product is
         // flagged nsw in the signed case and nuw in the unsigned case.
         // The Int64Ty declared here is local to this arm.
7971     llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
7972     bool isSigned = (BuiltinID == X86::BI__emul);
7973     Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
7974     Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
7975     return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
7976   }
7977   case X86::BI__mulh:
7978   case X86::BI__umulh:
7979   case X86::BI_mul128:
7980   case X86::BI_umul128: {
         // Full-width multiply done in i128. __mulh/__umulh return only the high
         // part of the product; _mul128/_umul128 return the product cast to the
         // result type and write the high part through their third argument.
7981     llvm::Type *ResType = ConvertType(E->getType());
7982     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
7983 
         // Widen both operands to i128, sign- or zero-extending per IsSigned.
7984     bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
7985     Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
7986     Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
7987 
         // Multiply (nsw/nuw to match signedness) and shift the product right by
         // 64 - arithmetically when signed, logically when unsigned - to obtain
         // the high part, which is then cast to the result type.
7988     Value *MulResult, *HigherBits;
7989     if (IsSigned) {
7990       MulResult = Builder.CreateNSWMul(LHS, RHS);
7991       HigherBits = Builder.CreateAShr(MulResult, 64);
7992     } else {
7993       MulResult = Builder.CreateNUWMul(LHS, RHS);
7994       HigherBits = Builder.CreateLShr(MulResult, 64);
7995     }
7996     HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
7997 
7998     if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
7999       return HigherBits;
8000 
         // _mul128/_umul128: store the high part through the pointer given as the
         // call's third argument and return the product cast to the result type.
8001     Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
8002     Builder.CreateStore(HigherBits, HighBitsAddress);
8003     return Builder.CreateIntCast(MulResult, ResType, IsSigned);
8004   }
8005 
8006   case X86::BI__faststorefence: {
         // Emitted as a sequentially consistent fence with cross-thread scope.
8007     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
8008                                llvm::CrossThread);
8009   }
8010   case X86::BI_ReadWriteBarrier:
8011   case X86::BI_ReadBarrier:
8012   case X86::BI_WriteBarrier: {
         // All three barrier builtins become the same sequentially consistent
         // fence, but with single-thread scope rather than cross-thread scope.
8013     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
8014                                llvm::SingleThread);
8015   }
8016   case X86::BI_BitScanForward:
8017   case X86::BI_BitScanForward64:
         // The bit-scan and 64-bit Interlocked builtins below are not lowered
         // here: each builtin ID is mapped to the matching MSVCIntrin enumerator
         // and handed, together with the call expression, to EmitMSVCBuiltinExpr.
8018     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
8019   case X86::BI_BitScanReverse:
8020   case X86::BI_BitScanReverse64:
8021     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
8022 
8023   case X86::BI_InterlockedAnd64:
8024     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
8025   case X86::BI_InterlockedExchange64:
8026     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
8027   case X86::BI_InterlockedExchangeAdd64:
8028     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
8029   case X86::BI_InterlockedExchangeSub64:
8030     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
8031   case X86::BI_InterlockedOr64:
8032     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
8033   case X86::BI_InterlockedXor64:
8034     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
8035   case X86::BI_InterlockedDecrement64:
8036     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
8037   case X86::BI_InterlockedIncrement64:
8038     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
8039 
8040   case X86::BI_AddressOfReturnAddress: {
         // Direct call to the llvm.addressofreturnaddress intrinsic, no operands.
8041     Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
8042     return Builder.CreateCall(F);
8043   }
8044   case X86::BI__stosb: {
8045     // We treat __stosb as a volatile memset - it may not generate "rep stosb"
8046     // instruction, but it will create a memset that won't be optimized away.
         // Operands are passed through as (Ops[0], Ops[1], Ops[2]) with
         // alignment 1; the trailing true is the volatile flag.
8047     return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true);
8048   }
8049   case X86::BI__ud2:
8050     // llvm.trap makes a ud2a instruction on x86.
8051     return EmitTrapCall(Intrinsic::trap);
8052   case X86::BI__int2c: {
8053     // This syscall signals a driver assertion failure in x86 NT kernels.
8054     llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
8055     llvm::InlineAsm *IA =
8056         llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true);
8057     llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
8058         getLLVMContext(), llvm::AttributeList::FunctionIndex,
8059         llvm::Attribute::NoReturn);
8060     CallSite CS = Builder.CreateCall(IA);
8061     CS.setAttributes(NoReturnAttr);
8062     return CS.getInstruction();
8063   }
8064   case X86::BI__readfsbyte:
8065   case X86::BI__readfsword:
8066   case X86::BI__readfsdword:
8067   case X86::BI__readfsqword: {
8068     llvm::Type *IntTy = ConvertType(E->getType());
8069     Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
8070                                         llvm::PointerType::get(IntTy, 257));
8071     LoadInst *Load = Builder.CreateAlignedLoad(
8072         IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
8073     Load->setVolatile(true);
8074     return Load;
8075   }
8076   case X86::BI__readgsbyte:
8077   case X86::BI__readgsword:
8078   case X86::BI__readgsdword:
8079   case X86::BI__readgsqword: {
8080     llvm::Type *IntTy = ConvertType(E->getType());
8081     Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
8082                                         llvm::PointerType::get(IntTy, 256));
8083     LoadInst *Load = Builder.CreateAlignedLoad(
8084         IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
8085     Load->setVolatile(true);
8086     return Load;
8087   }
8088   }
8089 }
8090 
8091 
8092 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
8093                                            const CallExpr *E) {
8094   SmallVector<Value*, 4> Ops;
8095 
8096   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
8097     Ops.push_back(EmitScalarExpr(E->getArg(i)));
8098 
8099   Intrinsic::ID ID = Intrinsic::not_intrinsic;
8100 
8101   switch (BuiltinID) {
8102   default: return nullptr;
8103 
8104   // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
8105   // call __builtin_readcyclecounter.
8106   case PPC::BI__builtin_ppc_get_timebase:
8107     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
8108 
8109   // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
8110   case PPC::BI__builtin_altivec_lvx:
8111   case PPC::BI__builtin_altivec_lvxl:
8112   case PPC::BI__builtin_altivec_lvebx:
8113   case PPC::BI__builtin_altivec_lvehx:
8114   case PPC::BI__builtin_altivec_lvewx:
8115   case PPC::BI__builtin_altivec_lvsl:
8116   case PPC::BI__builtin_altivec_lvsr:
8117   case PPC::BI__builtin_vsx_lxvd2x:
8118   case PPC::BI__builtin_vsx_lxvw4x:
8119   case PPC::BI__builtin_vsx_lxvd2x_be:
8120   case PPC::BI__builtin_vsx_lxvw4x_be:
8121   case PPC::BI__builtin_vsx_lxvl:
8122   case PPC::BI__builtin_vsx_lxvll:
8123   {
8124     if(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
8125        BuiltinID == PPC::BI__builtin_vsx_lxvll){
8126       Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
8127     }else {
8128       Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
8129       Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
8130       Ops.pop_back();
8131     }
8132 
8133     switch (BuiltinID) {
8134     default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
8135     case PPC::BI__builtin_altivec_lvx:
8136       ID = Intrinsic::ppc_altivec_lvx;
8137       break;
8138     case PPC::BI__builtin_altivec_lvxl:
8139       ID = Intrinsic::ppc_altivec_lvxl;
8140       break;
8141     case PPC::BI__builtin_altivec_lvebx:
8142       ID = Intrinsic::ppc_altivec_lvebx;
8143       break;
8144     case PPC::BI__builtin_altivec_lvehx:
8145       ID = Intrinsic::ppc_altivec_lvehx;
8146       break;
8147     case PPC::BI__builtin_altivec_lvewx:
8148       ID = Intrinsic::ppc_altivec_lvewx;
8149       break;
8150     case PPC::BI__builtin_altivec_lvsl:
8151       ID = Intrinsic::ppc_altivec_lvsl;
8152       break;
8153     case PPC::BI__builtin_altivec_lvsr:
8154       ID = Intrinsic::ppc_altivec_lvsr;
8155       break;
8156     case PPC::BI__builtin_vsx_lxvd2x:
8157       ID = Intrinsic::ppc_vsx_lxvd2x;
8158       break;
8159     case PPC::BI__builtin_vsx_lxvw4x:
8160       ID = Intrinsic::ppc_vsx_lxvw4x;
8161       break;
8162     case PPC::BI__builtin_vsx_lxvd2x_be:
8163       ID = Intrinsic::ppc_vsx_lxvd2x_be;
8164       break;
8165     case PPC::BI__builtin_vsx_lxvw4x_be:
8166       ID = Intrinsic::ppc_vsx_lxvw4x_be;
8167       break;
8168     case PPC::BI__builtin_vsx_lxvl:
8169       ID = Intrinsic::ppc_vsx_lxvl;
8170       break;
8171     case PPC::BI__builtin_vsx_lxvll:
8172       ID = Intrinsic::ppc_vsx_lxvll;
8173       break;
8174     }
8175     llvm::Function *F = CGM.getIntrinsic(ID);
8176     return Builder.CreateCall(F, Ops, "");
8177   }
8178 
8179   // vec_st, vec_xst_be
8180   case PPC::BI__builtin_altivec_stvx:
8181   case PPC::BI__builtin_altivec_stvxl:
8182   case PPC::BI__builtin_altivec_stvebx:
8183   case PPC::BI__builtin_altivec_stvehx:
8184   case PPC::BI__builtin_altivec_stvewx:
8185   case PPC::BI__builtin_vsx_stxvd2x:
8186   case PPC::BI__builtin_vsx_stxvw4x:
8187   case PPC::BI__builtin_vsx_stxvd2x_be:
8188   case PPC::BI__builtin_vsx_stxvw4x_be:
8189   case PPC::BI__builtin_vsx_stxvl:
8190   case PPC::BI__builtin_vsx_stxvll:
8191   {
8192     if(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
8193       BuiltinID == PPC::BI__builtin_vsx_stxvll ){
8194       Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
8195     }else {
8196       Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
8197       Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
8198       Ops.pop_back();
8199     }
8200 
8201     switch (BuiltinID) {
8202     default: llvm_unreachable("Unsupported st intrinsic!");
8203     case PPC::BI__builtin_altivec_stvx:
8204       ID = Intrinsic::ppc_altivec_stvx;
8205       break;
8206     case PPC::BI__builtin_altivec_stvxl:
8207       ID = Intrinsic::ppc_altivec_stvxl;
8208       break;
8209     case PPC::BI__builtin_altivec_stvebx:
8210       ID = Intrinsic::ppc_altivec_stvebx;
8211       break;
8212     case PPC::BI__builtin_altivec_stvehx:
8213       ID = Intrinsic::ppc_altivec_stvehx;
8214       break;
8215     case PPC::BI__builtin_altivec_stvewx:
8216       ID = Intrinsic::ppc_altivec_stvewx;
8217       break;
8218     case PPC::BI__builtin_vsx_stxvd2x:
8219       ID = Intrinsic::ppc_vsx_stxvd2x;
8220       break;
8221     case PPC::BI__builtin_vsx_stxvw4x:
8222       ID = Intrinsic::ppc_vsx_stxvw4x;
8223       break;
8224     case PPC::BI__builtin_vsx_stxvd2x_be:
8225       ID = Intrinsic::ppc_vsx_stxvd2x_be;
8226       break;
8227     case PPC::BI__builtin_vsx_stxvw4x_be:
8228       ID = Intrinsic::ppc_vsx_stxvw4x_be;
8229       break;
8230     case PPC::BI__builtin_vsx_stxvl:
8231       ID = Intrinsic::ppc_vsx_stxvl;
8232       break;
8233     case PPC::BI__builtin_vsx_stxvll:
8234       ID = Intrinsic::ppc_vsx_stxvll;
8235       break;
8236     }
8237     llvm::Function *F = CGM.getIntrinsic(ID);
8238     return Builder.CreateCall(F, Ops, "");
8239   }
8240   // Square root
8241   case PPC::BI__builtin_vsx_xvsqrtsp:
8242   case PPC::BI__builtin_vsx_xvsqrtdp: {
8243     llvm::Type *ResultType = ConvertType(E->getType());
8244     Value *X = EmitScalarExpr(E->getArg(0));
8245     ID = Intrinsic::sqrt;
8246     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8247     return Builder.CreateCall(F, X);
8248   }
8249   // Count leading zeros
8250   case PPC::BI__builtin_altivec_vclzb:
8251   case PPC::BI__builtin_altivec_vclzh:
8252   case PPC::BI__builtin_altivec_vclzw:
8253   case PPC::BI__builtin_altivec_vclzd: {
8254     llvm::Type *ResultType = ConvertType(E->getType());
8255     Value *X = EmitScalarExpr(E->getArg(0));
8256     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8257     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
8258     return Builder.CreateCall(F, {X, Undef});
8259   }
8260   case PPC::BI__builtin_altivec_vctzb:
8261   case PPC::BI__builtin_altivec_vctzh:
8262   case PPC::BI__builtin_altivec_vctzw:
8263   case PPC::BI__builtin_altivec_vctzd: {
8264     llvm::Type *ResultType = ConvertType(E->getType());
8265     Value *X = EmitScalarExpr(E->getArg(0));
8266     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8267     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
8268     return Builder.CreateCall(F, {X, Undef});
8269   }
8270   case PPC::BI__builtin_altivec_vpopcntb:
8271   case PPC::BI__builtin_altivec_vpopcnth:
8272   case PPC::BI__builtin_altivec_vpopcntw:
8273   case PPC::BI__builtin_altivec_vpopcntd: {
8274     llvm::Type *ResultType = ConvertType(E->getType());
8275     Value *X = EmitScalarExpr(E->getArg(0));
8276     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
8277     return Builder.CreateCall(F, X);
8278   }
8279   // Copy sign
8280   case PPC::BI__builtin_vsx_xvcpsgnsp:
8281   case PPC::BI__builtin_vsx_xvcpsgndp: {
8282     llvm::Type *ResultType = ConvertType(E->getType());
8283     Value *X = EmitScalarExpr(E->getArg(0));
8284     Value *Y = EmitScalarExpr(E->getArg(1));
8285     ID = Intrinsic::copysign;
8286     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8287     return Builder.CreateCall(F, {X, Y});
8288   }
8289   // Rounding/truncation
8290   case PPC::BI__builtin_vsx_xvrspip:
8291   case PPC::BI__builtin_vsx_xvrdpip:
8292   case PPC::BI__builtin_vsx_xvrdpim:
8293   case PPC::BI__builtin_vsx_xvrspim:
8294   case PPC::BI__builtin_vsx_xvrdpi:
8295   case PPC::BI__builtin_vsx_xvrspi:
8296   case PPC::BI__builtin_vsx_xvrdpic:
8297   case PPC::BI__builtin_vsx_xvrspic:
8298   case PPC::BI__builtin_vsx_xvrdpiz:
8299   case PPC::BI__builtin_vsx_xvrspiz: {
8300     llvm::Type *ResultType = ConvertType(E->getType());
8301     Value *X = EmitScalarExpr(E->getArg(0));
8302     if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
8303         BuiltinID == PPC::BI__builtin_vsx_xvrspim)
8304       ID = Intrinsic::floor;
8305     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
8306              BuiltinID == PPC::BI__builtin_vsx_xvrspi)
8307       ID = Intrinsic::round;
8308     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
8309              BuiltinID == PPC::BI__builtin_vsx_xvrspic)
8310       ID = Intrinsic::nearbyint;
8311     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
8312              BuiltinID == PPC::BI__builtin_vsx_xvrspip)
8313       ID = Intrinsic::ceil;
8314     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
8315              BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
8316       ID = Intrinsic::trunc;
8317     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8318     return Builder.CreateCall(F, X);
8319   }
8320 
8321   // Absolute value
8322   case PPC::BI__builtin_vsx_xvabsdp:
8323   case PPC::BI__builtin_vsx_xvabssp: {
8324     llvm::Type *ResultType = ConvertType(E->getType());
8325     Value *X = EmitScalarExpr(E->getArg(0));
8326     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8327     return Builder.CreateCall(F, X);
8328   }
8329 
8330   // FMA variations
8331   case PPC::BI__builtin_vsx_xvmaddadp:
8332   case PPC::BI__builtin_vsx_xvmaddasp:
8333   case PPC::BI__builtin_vsx_xvnmaddadp:
8334   case PPC::BI__builtin_vsx_xvnmaddasp:
8335   case PPC::BI__builtin_vsx_xvmsubadp:
8336   case PPC::BI__builtin_vsx_xvmsubasp:
8337   case PPC::BI__builtin_vsx_xvnmsubadp:
8338   case PPC::BI__builtin_vsx_xvnmsubasp: {
8339     llvm::Type *ResultType = ConvertType(E->getType());
8340     Value *X = EmitScalarExpr(E->getArg(0));
8341     Value *Y = EmitScalarExpr(E->getArg(1));
8342     Value *Z = EmitScalarExpr(E->getArg(2));
8343     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8344     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8345     switch (BuiltinID) {
8346       case PPC::BI__builtin_vsx_xvmaddadp:
8347       case PPC::BI__builtin_vsx_xvmaddasp:
8348         return Builder.CreateCall(F, {X, Y, Z});
8349       case PPC::BI__builtin_vsx_xvnmaddadp:
8350       case PPC::BI__builtin_vsx_xvnmaddasp:
8351         return Builder.CreateFSub(Zero,
8352                                   Builder.CreateCall(F, {X, Y, Z}), "sub");
8353       case PPC::BI__builtin_vsx_xvmsubadp:
8354       case PPC::BI__builtin_vsx_xvmsubasp:
8355         return Builder.CreateCall(F,
8356                                   {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8357       case PPC::BI__builtin_vsx_xvnmsubadp:
8358       case PPC::BI__builtin_vsx_xvnmsubasp:
8359         Value *FsubRes =
8360           Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8361         return Builder.CreateFSub(Zero, FsubRes, "sub");
8362     }
8363     llvm_unreachable("Unknown FMA operation");
8364     return nullptr; // Suppress no-return warning
8365   }
8366 
8367   case PPC::BI__builtin_vsx_insertword: {
8368     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
8369 
8370     // Third argument is a compile time constant int. It must be clamped to
8371     // to the range [0, 12].
8372     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8373     assert(ArgCI &&
8374            "Third arg to xxinsertw intrinsic must be constant integer");
8375     const int64_t MaxIndex = 12;
8376     int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
8377 
8378     // The builtin semantics don't exactly match the xxinsertw instructions
8379     // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
8380     // word from the first argument, and inserts it in the second argument. The
8381     // instruction extracts the word from its second input register and inserts
8382     // it into its first input register, so swap the first and second arguments.
8383     std::swap(Ops[0], Ops[1]);
8384 
8385     // Need to cast the second argument from a vector of unsigned int to a
8386     // vector of long long.
8387     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
8388 
8389     if (getTarget().isLittleEndian()) {
8390       // Create a shuffle mask of (1, 0)
8391       Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
8392                                    ConstantInt::get(Int32Ty, 0)
8393                                  };
8394       Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8395 
8396       // Reverse the double words in the vector we will extract from.
8397       Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8398       Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask);
8399 
8400       // Reverse the index.
8401       Index = MaxIndex - Index;
8402     }
8403 
8404     // Intrinsic expects the first arg to be a vector of int.
8405     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
8406     Ops[2] = ConstantInt::getSigned(Int32Ty, Index);
8407     return Builder.CreateCall(F, Ops);
8408   }
8409 
8410   case PPC::BI__builtin_vsx_extractuword: {
8411     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
8412 
8413     // Intrinsic expects the first argument to be a vector of doublewords.
8414     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8415 
8416     // The second argument is a compile time constant int that needs to
8417     // be clamped to the range [0, 12].
8418     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]);
8419     assert(ArgCI &&
8420            "Second Arg to xxextractuw intrinsic must be a constant integer!");
8421     const int64_t MaxIndex = 12;
8422     int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
8423 
8424     if (getTarget().isLittleEndian()) {
8425       // Reverse the index.
8426       Index = MaxIndex - Index;
8427       Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
8428 
8429       // Emit the call, then reverse the double words of the results vector.
8430       Value *Call = Builder.CreateCall(F, Ops);
8431 
8432       // Create a shuffle mask of (1, 0)
8433       Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
8434                                    ConstantInt::get(Int32Ty, 0)
8435                                  };
8436       Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8437 
8438       Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask);
8439       return ShuffleCall;
8440     } else {
8441       Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
8442       return Builder.CreateCall(F, Ops);
8443     }
8444   }
8445   }
8446 }
8447 
8448 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
8449                                               const CallExpr *E) {
8450   switch (BuiltinID) {
8451   case AMDGPU::BI__builtin_amdgcn_div_scale:
8452   case AMDGPU::BI__builtin_amdgcn_div_scalef: {
8453     // Translate from the intrinsics's struct return to the builtin's out
8454     // argument.
8455 
8456     Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
8457 
8458     llvm::Value *X = EmitScalarExpr(E->getArg(0));
8459     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
8460     llvm::Value *Z = EmitScalarExpr(E->getArg(2));
8461 
8462     llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
8463                                            X->getType());
8464 
8465     llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
8466 
8467     llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
8468     llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
8469 
8470     llvm::Type *RealFlagType
8471       = FlagOutPtr.getPointer()->getType()->getPointerElementType();
8472 
8473     llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
8474     Builder.CreateStore(FlagExt, FlagOutPtr);
8475     return Result;
8476   }
8477   case AMDGPU::BI__builtin_amdgcn_div_fmas:
8478   case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
8479     llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
8480     llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
8481     llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
8482     llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
8483 
8484     llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
8485                                       Src0->getType());
8486     llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
8487     return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
8488   }
8489 
8490   case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
8491     return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
8492   case AMDGPU::BI__builtin_amdgcn_mov_dpp: {
8493     llvm::SmallVector<llvm::Value *, 5> Args;
8494     for (unsigned I = 0; I != 5; ++I)
8495       Args.push_back(EmitScalarExpr(E->getArg(I)));
8496     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp,
8497                                     Args[0]->getType());
8498     return Builder.CreateCall(F, Args);
8499   }
8500   case AMDGPU::BI__builtin_amdgcn_div_fixup:
8501   case AMDGPU::BI__builtin_amdgcn_div_fixupf:
8502   case AMDGPU::BI__builtin_amdgcn_div_fixuph:
8503     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
8504   case AMDGPU::BI__builtin_amdgcn_trig_preop:
8505   case AMDGPU::BI__builtin_amdgcn_trig_preopf:
8506     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
8507   case AMDGPU::BI__builtin_amdgcn_rcp:
8508   case AMDGPU::BI__builtin_amdgcn_rcpf:
8509   case AMDGPU::BI__builtin_amdgcn_rcph:
8510     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
8511   case AMDGPU::BI__builtin_amdgcn_rsq:
8512   case AMDGPU::BI__builtin_amdgcn_rsqf:
8513   case AMDGPU::BI__builtin_amdgcn_rsqh:
8514     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
8515   case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
8516   case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
8517     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
8518   case AMDGPU::BI__builtin_amdgcn_sinf:
8519   case AMDGPU::BI__builtin_amdgcn_sinh:
8520     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
8521   case AMDGPU::BI__builtin_amdgcn_cosf:
8522   case AMDGPU::BI__builtin_amdgcn_cosh:
8523     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
8524   case AMDGPU::BI__builtin_amdgcn_log_clampf:
8525     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
8526   case AMDGPU::BI__builtin_amdgcn_ldexp:
8527   case AMDGPU::BI__builtin_amdgcn_ldexpf:
8528   case AMDGPU::BI__builtin_amdgcn_ldexph:
8529     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
8530   case AMDGPU::BI__builtin_amdgcn_frexp_mant:
8531   case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
8532   case AMDGPU::BI__builtin_amdgcn_frexp_manth:
8533     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
8534   case AMDGPU::BI__builtin_amdgcn_frexp_exp:
8535   case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
8536     Value *Src0 = EmitScalarExpr(E->getArg(0));
8537     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
8538                                 { Builder.getInt32Ty(), Src0->getType() });
8539     return Builder.CreateCall(F, Src0);
8540   }
8541   case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
8542     Value *Src0 = EmitScalarExpr(E->getArg(0));
8543     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
8544                                 { Builder.getInt16Ty(), Src0->getType() });
8545     return Builder.CreateCall(F, Src0);
8546   }
8547   case AMDGPU::BI__builtin_amdgcn_fract:
8548   case AMDGPU::BI__builtin_amdgcn_fractf:
8549   case AMDGPU::BI__builtin_amdgcn_fracth:
8550     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
8551   case AMDGPU::BI__builtin_amdgcn_lerp:
8552     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
8553   case AMDGPU::BI__builtin_amdgcn_uicmp:
8554   case AMDGPU::BI__builtin_amdgcn_uicmpl:
8555   case AMDGPU::BI__builtin_amdgcn_sicmp:
8556   case AMDGPU::BI__builtin_amdgcn_sicmpl:
8557     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp);
8558   case AMDGPU::BI__builtin_amdgcn_fcmp:
8559   case AMDGPU::BI__builtin_amdgcn_fcmpf:
8560     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp);
8561   case AMDGPU::BI__builtin_amdgcn_class:
8562   case AMDGPU::BI__builtin_amdgcn_classf:
8563   case AMDGPU::BI__builtin_amdgcn_classh:
8564     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
8565   case AMDGPU::BI__builtin_amdgcn_fmed3f:
8566   case AMDGPU::BI__builtin_amdgcn_fmed3h:
8567     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
8568   case AMDGPU::BI__builtin_amdgcn_read_exec: {
8569     CallInst *CI = cast<CallInst>(
8570       EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
8571     CI->setConvergent();
8572     return CI;
8573   }
8574 
8575   // amdgcn workitem
8576   case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
8577     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
8578   case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
8579     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
8580   case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
8581     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
8582 
8583   // r600 intrinsics
8584   case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
8585   case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
8586     return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
8587   case AMDGPU::BI__builtin_r600_read_tidig_x:
8588     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
8589   case AMDGPU::BI__builtin_r600_read_tidig_y:
8590     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
8591   case AMDGPU::BI__builtin_r600_read_tidig_z:
8592     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
8593   default:
8594     return nullptr;
8595   }
8596 }
8597 
8598 /// Handle a SystemZ function in which the final argument is a pointer
8599 /// to an int that receives the post-instruction CC value.  At the LLVM level
8600 /// this is represented as a function that returns a {result, cc} pair.
8601 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
8602                                          unsigned IntrinsicID,
8603                                          const CallExpr *E) {
8604   unsigned NumArgs = E->getNumArgs() - 1;
8605   SmallVector<Value *, 8> Args(NumArgs);
8606   for (unsigned I = 0; I < NumArgs; ++I)
8607     Args[I] = CGF.EmitScalarExpr(E->getArg(I));
8608   Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
8609   Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
8610   Value *Call = CGF.Builder.CreateCall(F, Args);
8611   Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
8612   CGF.Builder.CreateStore(CC, CCPtr);
8613   return CGF.Builder.CreateExtractValue(Call, 0);
8614 }
8615 
8616 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
8617                                                const CallExpr *E) {
8618   switch (BuiltinID) {
8619   case SystemZ::BI__builtin_tbegin: {
8620     Value *TDB = EmitScalarExpr(E->getArg(0));
8621     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
8622     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
8623     return Builder.CreateCall(F, {TDB, Control});
8624   }
8625   case SystemZ::BI__builtin_tbegin_nofloat: {
8626     Value *TDB = EmitScalarExpr(E->getArg(0));
8627     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
8628     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
8629     return Builder.CreateCall(F, {TDB, Control});
8630   }
8631   case SystemZ::BI__builtin_tbeginc: {
8632     Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
8633     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
8634     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
8635     return Builder.CreateCall(F, {TDB, Control});
8636   }
8637   case SystemZ::BI__builtin_tabort: {
8638     Value *Data = EmitScalarExpr(E->getArg(0));
8639     Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
8640     return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
8641   }
8642   case SystemZ::BI__builtin_non_tx_store: {
8643     Value *Address = EmitScalarExpr(E->getArg(0));
8644     Value *Data = EmitScalarExpr(E->getArg(1));
8645     Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
8646     return Builder.CreateCall(F, {Data, Address});
8647   }
8648 
8649   // Vector builtins.  Note that most vector builtins are mapped automatically
8650   // to target-specific LLVM intrinsics.  The ones handled specially here can
8651   // be represented via standard LLVM IR, which is preferable to enable common
8652   // LLVM optimizations.
8653 
8654   case SystemZ::BI__builtin_s390_vpopctb:
8655   case SystemZ::BI__builtin_s390_vpopcth:
8656   case SystemZ::BI__builtin_s390_vpopctf:
8657   case SystemZ::BI__builtin_s390_vpopctg: {
8658     llvm::Type *ResultType = ConvertType(E->getType());
8659     Value *X = EmitScalarExpr(E->getArg(0));
8660     Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
8661     return Builder.CreateCall(F, X);
8662   }
8663 
8664   case SystemZ::BI__builtin_s390_vclzb:
8665   case SystemZ::BI__builtin_s390_vclzh:
8666   case SystemZ::BI__builtin_s390_vclzf:
8667   case SystemZ::BI__builtin_s390_vclzg: {
8668     llvm::Type *ResultType = ConvertType(E->getType());
8669     Value *X = EmitScalarExpr(E->getArg(0));
8670     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8671     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
8672     return Builder.CreateCall(F, {X, Undef});
8673   }
8674 
8675   case SystemZ::BI__builtin_s390_vctzb:
8676   case SystemZ::BI__builtin_s390_vctzh:
8677   case SystemZ::BI__builtin_s390_vctzf:
8678   case SystemZ::BI__builtin_s390_vctzg: {
8679     llvm::Type *ResultType = ConvertType(E->getType());
8680     Value *X = EmitScalarExpr(E->getArg(0));
8681     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8682     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
8683     return Builder.CreateCall(F, {X, Undef});
8684   }
8685 
8686   case SystemZ::BI__builtin_s390_vfsqdb: {
8687     llvm::Type *ResultType = ConvertType(E->getType());
8688     Value *X = EmitScalarExpr(E->getArg(0));
8689     Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
8690     return Builder.CreateCall(F, X);
8691   }
8692   case SystemZ::BI__builtin_s390_vfmadb: {
8693     llvm::Type *ResultType = ConvertType(E->getType());
8694     Value *X = EmitScalarExpr(E->getArg(0));
8695     Value *Y = EmitScalarExpr(E->getArg(1));
8696     Value *Z = EmitScalarExpr(E->getArg(2));
8697     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8698     return Builder.CreateCall(F, {X, Y, Z});
8699   }
8700   case SystemZ::BI__builtin_s390_vfmsdb: {
8701     llvm::Type *ResultType = ConvertType(E->getType());
8702     Value *X = EmitScalarExpr(E->getArg(0));
8703     Value *Y = EmitScalarExpr(E->getArg(1));
8704     Value *Z = EmitScalarExpr(E->getArg(2));
8705     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8706     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8707     return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8708   }
8709   case SystemZ::BI__builtin_s390_vflpdb: {
8710     llvm::Type *ResultType = ConvertType(E->getType());
8711     Value *X = EmitScalarExpr(E->getArg(0));
8712     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8713     return Builder.CreateCall(F, X);
8714   }
8715   case SystemZ::BI__builtin_s390_vflndb: {
8716     llvm::Type *ResultType = ConvertType(E->getType());
8717     Value *X = EmitScalarExpr(E->getArg(0));
8718     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8719     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8720     return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
8721   }
8722   case SystemZ::BI__builtin_s390_vfidb: {
8723     llvm::Type *ResultType = ConvertType(E->getType());
8724     Value *X = EmitScalarExpr(E->getArg(0));
8725     // Constant-fold the M4 and M5 mask arguments.
8726     llvm::APSInt M4, M5;
8727     bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
8728     bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
8729     assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
8730     (void)IsConstM4; (void)IsConstM5;
8731     // Check whether this instance of vfidb can be represented via a LLVM
8732     // standard intrinsic.  We only support some combinations of M4 and M5.
8733     Intrinsic::ID ID = Intrinsic::not_intrinsic;
8734     switch (M4.getZExtValue()) {
8735     default: break;
8736     case 0:  // IEEE-inexact exception allowed
8737       switch (M5.getZExtValue()) {
8738       default: break;
8739       case 0: ID = Intrinsic::rint; break;
8740       }
8741       break;
8742     case 4:  // IEEE-inexact exception suppressed
8743       switch (M5.getZExtValue()) {
8744       default: break;
8745       case 0: ID = Intrinsic::nearbyint; break;
8746       case 1: ID = Intrinsic::round; break;
8747       case 5: ID = Intrinsic::trunc; break;
8748       case 6: ID = Intrinsic::ceil; break;
8749       case 7: ID = Intrinsic::floor; break;
8750       }
8751       break;
8752     }
8753     if (ID != Intrinsic::not_intrinsic) {
8754       Function *F = CGM.getIntrinsic(ID, ResultType);
8755       return Builder.CreateCall(F, X);
8756     }
8757     Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb);
8758     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
8759     Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
8760     return Builder.CreateCall(F, {X, M4Value, M5Value});
8761   }
8762 
  // Vector intrinsics that output the post-instruction CC value.
8764 
8765 #define INTRINSIC_WITH_CC(NAME) \
8766     case SystemZ::BI__builtin_##NAME: \
8767       return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
8768 
8769   INTRINSIC_WITH_CC(s390_vpkshs);
8770   INTRINSIC_WITH_CC(s390_vpksfs);
8771   INTRINSIC_WITH_CC(s390_vpksgs);
8772 
8773   INTRINSIC_WITH_CC(s390_vpklshs);
8774   INTRINSIC_WITH_CC(s390_vpklsfs);
8775   INTRINSIC_WITH_CC(s390_vpklsgs);
8776 
8777   INTRINSIC_WITH_CC(s390_vceqbs);
8778   INTRINSIC_WITH_CC(s390_vceqhs);
8779   INTRINSIC_WITH_CC(s390_vceqfs);
8780   INTRINSIC_WITH_CC(s390_vceqgs);
8781 
8782   INTRINSIC_WITH_CC(s390_vchbs);
8783   INTRINSIC_WITH_CC(s390_vchhs);
8784   INTRINSIC_WITH_CC(s390_vchfs);
8785   INTRINSIC_WITH_CC(s390_vchgs);
8786 
8787   INTRINSIC_WITH_CC(s390_vchlbs);
8788   INTRINSIC_WITH_CC(s390_vchlhs);
8789   INTRINSIC_WITH_CC(s390_vchlfs);
8790   INTRINSIC_WITH_CC(s390_vchlgs);
8791 
8792   INTRINSIC_WITH_CC(s390_vfaebs);
8793   INTRINSIC_WITH_CC(s390_vfaehs);
8794   INTRINSIC_WITH_CC(s390_vfaefs);
8795 
8796   INTRINSIC_WITH_CC(s390_vfaezbs);
8797   INTRINSIC_WITH_CC(s390_vfaezhs);
8798   INTRINSIC_WITH_CC(s390_vfaezfs);
8799 
8800   INTRINSIC_WITH_CC(s390_vfeebs);
8801   INTRINSIC_WITH_CC(s390_vfeehs);
8802   INTRINSIC_WITH_CC(s390_vfeefs);
8803 
8804   INTRINSIC_WITH_CC(s390_vfeezbs);
8805   INTRINSIC_WITH_CC(s390_vfeezhs);
8806   INTRINSIC_WITH_CC(s390_vfeezfs);
8807 
8808   INTRINSIC_WITH_CC(s390_vfenebs);
8809   INTRINSIC_WITH_CC(s390_vfenehs);
8810   INTRINSIC_WITH_CC(s390_vfenefs);
8811 
8812   INTRINSIC_WITH_CC(s390_vfenezbs);
8813   INTRINSIC_WITH_CC(s390_vfenezhs);
8814   INTRINSIC_WITH_CC(s390_vfenezfs);
8815 
8816   INTRINSIC_WITH_CC(s390_vistrbs);
8817   INTRINSIC_WITH_CC(s390_vistrhs);
8818   INTRINSIC_WITH_CC(s390_vistrfs);
8819 
8820   INTRINSIC_WITH_CC(s390_vstrcbs);
8821   INTRINSIC_WITH_CC(s390_vstrchs);
8822   INTRINSIC_WITH_CC(s390_vstrcfs);
8823 
8824   INTRINSIC_WITH_CC(s390_vstrczbs);
8825   INTRINSIC_WITH_CC(s390_vstrczhs);
8826   INTRINSIC_WITH_CC(s390_vstrczfs);
8827 
8828   INTRINSIC_WITH_CC(s390_vfcedbs);
8829   INTRINSIC_WITH_CC(s390_vfchdbs);
8830   INTRINSIC_WITH_CC(s390_vfchedbs);
8831 
8832   INTRINSIC_WITH_CC(s390_vftcidb);
8833 
8834 #undef INTRINSIC_WITH_CC
8835 
8836   default:
8837     return nullptr;
8838   }
8839 }
8840 
8841 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
8842                                              const CallExpr *E) {
8843   auto MakeLdg = [&](unsigned IntrinsicID) {
8844     Value *Ptr = EmitScalarExpr(E->getArg(0));
8845     AlignmentSource AlignSource;
8846     clang::CharUnits Align =
8847         getNaturalPointeeTypeAlignment(E->getArg(0)->getType(), &AlignSource);
8848     return Builder.CreateCall(
8849         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
8850                                        Ptr->getType()}),
8851         {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
8852   };
8853   auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
8854     Value *Ptr = EmitScalarExpr(E->getArg(0));
8855     return Builder.CreateCall(
8856         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
8857                                        Ptr->getType()}),
8858         {Ptr, EmitScalarExpr(E->getArg(1))});
8859   };
8860   switch (BuiltinID) {
8861   case NVPTX::BI__nvvm_atom_add_gen_i:
8862   case NVPTX::BI__nvvm_atom_add_gen_l:
8863   case NVPTX::BI__nvvm_atom_add_gen_ll:
8864     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
8865 
8866   case NVPTX::BI__nvvm_atom_sub_gen_i:
8867   case NVPTX::BI__nvvm_atom_sub_gen_l:
8868   case NVPTX::BI__nvvm_atom_sub_gen_ll:
8869     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
8870 
8871   case NVPTX::BI__nvvm_atom_and_gen_i:
8872   case NVPTX::BI__nvvm_atom_and_gen_l:
8873   case NVPTX::BI__nvvm_atom_and_gen_ll:
8874     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
8875 
8876   case NVPTX::BI__nvvm_atom_or_gen_i:
8877   case NVPTX::BI__nvvm_atom_or_gen_l:
8878   case NVPTX::BI__nvvm_atom_or_gen_ll:
8879     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
8880 
8881   case NVPTX::BI__nvvm_atom_xor_gen_i:
8882   case NVPTX::BI__nvvm_atom_xor_gen_l:
8883   case NVPTX::BI__nvvm_atom_xor_gen_ll:
8884     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
8885 
8886   case NVPTX::BI__nvvm_atom_xchg_gen_i:
8887   case NVPTX::BI__nvvm_atom_xchg_gen_l:
8888   case NVPTX::BI__nvvm_atom_xchg_gen_ll:
8889     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
8890 
8891   case NVPTX::BI__nvvm_atom_max_gen_i:
8892   case NVPTX::BI__nvvm_atom_max_gen_l:
8893   case NVPTX::BI__nvvm_atom_max_gen_ll:
8894     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
8895 
8896   case NVPTX::BI__nvvm_atom_max_gen_ui:
8897   case NVPTX::BI__nvvm_atom_max_gen_ul:
8898   case NVPTX::BI__nvvm_atom_max_gen_ull:
8899     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
8900 
8901   case NVPTX::BI__nvvm_atom_min_gen_i:
8902   case NVPTX::BI__nvvm_atom_min_gen_l:
8903   case NVPTX::BI__nvvm_atom_min_gen_ll:
8904     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
8905 
8906   case NVPTX::BI__nvvm_atom_min_gen_ui:
8907   case NVPTX::BI__nvvm_atom_min_gen_ul:
8908   case NVPTX::BI__nvvm_atom_min_gen_ull:
8909     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
8910 
8911   case NVPTX::BI__nvvm_atom_cas_gen_i:
8912   case NVPTX::BI__nvvm_atom_cas_gen_l:
8913   case NVPTX::BI__nvvm_atom_cas_gen_ll:
8914     // __nvvm_atom_cas_gen_* should return the old value rather than the
8915     // success flag.
8916     return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
8917 
8918   case NVPTX::BI__nvvm_atom_add_gen_f: {
8919     Value *Ptr = EmitScalarExpr(E->getArg(0));
8920     Value *Val = EmitScalarExpr(E->getArg(1));
8921     // atomicrmw only deals with integer arguments so we need to use
8922     // LLVM's nvvm_atomic_load_add_f32 intrinsic for that.
8923     Value *FnALAF32 =
8924         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
8925     return Builder.CreateCall(FnALAF32, {Ptr, Val});
8926   }
8927 
8928   case NVPTX::BI__nvvm_atom_inc_gen_ui: {
8929     Value *Ptr = EmitScalarExpr(E->getArg(0));
8930     Value *Val = EmitScalarExpr(E->getArg(1));
8931     Value *FnALI32 =
8932         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
8933     return Builder.CreateCall(FnALI32, {Ptr, Val});
8934   }
8935 
8936   case NVPTX::BI__nvvm_atom_dec_gen_ui: {
8937     Value *Ptr = EmitScalarExpr(E->getArg(0));
8938     Value *Val = EmitScalarExpr(E->getArg(1));
8939     Value *FnALD32 =
8940         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
8941     return Builder.CreateCall(FnALD32, {Ptr, Val});
8942   }
8943 
8944   case NVPTX::BI__nvvm_ldg_c:
8945   case NVPTX::BI__nvvm_ldg_c2:
8946   case NVPTX::BI__nvvm_ldg_c4:
8947   case NVPTX::BI__nvvm_ldg_s:
8948   case NVPTX::BI__nvvm_ldg_s2:
8949   case NVPTX::BI__nvvm_ldg_s4:
8950   case NVPTX::BI__nvvm_ldg_i:
8951   case NVPTX::BI__nvvm_ldg_i2:
8952   case NVPTX::BI__nvvm_ldg_i4:
8953   case NVPTX::BI__nvvm_ldg_l:
8954   case NVPTX::BI__nvvm_ldg_ll:
8955   case NVPTX::BI__nvvm_ldg_ll2:
8956   case NVPTX::BI__nvvm_ldg_uc:
8957   case NVPTX::BI__nvvm_ldg_uc2:
8958   case NVPTX::BI__nvvm_ldg_uc4:
8959   case NVPTX::BI__nvvm_ldg_us:
8960   case NVPTX::BI__nvvm_ldg_us2:
8961   case NVPTX::BI__nvvm_ldg_us4:
8962   case NVPTX::BI__nvvm_ldg_ui:
8963   case NVPTX::BI__nvvm_ldg_ui2:
8964   case NVPTX::BI__nvvm_ldg_ui4:
8965   case NVPTX::BI__nvvm_ldg_ul:
8966   case NVPTX::BI__nvvm_ldg_ull:
8967   case NVPTX::BI__nvvm_ldg_ull2:
8968     // PTX Interoperability section 2.2: "For a vector with an even number of
8969     // elements, its alignment is set to number of elements times the alignment
8970     // of its member: n*alignof(t)."
8971     return MakeLdg(Intrinsic::nvvm_ldg_global_i);
8972   case NVPTX::BI__nvvm_ldg_f:
8973   case NVPTX::BI__nvvm_ldg_f2:
8974   case NVPTX::BI__nvvm_ldg_f4:
8975   case NVPTX::BI__nvvm_ldg_d:
8976   case NVPTX::BI__nvvm_ldg_d2:
8977     return MakeLdg(Intrinsic::nvvm_ldg_global_f);
8978 
8979   case NVPTX::BI__nvvm_atom_cta_add_gen_i:
8980   case NVPTX::BI__nvvm_atom_cta_add_gen_l:
8981   case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
8982     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta);
8983   case NVPTX::BI__nvvm_atom_sys_add_gen_i:
8984   case NVPTX::BI__nvvm_atom_sys_add_gen_l:
8985   case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
8986     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys);
8987   case NVPTX::BI__nvvm_atom_cta_add_gen_f:
8988   case NVPTX::BI__nvvm_atom_cta_add_gen_d:
8989     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta);
8990   case NVPTX::BI__nvvm_atom_sys_add_gen_f:
8991   case NVPTX::BI__nvvm_atom_sys_add_gen_d:
8992     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys);
8993   case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
8994   case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
8995   case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
8996     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta);
8997   case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
8998   case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
8999   case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
9000     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys);
9001   case NVPTX::BI__nvvm_atom_cta_max_gen_i:
9002   case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
9003   case NVPTX::BI__nvvm_atom_cta_max_gen_l:
9004   case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
9005   case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
9006   case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
9007     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta);
9008   case NVPTX::BI__nvvm_atom_sys_max_gen_i:
9009   case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
9010   case NVPTX::BI__nvvm_atom_sys_max_gen_l:
9011   case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
9012   case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
9013   case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
9014     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys);
9015   case NVPTX::BI__nvvm_atom_cta_min_gen_i:
9016   case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
9017   case NVPTX::BI__nvvm_atom_cta_min_gen_l:
9018   case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
9019   case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
9020   case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
9021     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta);
9022   case NVPTX::BI__nvvm_atom_sys_min_gen_i:
9023   case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
9024   case NVPTX::BI__nvvm_atom_sys_min_gen_l:
9025   case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
9026   case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
9027   case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
9028     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys);
9029   case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
9030     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta);
9031   case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
9032     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta);
9033   case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
9034     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys);
9035   case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
9036     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys);
9037   case NVPTX::BI__nvvm_atom_cta_and_gen_i:
9038   case NVPTX::BI__nvvm_atom_cta_and_gen_l:
9039   case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
9040     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta);
9041   case NVPTX::BI__nvvm_atom_sys_and_gen_i:
9042   case NVPTX::BI__nvvm_atom_sys_and_gen_l:
9043   case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
9044     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys);
9045   case NVPTX::BI__nvvm_atom_cta_or_gen_i:
9046   case NVPTX::BI__nvvm_atom_cta_or_gen_l:
9047   case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
9048     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta);
9049   case NVPTX::BI__nvvm_atom_sys_or_gen_i:
9050   case NVPTX::BI__nvvm_atom_sys_or_gen_l:
9051   case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
9052     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys);
9053   case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
9054   case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
9055   case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
9056     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta);
9057   case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
9058   case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
9059   case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
9060     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys);
9061   case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
9062   case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
9063   case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
9064     Value *Ptr = EmitScalarExpr(E->getArg(0));
9065     return Builder.CreateCall(
9066         CGM.getIntrinsic(
9067             Intrinsic::nvvm_atomic_cas_gen_i_cta,
9068             {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
9069         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
9070   }
9071   case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
9072   case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
9073   case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
9074     Value *Ptr = EmitScalarExpr(E->getArg(0));
9075     return Builder.CreateCall(
9076         CGM.getIntrinsic(
9077             Intrinsic::nvvm_atomic_cas_gen_i_sys,
9078             {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
9079         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
9080   }
9081   default:
9082     return nullptr;
9083   }
9084 }
9085 
9086 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
9087                                                    const CallExpr *E) {
9088   switch (BuiltinID) {
9089   case WebAssembly::BI__builtin_wasm_current_memory: {
9090     llvm::Type *ResultType = ConvertType(E->getType());
9091     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
9092     return Builder.CreateCall(Callee);
9093   }
9094   case WebAssembly::BI__builtin_wasm_grow_memory: {
9095     Value *X = EmitScalarExpr(E->getArg(0));
9096     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
9097     return Builder.CreateCall(Callee, X);
9098   }
9099 
9100   default:
9101     return nullptr;
9102   }
9103 }
9104