1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CodeGenFunction.h"
15 #include "CGCXXABI.h"
16 #include "CGObjCRuntime.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/ASTContext.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/Basic/TargetBuiltins.h"
22 #include "clang/Basic/TargetInfo.h"
23 #include "clang/CodeGen/CGFunctionInfo.h"
24 #include "llvm/ADT/StringExtras.h"
25 #include "llvm/IR/CallSite.h"
26 #include "llvm/IR/DataLayout.h"
27 #include "llvm/IR/InlineAsm.h"
28 #include "llvm/IR/Intrinsics.h"
29 #include "llvm/IR/MDBuilder.h"
30 #include <sstream>
31 
32 using namespace clang;
33 using namespace CodeGen;
34 using namespace llvm;
35 
36 /// getBuiltinLibFunction - Given a builtin id for a function like
37 /// "__builtin_fabsf", return a Function* for "fabsf".
38 llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
39                                                   unsigned BuiltinID) {
40   assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
41 
42   // Get the name, skip over the __builtin_ prefix (if necessary).
43   StringRef Name;
44   GlobalDecl D(FD);
45 
46   // If the builtin has been declared explicitly with an assembler label,
47   // use the mangled name. This differs from the plain label on platforms
48   // that prefix labels.
49   if (FD->hasAttr<AsmLabelAttr>())
50     Name = getMangledName(D);
51   else
52     Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
53 
54   llvm::FunctionType *Ty =
55     cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
56 
57   return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
58 }
59 
60 /// Emit the conversions required to turn the given value into an
61 /// integer of the given size.
62 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
63                         QualType T, llvm::IntegerType *IntType) {
64   V = CGF.EmitToMemory(V, T);
65 
66   if (V->getType()->isPointerTy())
67     return CGF.Builder.CreatePtrToInt(V, IntType);
68 
69   assert(V->getType() == IntType);
70   return V;
71 }
72 
73 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
74                           QualType T, llvm::Type *ResultType) {
75   V = CGF.EmitFromMemory(V, T);
76 
77   if (ResultType->isPointerTy())
78     return CGF.Builder.CreateIntToPtr(V, ResultType);
79 
80   assert(V->getType() == ResultType);
81   return V;
82 }
83 
84 /// Utility to insert an atomic instruction based on Instrinsic::ID
85 /// and the expression node.
86 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
87                                     llvm::AtomicRMWInst::BinOp Kind,
88                                     const CallExpr *E) {
89   QualType T = E->getType();
90   assert(E->getArg(0)->getType()->isPointerType());
91   assert(CGF.getContext().hasSameUnqualifiedType(T,
92                                   E->getArg(0)->getType()->getPointeeType()));
93   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
94 
95   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
96   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
97 
98   llvm::IntegerType *IntType =
99     llvm::IntegerType::get(CGF.getLLVMContext(),
100                            CGF.getContext().getTypeSize(T));
101   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
102 
103   llvm::Value *Args[2];
104   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
105   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
106   llvm::Type *ValueType = Args[1]->getType();
107   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
108 
109   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
110       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
111   return EmitFromInt(CGF, Result, T, ValueType);
112 }
113 
114 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
115   Value *Val = CGF.EmitScalarExpr(E->getArg(0));
116   Value *Address = CGF.EmitScalarExpr(E->getArg(1));
117 
118   // Convert the type of the pointer to a pointer to the stored type.
119   Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
120   Value *BC = CGF.Builder.CreateBitCast(
121       Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
122   LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
123   LV.setNontemporal(true);
124   CGF.EmitStoreOfScalar(Val, LV, false);
125   return nullptr;
126 }
127 
128 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
129   Value *Address = CGF.EmitScalarExpr(E->getArg(0));
130 
131   LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
132   LV.setNontemporal(true);
133   return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
134 }
135 
136 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
137                                llvm::AtomicRMWInst::BinOp Kind,
138                                const CallExpr *E) {
139   return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
140 }
141 
142 /// Utility to insert an atomic instruction based Instrinsic::ID and
143 /// the expression node, where the return value is the result of the
144 /// operation.
145 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
146                                    llvm::AtomicRMWInst::BinOp Kind,
147                                    const CallExpr *E,
148                                    Instruction::BinaryOps Op,
149                                    bool Invert = false) {
150   QualType T = E->getType();
151   assert(E->getArg(0)->getType()->isPointerType());
152   assert(CGF.getContext().hasSameUnqualifiedType(T,
153                                   E->getArg(0)->getType()->getPointeeType()));
154   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
155 
156   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
157   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
158 
159   llvm::IntegerType *IntType =
160     llvm::IntegerType::get(CGF.getLLVMContext(),
161                            CGF.getContext().getTypeSize(T));
162   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
163 
164   llvm::Value *Args[2];
165   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
166   llvm::Type *ValueType = Args[1]->getType();
167   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
168   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
169 
170   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
171       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
172   Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
173   if (Invert)
174     Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
175                                      llvm::ConstantInt::get(IntType, -1));
176   Result = EmitFromInt(CGF, Result, T, ValueType);
177   return RValue::get(Result);
178 }
179 
180 /// @brief Utility to insert an atomic cmpxchg instruction.
181 ///
182 /// @param CGF The current codegen function.
183 /// @param E   Builtin call expression to convert to cmpxchg.
184 ///            arg0 - address to operate on
185 ///            arg1 - value to compare with
186 ///            arg2 - new value
187 /// @param ReturnBool Specifies whether to return success flag of
188 ///                   cmpxchg result or the old value.
189 ///
190 /// @returns result of cmpxchg, according to ReturnBool
191 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
192                                      bool ReturnBool) {
193   QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
194   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
195   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
196 
197   llvm::IntegerType *IntType = llvm::IntegerType::get(
198       CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
199   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
200 
201   Value *Args[3];
202   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
203   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
204   llvm::Type *ValueType = Args[1]->getType();
205   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
206   Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
207 
208   Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
209       Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
210       llvm::AtomicOrdering::SequentiallyConsistent);
211   if (ReturnBool)
212     // Extract boolean success flag and zext it to int.
213     return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
214                                   CGF.ConvertType(E->getType()));
215   else
216     // Extract old value and emit it using the same type as compare value.
217     return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
218                        ValueType);
219 }
220 
221 // Emit a simple mangled intrinsic that has 1 argument and a return type
222 // matching the argument type.
223 static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
224                                const CallExpr *E,
225                                unsigned IntrinsicID) {
226   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
227 
228   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
229   return CGF.Builder.CreateCall(F, Src0);
230 }
231 
232 // Emit an intrinsic that has 2 operands of the same type as its result.
233 static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
234                                 const CallExpr *E,
235                                 unsigned IntrinsicID) {
236   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
237   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
238 
239   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
240   return CGF.Builder.CreateCall(F, { Src0, Src1 });
241 }
242 
243 // Emit an intrinsic that has 3 operands of the same type as its result.
244 static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
245                                  const CallExpr *E,
246                                  unsigned IntrinsicID) {
247   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
248   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
249   llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
250 
251   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
252   return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
253 }
254 
255 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
256 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
257                                const CallExpr *E,
258                                unsigned IntrinsicID) {
259   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
260   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
261 
262   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
263   return CGF.Builder.CreateCall(F, {Src0, Src1});
264 }
265 
266 /// EmitFAbs - Emit a call to @llvm.fabs().
267 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
268   Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
269   llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
270   Call->setDoesNotAccessMemory();
271   return Call;
272 }
273 
274 /// Emit the computation of the sign bit for a floating point value. Returns
275 /// the i1 sign bit value.
276 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
277   LLVMContext &C = CGF.CGM.getLLVMContext();
278 
279   llvm::Type *Ty = V->getType();
280   int Width = Ty->getPrimitiveSizeInBits();
281   llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
282   V = CGF.Builder.CreateBitCast(V, IntTy);
283   if (Ty->isPPC_FP128Ty()) {
284     // We want the sign bit of the higher-order double. The bitcast we just
285     // did works as if the double-double was stored to memory and then
286     // read as an i128. The "store" will put the higher-order double in the
287     // lower address in both little- and big-Endian modes, but the "load"
288     // will treat those bits as a different part of the i128: the low bits in
289     // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
290     // we need to shift the high bits down to the low before truncating.
291     Width >>= 1;
292     if (CGF.getTarget().isBigEndian()) {
293       Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
294       V = CGF.Builder.CreateLShr(V, ShiftCst);
295     }
296     // We are truncating value in order to extract the higher-order
297     // double, which we will be using to extract the sign from.
298     IntTy = llvm::IntegerType::get(C, Width);
299     V = CGF.Builder.CreateTrunc(V, IntTy);
300   }
301   Value *Zero = llvm::Constant::getNullValue(IntTy);
302   return CGF.Builder.CreateICmpSLT(V, Zero);
303 }
304 
305 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn,
306                               const CallExpr *E, llvm::Value *calleeValue) {
307   return CGF.EmitCall(E->getCallee()->getType(), calleeValue, E,
308                       ReturnValueSlot(), Fn);
309 }
310 
311 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
312 /// depending on IntrinsicID.
313 ///
314 /// \arg CGF The current codegen function.
315 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
316 /// \arg X The first argument to the llvm.*.with.overflow.*.
317 /// \arg Y The second argument to the llvm.*.with.overflow.*.
318 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
319 /// \returns The result (i.e. sum/product) returned by the intrinsic.
320 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
321                                           const llvm::Intrinsic::ID IntrinsicID,
322                                           llvm::Value *X, llvm::Value *Y,
323                                           llvm::Value *&Carry) {
324   // Make sure we have integers of the same width.
325   assert(X->getType() == Y->getType() &&
326          "Arguments must be the same type. (Did you forget to make sure both "
327          "arguments have the same integer width?)");
328 
329   llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
330   llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
331   Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
332   return CGF.Builder.CreateExtractValue(Tmp, 0);
333 }
334 
335 static Value *emitRangedBuiltin(CodeGenFunction &CGF,
336                                 unsigned IntrinsicID,
337                                 int low, int high) {
338     llvm::MDBuilder MDHelper(CGF.getLLVMContext());
339     llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
340     Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
341     llvm::Instruction *Call = CGF.Builder.CreateCall(F);
342     Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
343     return Call;
344 }
345 
namespace {

/// Describes an integer type by its bit width and whether it is signed.
struct WidthAndSignedness {
  unsigned Width; // Width of the type in bits.
  bool Signed;    // True if the type is signed.
};

} // end anonymous namespace
352 
353 static WidthAndSignedness
354 getIntegerWidthAndSignedness(const clang::ASTContext &context,
355                              const clang::QualType Type) {
356   assert(Type->isIntegerType() && "Given type is not an integer.");
357   unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
358   bool Signed = Type->isSignedIntegerType();
359   return {Width, Signed};
360 }
361 
362 // Given one or more integer types, this function produces an integer type that
363 // encompasses them: any value in one of the given types could be expressed in
364 // the encompassing type.
365 static struct WidthAndSignedness
366 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
367   assert(Types.size() > 0 && "Empty list of types.");
368 
369   // If any of the given types is signed, we must return a signed type.
370   bool Signed = false;
371   for (const auto &Type : Types) {
372     Signed |= Type.Signed;
373   }
374 
375   // The encompassing type must have a width greater than or equal to the width
376   // of the specified types.  Aditionally, if the encompassing type is signed,
377   // its width must be strictly greater than the width of any unsigned types
378   // given.
379   unsigned Width = 0;
380   for (const auto &Type : Types) {
381     unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
382     if (Width < MinWidth) {
383       Width = MinWidth;
384     }
385   }
386 
387   return {Width, Signed};
388 }
389 
390 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
391   llvm::Type *DestType = Int8PtrTy;
392   if (ArgValue->getType() != DestType)
393     ArgValue =
394         Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
395 
396   Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
397   return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
398 }
399 
/// Checks if using the result of __builtin_object_size(p, @p From) in place of
/// __builtin_object_size(p, @p To) is correct.
///
/// Besides identical types, only two substitutions are accepted: a Type=0
/// result in place of Type=1, and a Type=3 result in place of Type=2.
static bool areBOSTypesCompatible(int From, int To) {
  // Note: Our __builtin_object_size implementation currently treats Type=0 and
  // Type=2 identically. Encoding this implementation detail here may make
  // improving __builtin_object_size difficult in the future, so it's omitted.
  if (From == To)
    return true;
  if (From == 0)
    return To == 1;
  if (From == 3)
    return To == 2;
  return false;
}
408 
409 static llvm::Value *
410 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
411   return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
412 }
413 
414 llvm::Value *
415 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
416                                                  llvm::IntegerType *ResType) {
417   uint64_t ObjectSize;
418   if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
419     return emitBuiltinObjectSize(E, Type, ResType);
420   return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
421 }
422 
423 /// Returns a Value corresponding to the size of the given expression.
424 /// This Value may be either of the following:
425 ///   - A llvm::Argument (if E is a param with the pass_object_size attribute on
426 ///     it)
427 ///   - A call to the @llvm.objectsize intrinsic
428 llvm::Value *
429 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
430                                        llvm::IntegerType *ResType) {
431   // We need to reference an argument if the pointer is a parameter with the
432   // pass_object_size attribute.
433   if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
434     auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
435     auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
436     if (Param != nullptr && PS != nullptr &&
437         areBOSTypesCompatible(PS->getType(), Type)) {
438       auto Iter = SizeArguments.find(Param);
439       assert(Iter != SizeArguments.end());
440 
441       const ImplicitParamDecl *D = Iter->second;
442       auto DIter = LocalDeclMap.find(D);
443       assert(DIter != LocalDeclMap.end());
444 
445       return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
446                               getContext().getSizeType(), E->getLocStart());
447     }
448   }
449 
450   // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
451   // evaluate E for side-effects. In either case, we shouldn't lower to
452   // @llvm.objectsize.
453   if (Type == 3 || E->HasSideEffects(getContext()))
454     return getDefaultBuiltinObjectSizeResult(Type, ResType);
455 
456   // LLVM only supports 0 and 2, make sure that we pass along that
457   // as a boolean.
458   auto *CI = ConstantInt::get(Builder.getInt1Ty(), (Type & 2) >> 1);
459   // FIXME: Get right address space.
460   llvm::Type *Tys[] = {ResType, Builder.getInt8PtrTy(0)};
461   Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys);
462   return Builder.CreateCall(F, {EmitScalarExpr(E), CI});
463 }
464 
465 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
466                                         unsigned BuiltinID, const CallExpr *E,
467                                         ReturnValueSlot ReturnValue) {
468   // See if we can constant fold this builtin.  If so, don't emit it at all.
469   Expr::EvalResult Result;
470   if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
471       !Result.hasSideEffects()) {
472     if (Result.Val.isInt())
473       return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
474                                                 Result.Val.getInt()));
475     if (Result.Val.isFloat())
476       return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
477                                                Result.Val.getFloat()));
478   }
479 
480   switch (BuiltinID) {
481   default: break;  // Handle intrinsics and libm functions below.
482   case Builtin::BI__builtin___CFStringMakeConstantString:
483   case Builtin::BI__builtin___NSStringMakeConstantString:
484     return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr));
485   case Builtin::BI__builtin_stdarg_start:
486   case Builtin::BI__builtin_va_start:
487   case Builtin::BI__va_start:
488   case Builtin::BI__builtin_va_end:
489     return RValue::get(
490         EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
491                            ? EmitScalarExpr(E->getArg(0))
492                            : EmitVAListRef(E->getArg(0)).getPointer(),
493                        BuiltinID != Builtin::BI__builtin_va_end));
494   case Builtin::BI__builtin_va_copy: {
495     Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
496     Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
497 
498     llvm::Type *Type = Int8PtrTy;
499 
500     DstPtr = Builder.CreateBitCast(DstPtr, Type);
501     SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
502     return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
503                                           {DstPtr, SrcPtr}));
504   }
505   case Builtin::BI__builtin_abs:
506   case Builtin::BI__builtin_labs:
507   case Builtin::BI__builtin_llabs: {
508     Value *ArgValue = EmitScalarExpr(E->getArg(0));
509 
510     Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
511     Value *CmpResult =
512     Builder.CreateICmpSGE(ArgValue,
513                           llvm::Constant::getNullValue(ArgValue->getType()),
514                                                             "abscond");
515     Value *Result =
516       Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
517 
518     return RValue::get(Result);
519   }
520   case Builtin::BI__builtin_fabs:
521   case Builtin::BI__builtin_fabsf:
522   case Builtin::BI__builtin_fabsl: {
523     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
524   }
525   case Builtin::BI__builtin_fmod:
526   case Builtin::BI__builtin_fmodf:
527   case Builtin::BI__builtin_fmodl: {
528     Value *Arg1 = EmitScalarExpr(E->getArg(0));
529     Value *Arg2 = EmitScalarExpr(E->getArg(1));
530     Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
531     return RValue::get(Result);
532   }
533   case Builtin::BI__builtin_copysign:
534   case Builtin::BI__builtin_copysignf:
535   case Builtin::BI__builtin_copysignl: {
536     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
537   }
538   case Builtin::BI__builtin_ceil:
539   case Builtin::BI__builtin_ceilf:
540   case Builtin::BI__builtin_ceill: {
541     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
542   }
543   case Builtin::BI__builtin_floor:
544   case Builtin::BI__builtin_floorf:
545   case Builtin::BI__builtin_floorl: {
546     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
547   }
548   case Builtin::BI__builtin_trunc:
549   case Builtin::BI__builtin_truncf:
550   case Builtin::BI__builtin_truncl: {
551     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
552   }
553   case Builtin::BI__builtin_rint:
554   case Builtin::BI__builtin_rintf:
555   case Builtin::BI__builtin_rintl: {
556     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
557   }
558   case Builtin::BI__builtin_nearbyint:
559   case Builtin::BI__builtin_nearbyintf:
560   case Builtin::BI__builtin_nearbyintl: {
561     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
562   }
563   case Builtin::BI__builtin_round:
564   case Builtin::BI__builtin_roundf:
565   case Builtin::BI__builtin_roundl: {
566     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
567   }
568   case Builtin::BI__builtin_fmin:
569   case Builtin::BI__builtin_fminf:
570   case Builtin::BI__builtin_fminl: {
571     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
572   }
573   case Builtin::BI__builtin_fmax:
574   case Builtin::BI__builtin_fmaxf:
575   case Builtin::BI__builtin_fmaxl: {
576     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
577   }
578   case Builtin::BI__builtin_conj:
579   case Builtin::BI__builtin_conjf:
580   case Builtin::BI__builtin_conjl: {
581     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
582     Value *Real = ComplexVal.first;
583     Value *Imag = ComplexVal.second;
584     Value *Zero =
585       Imag->getType()->isFPOrFPVectorTy()
586         ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
587         : llvm::Constant::getNullValue(Imag->getType());
588 
589     Imag = Builder.CreateFSub(Zero, Imag, "sub");
590     return RValue::getComplex(std::make_pair(Real, Imag));
591   }
592   case Builtin::BI__builtin_creal:
593   case Builtin::BI__builtin_crealf:
594   case Builtin::BI__builtin_creall:
595   case Builtin::BIcreal:
596   case Builtin::BIcrealf:
597   case Builtin::BIcreall: {
598     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
599     return RValue::get(ComplexVal.first);
600   }
601 
602   case Builtin::BI__builtin_cimag:
603   case Builtin::BI__builtin_cimagf:
604   case Builtin::BI__builtin_cimagl:
605   case Builtin::BIcimag:
606   case Builtin::BIcimagf:
607   case Builtin::BIcimagl: {
608     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
609     return RValue::get(ComplexVal.second);
610   }
611 
612   case Builtin::BI__builtin_ctzs:
613   case Builtin::BI__builtin_ctz:
614   case Builtin::BI__builtin_ctzl:
615   case Builtin::BI__builtin_ctzll: {
616     Value *ArgValue = EmitScalarExpr(E->getArg(0));
617 
618     llvm::Type *ArgType = ArgValue->getType();
619     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
620 
621     llvm::Type *ResultType = ConvertType(E->getType());
622     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
623     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
624     if (Result->getType() != ResultType)
625       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
626                                      "cast");
627     return RValue::get(Result);
628   }
629   case Builtin::BI__builtin_clzs:
630   case Builtin::BI__builtin_clz:
631   case Builtin::BI__builtin_clzl:
632   case Builtin::BI__builtin_clzll: {
633     Value *ArgValue = EmitScalarExpr(E->getArg(0));
634 
635     llvm::Type *ArgType = ArgValue->getType();
636     Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
637 
638     llvm::Type *ResultType = ConvertType(E->getType());
639     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
640     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
641     if (Result->getType() != ResultType)
642       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
643                                      "cast");
644     return RValue::get(Result);
645   }
646   case Builtin::BI__builtin_ffs:
647   case Builtin::BI__builtin_ffsl:
648   case Builtin::BI__builtin_ffsll: {
649     // ffs(x) -> x ? cttz(x) + 1 : 0
650     Value *ArgValue = EmitScalarExpr(E->getArg(0));
651 
652     llvm::Type *ArgType = ArgValue->getType();
653     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
654 
655     llvm::Type *ResultType = ConvertType(E->getType());
656     Value *Tmp =
657         Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
658                           llvm::ConstantInt::get(ArgType, 1));
659     Value *Zero = llvm::Constant::getNullValue(ArgType);
660     Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
661     Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
662     if (Result->getType() != ResultType)
663       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
664                                      "cast");
665     return RValue::get(Result);
666   }
667   case Builtin::BI__builtin_parity:
668   case Builtin::BI__builtin_parityl:
669   case Builtin::BI__builtin_parityll: {
670     // parity(x) -> ctpop(x) & 1
671     Value *ArgValue = EmitScalarExpr(E->getArg(0));
672 
673     llvm::Type *ArgType = ArgValue->getType();
674     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
675 
676     llvm::Type *ResultType = ConvertType(E->getType());
677     Value *Tmp = Builder.CreateCall(F, ArgValue);
678     Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
679     if (Result->getType() != ResultType)
680       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
681                                      "cast");
682     return RValue::get(Result);
683   }
684   case Builtin::BI__builtin_popcount:
685   case Builtin::BI__builtin_popcountl:
686   case Builtin::BI__builtin_popcountll: {
687     Value *ArgValue = EmitScalarExpr(E->getArg(0));
688 
689     llvm::Type *ArgType = ArgValue->getType();
690     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
691 
692     llvm::Type *ResultType = ConvertType(E->getType());
693     Value *Result = Builder.CreateCall(F, ArgValue);
694     if (Result->getType() != ResultType)
695       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
696                                      "cast");
697     return RValue::get(Result);
698   }
699   case Builtin::BI__builtin_unpredictable: {
700     // Always return the argument of __builtin_unpredictable. LLVM does not
701     // handle this builtin. Metadata for this builtin should be added directly
702     // to instructions such as branches or switches that use it.
703     return RValue::get(EmitScalarExpr(E->getArg(0)));
704   }
705   case Builtin::BI__builtin_expect: {
706     Value *ArgValue = EmitScalarExpr(E->getArg(0));
707     llvm::Type *ArgType = ArgValue->getType();
708 
709     Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
710     // Don't generate llvm.expect on -O0 as the backend won't use it for
711     // anything.
712     // Note, we still IRGen ExpectedValue because it could have side-effects.
713     if (CGM.getCodeGenOpts().OptimizationLevel == 0)
714       return RValue::get(ArgValue);
715 
716     Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
717     Value *Result =
718         Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
719     return RValue::get(Result);
720   }
721   case Builtin::BI__builtin_assume_aligned: {
722     Value *PtrValue = EmitScalarExpr(E->getArg(0));
723     Value *OffsetValue =
724       (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
725 
726     Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
727     ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
728     unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
729 
730     EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
731     return RValue::get(PtrValue);
732   }
733   case Builtin::BI__assume:
734   case Builtin::BI__builtin_assume: {
735     if (E->getArg(0)->HasSideEffects(getContext()))
736       return RValue::get(nullptr);
737 
738     Value *ArgValue = EmitScalarExpr(E->getArg(0));
739     Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
740     return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
741   }
742   case Builtin::BI__builtin_bswap16:
743   case Builtin::BI__builtin_bswap32:
744   case Builtin::BI__builtin_bswap64: {
745     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
746   }
747   case Builtin::BI__builtin_bitreverse8:
748   case Builtin::BI__builtin_bitreverse16:
749   case Builtin::BI__builtin_bitreverse32:
750   case Builtin::BI__builtin_bitreverse64: {
751     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
752   }
753   case Builtin::BI__builtin_object_size: {
754     unsigned Type =
755         E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
756     auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
757 
758     // We pass this builtin onto the optimizer so that it can figure out the
759     // object size in more complex cases.
760     return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType));
761   }
762   case Builtin::BI__builtin_prefetch: {
763     Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
764     // FIXME: Technically these constants should of type 'int', yes?
765     RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
766       llvm::ConstantInt::get(Int32Ty, 0);
767     Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
768       llvm::ConstantInt::get(Int32Ty, 3);
769     Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
770     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
771     return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
772   }
773   case Builtin::BI__builtin_readcyclecounter: {
774     Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
775     return RValue::get(Builder.CreateCall(F));
776   }
777   case Builtin::BI__builtin___clear_cache: {
778     Value *Begin = EmitScalarExpr(E->getArg(0));
779     Value *End = EmitScalarExpr(E->getArg(1));
780     Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
781     return RValue::get(Builder.CreateCall(F, {Begin, End}));
782   }
783   case Builtin::BI__builtin_trap:
784     return RValue::get(EmitTrapCall(Intrinsic::trap));
785   case Builtin::BI__debugbreak:
786     return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
787   case Builtin::BI__builtin_unreachable: {
788     if (SanOpts.has(SanitizerKind::Unreachable)) {
789       SanitizerScope SanScope(this);
790       EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
791                                SanitizerKind::Unreachable),
792                 "builtin_unreachable", EmitCheckSourceLocation(E->getExprLoc()),
793                 None);
794     } else
795       Builder.CreateUnreachable();
796 
797     // We do need to preserve an insertion point.
798     EmitBlock(createBasicBlock("unreachable.cont"));
799 
800     return RValue::get(nullptr);
801   }
802 
803   case Builtin::BI__builtin_powi:
804   case Builtin::BI__builtin_powif:
805   case Builtin::BI__builtin_powil: {
806     Value *Base = EmitScalarExpr(E->getArg(0));
807     Value *Exponent = EmitScalarExpr(E->getArg(1));
808     llvm::Type *ArgType = Base->getType();
809     Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
810     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
811   }
812 
813   case Builtin::BI__builtin_isgreater:
814   case Builtin::BI__builtin_isgreaterequal:
815   case Builtin::BI__builtin_isless:
816   case Builtin::BI__builtin_islessequal:
817   case Builtin::BI__builtin_islessgreater:
818   case Builtin::BI__builtin_isunordered: {
819     // Ordered comparisons: we know the arguments to these are matching scalar
820     // floating point values.
821     Value *LHS = EmitScalarExpr(E->getArg(0));
822     Value *RHS = EmitScalarExpr(E->getArg(1));
823 
824     switch (BuiltinID) {
825     default: llvm_unreachable("Unknown ordered comparison");
826     case Builtin::BI__builtin_isgreater:
827       LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
828       break;
829     case Builtin::BI__builtin_isgreaterequal:
830       LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
831       break;
832     case Builtin::BI__builtin_isless:
833       LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
834       break;
835     case Builtin::BI__builtin_islessequal:
836       LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
837       break;
838     case Builtin::BI__builtin_islessgreater:
839       LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
840       break;
841     case Builtin::BI__builtin_isunordered:
842       LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
843       break;
844     }
845     // ZExt bool to int type.
846     return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
847   }
848   case Builtin::BI__builtin_isnan: {
849     Value *V = EmitScalarExpr(E->getArg(0));
850     V = Builder.CreateFCmpUNO(V, V, "cmp");
851     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
852   }
853 
854   case Builtin::BI__builtin_isinf:
855   case Builtin::BI__builtin_isfinite: {
856     // isinf(x)    --> fabs(x) == infinity
857     // isfinite(x) --> fabs(x) != infinity
858     // x != NaN via the ordered compare in either case.
859     Value *V = EmitScalarExpr(E->getArg(0));
860     Value *Fabs = EmitFAbs(*this, V);
861     Constant *Infinity = ConstantFP::getInfinity(V->getType());
862     CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
863                                   ? CmpInst::FCMP_OEQ
864                                   : CmpInst::FCMP_ONE;
865     Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
866     return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
867   }
868 
869   case Builtin::BI__builtin_isinf_sign: {
870     // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
871     Value *Arg = EmitScalarExpr(E->getArg(0));
872     Value *AbsArg = EmitFAbs(*this, Arg);
873     Value *IsInf = Builder.CreateFCmpOEQ(
874         AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
875     Value *IsNeg = EmitSignBit(*this, Arg);
876 
877     llvm::Type *IntTy = ConvertType(E->getType());
878     Value *Zero = Constant::getNullValue(IntTy);
879     Value *One = ConstantInt::get(IntTy, 1);
880     Value *NegativeOne = ConstantInt::get(IntTy, -1);
881     Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
882     Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
883     return RValue::get(Result);
884   }
885 
886   case Builtin::BI__builtin_isnormal: {
887     // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
888     Value *V = EmitScalarExpr(E->getArg(0));
889     Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
890 
891     Value *Abs = EmitFAbs(*this, V);
892     Value *IsLessThanInf =
893       Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
894     APFloat Smallest = APFloat::getSmallestNormalized(
895                    getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
896     Value *IsNormal =
897       Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
898                             "isnormal");
899     V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
900     V = Builder.CreateAnd(V, IsNormal, "and");
901     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
902   }
903 
  case Builtin::BI__builtin_fpclassify: {
    // Operand layout as used below: argument 5 is the floating-point value
    // being classified; arguments 0-4 are the results to produce for NaN,
    // infinity, normal, subnormal and zero respectively.
    //
    // The classification is emitted as a chain of conditional branches that
    // all converge on a single PHI node in the "fpclassify_end" block.
    Value *V = EmitScalarExpr(E->getArg(5));
    llvm::Type *Ty = ConvertType(E->getArg(5)->getType());

    // Create Result
    // The PHI is created first, in the (still empty) end block, so each test
    // below can register its incoming value as soon as it branches there.
    BasicBlock *Begin = Builder.GetInsertBlock();
    BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
    Builder.SetInsertPoint(End);
    PHINode *Result =
      Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
                        "fpclassify_result");

    // if (V==0) return FP_ZERO
    Builder.SetInsertPoint(Begin);
    Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
                                          "iszero");
    Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
    BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
    Builder.CreateCondBr(IsZero, End, NotZero);
    Result->addIncoming(ZeroLiteral, Begin);

    // if (V != V) return FP_NAN
    Builder.SetInsertPoint(NotZero);
    Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
    Value *NanLiteral = EmitScalarExpr(E->getArg(0));
    BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
    Builder.CreateCondBr(IsNan, End, NotNan);
    Result->addIncoming(NanLiteral, NotZero);

    // if (fabs(V) == infinity) return FP_INFINITY
    Builder.SetInsertPoint(NotNan);
    Value *VAbs = EmitFAbs(*this, V);
    Value *IsInf =
      Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
                            "isinf");
    Value *InfLiteral = EmitScalarExpr(E->getArg(1));
    BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
    Builder.CreateCondBr(IsInf, End, NotInf);
    Result->addIncoming(InfLiteral, NotNan);

    // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
    // This last step needs no further branch: a select picks between the two
    // remaining results and the block falls through to the end block.
    Builder.SetInsertPoint(NotInf);
    APFloat Smallest = APFloat::getSmallestNormalized(
        getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
    Value *IsNormal =
      Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
                            "isnormal");
    Value *NormalResult =
      Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
                           EmitScalarExpr(E->getArg(3)));
    Builder.CreateBr(End);
    Result->addIncoming(NormalResult, NotInf);

    // return Result
    // Leave the builder positioned in the end block so subsequent code is
    // emitted after the PHI.
    Builder.SetInsertPoint(End);
    return RValue::get(Result);
  }
961 
962   case Builtin::BIalloca:
963   case Builtin::BI_alloca:
964   case Builtin::BI__builtin_alloca: {
965     Value *Size = EmitScalarExpr(E->getArg(0));
966     return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size));
967   }
968   case Builtin::BIbzero:
969   case Builtin::BI__builtin_bzero: {
970     Address Dest = EmitPointerWithAlignment(E->getArg(0));
971     Value *SizeVal = EmitScalarExpr(E->getArg(1));
972     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
973                         E->getArg(0)->getExprLoc(), FD, 0);
974     Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
975     return RValue::get(Dest.getPointer());
976   }
977   case Builtin::BImemcpy:
978   case Builtin::BI__builtin_memcpy: {
979     Address Dest = EmitPointerWithAlignment(E->getArg(0));
980     Address Src = EmitPointerWithAlignment(E->getArg(1));
981     Value *SizeVal = EmitScalarExpr(E->getArg(2));
982     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
983                         E->getArg(0)->getExprLoc(), FD, 0);
984     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
985                         E->getArg(1)->getExprLoc(), FD, 1);
986     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
987     return RValue::get(Dest.getPointer());
988   }
989 
990   case Builtin::BI__builtin___memcpy_chk: {
991     // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
992     llvm::APSInt Size, DstSize;
993     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
994         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
995       break;
996     if (Size.ugt(DstSize))
997       break;
998     Address Dest = EmitPointerWithAlignment(E->getArg(0));
999     Address Src = EmitPointerWithAlignment(E->getArg(1));
1000     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1001     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1002     return RValue::get(Dest.getPointer());
1003   }
1004 
1005   case Builtin::BI__builtin_objc_memmove_collectable: {
1006     Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
1007     Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
1008     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1009     CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
1010                                                   DestAddr, SrcAddr, SizeVal);
1011     return RValue::get(DestAddr.getPointer());
1012   }
1013 
1014   case Builtin::BI__builtin___memmove_chk: {
1015     // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
1016     llvm::APSInt Size, DstSize;
1017     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1018         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1019       break;
1020     if (Size.ugt(DstSize))
1021       break;
1022     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1023     Address Src = EmitPointerWithAlignment(E->getArg(1));
1024     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1025     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1026     return RValue::get(Dest.getPointer());
1027   }
1028 
1029   case Builtin::BImemmove:
1030   case Builtin::BI__builtin_memmove: {
1031     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1032     Address Src = EmitPointerWithAlignment(E->getArg(1));
1033     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1034     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1035                         E->getArg(0)->getExprLoc(), FD, 0);
1036     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1037                         E->getArg(1)->getExprLoc(), FD, 1);
1038     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1039     return RValue::get(Dest.getPointer());
1040   }
1041   case Builtin::BImemset:
1042   case Builtin::BI__builtin_memset: {
1043     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1044     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1045                                          Builder.getInt8Ty());
1046     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1047     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1048                         E->getArg(0)->getExprLoc(), FD, 0);
1049     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1050     return RValue::get(Dest.getPointer());
1051   }
1052   case Builtin::BI__builtin___memset_chk: {
1053     // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
1054     llvm::APSInt Size, DstSize;
1055     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1056         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1057       break;
1058     if (Size.ugt(DstSize))
1059       break;
1060     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1061     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1062                                          Builder.getInt8Ty());
1063     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1064     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1065     return RValue::get(Dest.getPointer());
1066   }
1067   case Builtin::BI__builtin_dwarf_cfa: {
1068     // The offset in bytes from the first argument to the CFA.
1069     //
1070     // Why on earth is this in the frontend?  Is there any reason at
1071     // all that the backend can't reasonably determine this while
1072     // lowering llvm.eh.dwarf.cfa()?
1073     //
1074     // TODO: If there's a satisfactory reason, add a target hook for
1075     // this instead of hard-coding 0, which is correct for most targets.
1076     int32_t Offset = 0;
1077 
1078     Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
1079     return RValue::get(Builder.CreateCall(F,
1080                                       llvm::ConstantInt::get(Int32Ty, Offset)));
1081   }
1082   case Builtin::BI__builtin_return_address: {
1083     Value *Depth =
1084         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1085     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1086     return RValue::get(Builder.CreateCall(F, Depth));
1087   }
1088   case Builtin::BI__builtin_frame_address: {
1089     Value *Depth =
1090         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1091     Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
1092     return RValue::get(Builder.CreateCall(F, Depth));
1093   }
1094   case Builtin::BI__builtin_extract_return_addr: {
1095     Value *Address = EmitScalarExpr(E->getArg(0));
1096     Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
1097     return RValue::get(Result);
1098   }
1099   case Builtin::BI__builtin_frob_return_addr: {
1100     Value *Address = EmitScalarExpr(E->getArg(0));
1101     Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
1102     return RValue::get(Result);
1103   }
1104   case Builtin::BI__builtin_dwarf_sp_column: {
1105     llvm::IntegerType *Ty
1106       = cast<llvm::IntegerType>(ConvertType(E->getType()));
1107     int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
1108     if (Column == -1) {
1109       CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
1110       return RValue::get(llvm::UndefValue::get(Ty));
1111     }
1112     return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
1113   }
1114   case Builtin::BI__builtin_init_dwarf_reg_size_table: {
1115     Value *Address = EmitScalarExpr(E->getArg(0));
1116     if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
1117       CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
1118     return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1119   }
1120   case Builtin::BI__builtin_eh_return: {
1121     Value *Int = EmitScalarExpr(E->getArg(0));
1122     Value *Ptr = EmitScalarExpr(E->getArg(1));
1123 
1124     llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
1125     assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
1126            "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
1127     Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
1128                                   ? Intrinsic::eh_return_i32
1129                                   : Intrinsic::eh_return_i64);
1130     Builder.CreateCall(F, {Int, Ptr});
1131     Builder.CreateUnreachable();
1132 
1133     // We do need to preserve an insertion point.
1134     EmitBlock(createBasicBlock("builtin_eh_return.cont"));
1135 
1136     return RValue::get(nullptr);
1137   }
1138   case Builtin::BI__builtin_unwind_init: {
1139     Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
1140     return RValue::get(Builder.CreateCall(F));
1141   }
1142   case Builtin::BI__builtin_extend_pointer: {
1143     // Extends a pointer to the size of an _Unwind_Word, which is
1144     // uint64_t on all platforms.  Generally this gets poked into a
1145     // register and eventually used as an address, so if the
1146     // addressing registers are wider than pointers and the platform
1147     // doesn't implicitly ignore high-order bits when doing
1148     // addressing, we need to make sure we zext / sext based on
1149     // the platform's expectations.
1150     //
1151     // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
1152 
1153     // Cast the pointer to intptr_t.
1154     Value *Ptr = EmitScalarExpr(E->getArg(0));
1155     Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
1156 
1157     // If that's 64 bits, we're done.
1158     if (IntPtrTy->getBitWidth() == 64)
1159       return RValue::get(Result);
1160 
1161     // Otherwise, ask the codegen data what to do.
1162     if (getTargetHooks().extendPointerWithSExt())
1163       return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
1164     else
1165       return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
1166   }
  case Builtin::BI__builtin_setjmp: {
    // Fills in two slots of the jump buffer (the frame address at the start
    // of the buffer, the saved stack pointer at element index 2) and then
    // calls llvm.eh.sjlj.setjmp on the buffer.

    // Buffer is a void**.
    Address Buf = EmitPointerWithAlignment(E->getArg(0));

    // Store the frame pointer to the setjmp buffer.
    // This is frame level 0, stored at the start of the buffer.
    Value *FrameAddr =
      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
                         ConstantInt::get(Int32Ty, 0));
    Builder.CreateStore(FrameAddr, Buf);

    // Store the stack pointer to the setjmp buffer.
    // The value comes from llvm.stacksave and goes into element index 2,
    // addressed in pointer-sized steps.
    Value *StackAddr =
        Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
    Address StackSaveSlot =
      Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
    Builder.CreateStore(StackAddr, StackSaveSlot);

    // Call LLVM's EH setjmp, which is lightweight.
    // The buffer is passed as an i8* and the call's result is the builtin's
    // value.
    Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
    return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
  }
1189   case Builtin::BI__builtin_longjmp: {
1190     Value *Buf = EmitScalarExpr(E->getArg(0));
1191     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1192 
1193     // Call LLVM's EH longjmp, which is lightweight.
1194     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
1195 
1196     // longjmp doesn't return; mark this as unreachable.
1197     Builder.CreateUnreachable();
1198 
1199     // We do need to preserve an insertion point.
1200     EmitBlock(createBasicBlock("longjmp.cont"));
1201 
1202     return RValue::get(nullptr);
1203   }
  // The un-suffixed __sync_* builtins must never reach IR generation: hitting
  // any of these labels is treated as an internal error.
  // NOTE(review): presumably Sema rewrites them to the size-suffixed forms
  // handled below -- confirm against the Sema builtin handling.
  case Builtin::BI__sync_fetch_and_add:
  case Builtin::BI__sync_fetch_and_sub:
  case Builtin::BI__sync_fetch_and_or:
  case Builtin::BI__sync_fetch_and_and:
  case Builtin::BI__sync_fetch_and_xor:
  case Builtin::BI__sync_fetch_and_nand:
  case Builtin::BI__sync_add_and_fetch:
  case Builtin::BI__sync_sub_and_fetch:
  case Builtin::BI__sync_and_and_fetch:
  case Builtin::BI__sync_or_and_fetch:
  case Builtin::BI__sync_xor_and_fetch:
  case Builtin::BI__sync_nand_and_fetch:
  case Builtin::BI__sync_val_compare_and_swap:
  case Builtin::BI__sync_bool_compare_and_swap:
  case Builtin::BI__sync_lock_test_and_set:
  case Builtin::BI__sync_lock_release:
  case Builtin::BI__sync_swap:
    llvm_unreachable("Shouldn't make it through sema");
  // Size-suffixed __sync_fetch_and_<op>_N builtins. Each group covers the
  // 1/2/4/8/16 suffixes and forwards to EmitBinaryAtomic with the
  // AtomicRMWInst operation that matches <op>.
  case Builtin::BI__sync_fetch_and_add_1:
  case Builtin::BI__sync_fetch_and_add_2:
  case Builtin::BI__sync_fetch_and_add_4:
  case Builtin::BI__sync_fetch_and_add_8:
  case Builtin::BI__sync_fetch_and_add_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
  case Builtin::BI__sync_fetch_and_sub_1:
  case Builtin::BI__sync_fetch_and_sub_2:
  case Builtin::BI__sync_fetch_and_sub_4:
  case Builtin::BI__sync_fetch_and_sub_8:
  case Builtin::BI__sync_fetch_and_sub_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
  case Builtin::BI__sync_fetch_and_or_1:
  case Builtin::BI__sync_fetch_and_or_2:
  case Builtin::BI__sync_fetch_and_or_4:
  case Builtin::BI__sync_fetch_and_or_8:
  case Builtin::BI__sync_fetch_and_or_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
  case Builtin::BI__sync_fetch_and_and_1:
  case Builtin::BI__sync_fetch_and_and_2:
  case Builtin::BI__sync_fetch_and_and_4:
  case Builtin::BI__sync_fetch_and_and_8:
  case Builtin::BI__sync_fetch_and_and_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
  case Builtin::BI__sync_fetch_and_xor_1:
  case Builtin::BI__sync_fetch_and_xor_2:
  case Builtin::BI__sync_fetch_and_xor_4:
  case Builtin::BI__sync_fetch_and_xor_8:
  case Builtin::BI__sync_fetch_and_xor_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
  case Builtin::BI__sync_fetch_and_nand_1:
  case Builtin::BI__sync_fetch_and_nand_2:
  case Builtin::BI__sync_fetch_and_nand_4:
  case Builtin::BI__sync_fetch_and_nand_8:
  case Builtin::BI__sync_fetch_and_nand_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
1258 
  // Clang extensions: not overloaded yet.
  // Unlike the groups around them, these have no size-suffixed variants
  // here; each forwards to EmitBinaryAtomic with the signed (Min/Max) or
  // unsigned (UMin/UMax) AtomicRMWInst operation.
  case Builtin::BI__sync_fetch_and_min:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
  case Builtin::BI__sync_fetch_and_max:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
  case Builtin::BI__sync_fetch_and_umin:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
  case Builtin::BI__sync_fetch_and_umax:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
1268 
  // Size-suffixed __sync_<op>_and_fetch_N builtins. Each group forwards to
  // EmitBinaryAtomicPost with the AtomicRMWInst operation plus the ordinary
  // instruction opcode for the same <op>.
  // NOTE(review): the opcode is presumably used to recompute the updated
  // value from the atomicrmw result -- confirm in EmitBinaryAtomicPost.
  case Builtin::BI__sync_add_and_fetch_1:
  case Builtin::BI__sync_add_and_fetch_2:
  case Builtin::BI__sync_add_and_fetch_4:
  case Builtin::BI__sync_add_and_fetch_8:
  case Builtin::BI__sync_add_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
                                llvm::Instruction::Add);
  case Builtin::BI__sync_sub_and_fetch_1:
  case Builtin::BI__sync_sub_and_fetch_2:
  case Builtin::BI__sync_sub_and_fetch_4:
  case Builtin::BI__sync_sub_and_fetch_8:
  case Builtin::BI__sync_sub_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
                                llvm::Instruction::Sub);
  case Builtin::BI__sync_and_and_fetch_1:
  case Builtin::BI__sync_and_and_fetch_2:
  case Builtin::BI__sync_and_and_fetch_4:
  case Builtin::BI__sync_and_and_fetch_8:
  case Builtin::BI__sync_and_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
                                llvm::Instruction::And);
  case Builtin::BI__sync_or_and_fetch_1:
  case Builtin::BI__sync_or_and_fetch_2:
  case Builtin::BI__sync_or_and_fetch_4:
  case Builtin::BI__sync_or_and_fetch_8:
  case Builtin::BI__sync_or_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
                                llvm::Instruction::Or);
  case Builtin::BI__sync_xor_and_fetch_1:
  case Builtin::BI__sync_xor_and_fetch_2:
  case Builtin::BI__sync_xor_and_fetch_4:
  case Builtin::BI__sync_xor_and_fetch_8:
  case Builtin::BI__sync_xor_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
                                llvm::Instruction::Xor);
  // Nand is the only group that passes the extra trailing 'true' argument,
  // paired with the And opcode.
  // NOTE(review): presumably this requests inverting the And result, since
  // there is no Nand instruction opcode -- confirm in EmitBinaryAtomicPost.
  case Builtin::BI__sync_nand_and_fetch_1:
  case Builtin::BI__sync_nand_and_fetch_2:
  case Builtin::BI__sync_nand_and_fetch_4:
  case Builtin::BI__sync_nand_and_fetch_8:
  case Builtin::BI__sync_nand_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
                                llvm::Instruction::And, true);
1311 
  // Both compare-and-swap flavours share MakeAtomicCmpXchgValue; the final
  // boolean argument is false for the __sync_val_* form and true for the
  // __sync_bool_* form.
  case Builtin::BI__sync_val_compare_and_swap_1:
  case Builtin::BI__sync_val_compare_and_swap_2:
  case Builtin::BI__sync_val_compare_and_swap_4:
  case Builtin::BI__sync_val_compare_and_swap_8:
  case Builtin::BI__sync_val_compare_and_swap_16:
    return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));

  case Builtin::BI__sync_bool_compare_and_swap_1:
  case Builtin::BI__sync_bool_compare_and_swap_2:
  case Builtin::BI__sync_bool_compare_and_swap_4:
  case Builtin::BI__sync_bool_compare_and_swap_8:
  case Builtin::BI__sync_bool_compare_and_swap_16:
    return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
1325 
  // __sync_swap_N and __sync_lock_test_and_set_N are emitted identically:
  // both forward to EmitBinaryAtomic with the Xchg operation.
  case Builtin::BI__sync_swap_1:
  case Builtin::BI__sync_swap_2:
  case Builtin::BI__sync_swap_4:
  case Builtin::BI__sync_swap_8:
  case Builtin::BI__sync_swap_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);

  case Builtin::BI__sync_lock_test_and_set_1:
  case Builtin::BI__sync_lock_test_and_set_2:
  case Builtin::BI__sync_lock_test_and_set_4:
  case Builtin::BI__sync_lock_test_and_set_8:
  case Builtin::BI__sync_lock_test_and_set_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1339 
1340   case Builtin::BI__sync_lock_release_1:
1341   case Builtin::BI__sync_lock_release_2:
1342   case Builtin::BI__sync_lock_release_4:
1343   case Builtin::BI__sync_lock_release_8:
1344   case Builtin::BI__sync_lock_release_16: {
1345     Value *Ptr = EmitScalarExpr(E->getArg(0));
1346     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
1347     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
1348     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
1349                                              StoreSize.getQuantity() * 8);
1350     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
1351     llvm::StoreInst *Store =
1352       Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
1353                                  StoreSize);
1354     Store->setAtomic(llvm::AtomicOrdering::Release);
1355     return RValue::get(nullptr);
1356   }
1357 
  case Builtin::BI__sync_synchronize: {
    // We assume this is supposed to correspond to a C++0x-style
    // sequentially-consistent fence (i.e. this is only usable for
    // synchronization, not device I/O or anything like that). This intrinsic
    // is really badly designed in the sense that in theory, there isn't
    // any way to safely use it... but in practice, it mostly works
    // to use it with non-atomic loads and stores to get acquire/release
    // semantics.
    Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
    // The builtin produces no value.
    return RValue::get(nullptr);
  }
1369 
1370   case Builtin::BI__builtin_nontemporal_load:
1371     return RValue::get(EmitNontemporalLoad(*this, E));
1372   case Builtin::BI__builtin_nontemporal_store:
1373     return RValue::get(EmitNontemporalStore(*this, E));
1374   case Builtin::BI__c11_atomic_is_lock_free:
1375   case Builtin::BI__atomic_is_lock_free: {
1376     // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
1377     // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
1378     // _Atomic(T) is always properly-aligned.
1379     const char *LibCallName = "__atomic_is_lock_free";
1380     CallArgList Args;
         // First libcall argument: the builtin's operand 0, typed as size_t.
1381     Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
1382              getContext().getSizeType());
         // Second libcall argument: operand 1 for the __atomic form, or a null
         // void* for the __c11 form.
1383     if (BuiltinID == Builtin::BI__atomic_is_lock_free)
1384       Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
1385                getContext().VoidPtrTy);
1386     else
1387       Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
1388                getContext().VoidPtrTy);
         // Arrange the call from the builtin's result type and the argument
         // list, then emit a call to the runtime function of that name.
1389     const CGFunctionInfo &FuncInfo =
1390         CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
1391     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
1392     llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
1393     return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
1394   }
1395 
1396   case Builtin::BI__atomic_test_and_set: {
1397     // Look at the argument type to determine whether this is a volatile
1398     // operation. The parameter type is always volatile.
1399     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1400     bool Volatile =
1401         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1402 
         // The flag is accessed as an i8 through a pointer in the operand's own
         // address space; the exchange writes the value 1.
1403     Value *Ptr = EmitScalarExpr(E->getArg(0));
1404     unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1405     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1406     Value *NewVal = Builder.getInt8(1);
1407     Value *Order = EmitScalarExpr(E->getArg(1));
         // Constant memory order: emit exactly one atomicrmw xchg with the
         // matching LLVM ordering. consume is emitted as acquire, and any value
         // outside 0..5 falls back to monotonic.
1408     if (isa<llvm::ConstantInt>(Order)) {
1409       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1410       AtomicRMWInst *Result = nullptr;
1411       switch (ord) {
1412       case 0:  // memory_order_relaxed
1413       default: // invalid order
1414         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1415                                          llvm::AtomicOrdering::Monotonic);
1416         break;
1417       case 1: // memory_order_consume
1418       case 2: // memory_order_acquire
1419         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1420                                          llvm::AtomicOrdering::Acquire);
1421         break;
1422       case 3: // memory_order_release
1423         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1424                                          llvm::AtomicOrdering::Release);
1425         break;
1426       case 4: // memory_order_acq_rel
1427 
1428         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1429                                          llvm::AtomicOrdering::AcquireRelease);
1430         break;
1431       case 5: // memory_order_seq_cst
1432         Result = Builder.CreateAtomicRMW(
1433             llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1434             llvm::AtomicOrdering::SequentiallyConsistent);
1435         break;
1436       }
1437       Result->setVolatile(Volatile);
           // The result is true when the exchanged-out byte was non-zero.
1438       return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1439     }
1440 
         // Non-constant memory order: create one block per LLVM ordering, switch
         // on the order value at run time, and merge the exchanged-out bytes
         // with a PHI in the continuation block.
1441     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1442 
1443     llvm::BasicBlock *BBs[5] = {
1444       createBasicBlock("monotonic", CurFn),
1445       createBasicBlock("acquire", CurFn),
1446       createBasicBlock("release", CurFn),
1447       createBasicBlock("acqrel", CurFn),
1448       createBasicBlock("seqcst", CurFn)
1449     };
         // Orders[i] is the ordering used by the exchange emitted into BBs[i].
1450     llvm::AtomicOrdering Orders[5] = {
1451         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
1452         llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
1453         llvm::AtomicOrdering::SequentiallyConsistent};
1454 
         // The order is cast to i32 as an unsigned value; the switch's default
         // destination is the monotonic block.
1455     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1456     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1457 
1458     Builder.SetInsertPoint(ContBB);
1459     PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
1460 
         // Fill each ordering block with its exchange, register the value with
         // the PHI, and branch to the continuation block.
1461     for (unsigned i = 0; i < 5; ++i) {
1462       Builder.SetInsertPoint(BBs[i]);
1463       AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1464                                                    Ptr, NewVal, Orders[i]);
1465       RMW->setVolatile(Volatile);
1466       Result->addIncoming(RMW, BBs[i]);
1467       Builder.CreateBr(ContBB);
1468     }
1469 
         // Order values 1 (consume) and 2 (acquire) both select the acquire
         // block; the remaining values map one-to-one.
1470     SI->addCase(Builder.getInt32(0), BBs[0]);
1471     SI->addCase(Builder.getInt32(1), BBs[1]);
1472     SI->addCase(Builder.getInt32(2), BBs[1]);
1473     SI->addCase(Builder.getInt32(3), BBs[2]);
1474     SI->addCase(Builder.getInt32(4), BBs[3]);
1475     SI->addCase(Builder.getInt32(5), BBs[4]);
1476 
1477     Builder.SetInsertPoint(ContBB);
1478     return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1479   }
1480 
1481   case Builtin::BI__atomic_clear: {
         // As for __atomic_test_and_set, volatility is taken from the pointee
         // type of the argument before implicit casts.
1482     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1483     bool Volatile =
1484         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1485 
         // The flag is written as an i8 zero through a pointer in the operand's
         // own address space.
1486     Address Ptr = EmitPointerWithAlignment(E->getArg(0));
1487     unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
1488     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1489     Value *NewVal = Builder.getInt8(0);
1490     Value *Order = EmitScalarExpr(E->getArg(1));
         // Constant memory order: emit one store and set its ordering. Only
         // relaxed, release and seq_cst have dedicated cases; every other value
         // takes the default and is stored with monotonic ordering.
1491     if (isa<llvm::ConstantInt>(Order)) {
1492       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1493       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1494       switch (ord) {
1495       case 0:  // memory_order_relaxed
1496       default: // invalid order
1497         Store->setOrdering(llvm::AtomicOrdering::Monotonic);
1498         break;
1499       case 3:  // memory_order_release
1500         Store->setOrdering(llvm::AtomicOrdering::Release);
1501         break;
1502       case 5:  // memory_order_seq_cst
1503         Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
1504         break;
1505       }
1506       return RValue::get(nullptr);
1507     }
1508 
         // Non-constant memory order: one block per supported ordering, selected
         // by a run-time switch whose default destination is the monotonic block.
1509     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1510 
1511     llvm::BasicBlock *BBs[3] = {
1512       createBasicBlock("monotonic", CurFn),
1513       createBasicBlock("release", CurFn),
1514       createBasicBlock("seqcst", CurFn)
1515     };
         // Orders[i] is the ordering applied to the store emitted into BBs[i].
1516     llvm::AtomicOrdering Orders[3] = {
1517         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
1518         llvm::AtomicOrdering::SequentiallyConsistent};
1519 
1520     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1521     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1522 
1523     for (unsigned i = 0; i < 3; ++i) {
1524       Builder.SetInsertPoint(BBs[i]);
1525       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1526       Store->setOrdering(Orders[i]);
1527       Builder.CreateBr(ContBB);
1528     }
1529 
         // Order values 0, 3 and 5 get explicit cases; all others use the
         // switch default.
1530     SI->addCase(Builder.getInt32(0), BBs[0]);
1531     SI->addCase(Builder.getInt32(3), BBs[1]);
1532     SI->addCase(Builder.getInt32(5), BBs[2]);
1533 
1534     Builder.SetInsertPoint(ContBB);
1535     return RValue::get(nullptr);
1536   }
1537 
1538   case Builtin::BI__atomic_thread_fence:
1539   case Builtin::BI__atomic_signal_fence:
1540   case Builtin::BI__c11_atomic_thread_fence:
1541   case Builtin::BI__c11_atomic_signal_fence: {
         // Signal fences use single-thread scope; thread fences use
         // cross-thread scope.
1542     llvm::SynchronizationScope Scope;
1543     if (BuiltinID == Builtin::BI__atomic_signal_fence ||
1544         BuiltinID == Builtin::BI__c11_atomic_signal_fence)
1545       Scope = llvm::SingleThread;
1546     else
1547       Scope = llvm::CrossThread;
1548     Value *Order = EmitScalarExpr(E->getArg(0));
         // Constant memory order: emit at most one fence. Relaxed and
         // out-of-range values emit nothing; consume is emitted as acquire.
1549     if (isa<llvm::ConstantInt>(Order)) {
1550       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1551       switch (ord) {
1552       case 0:  // memory_order_relaxed
1553       default: // invalid order
1554         break;
1555       case 1:  // memory_order_consume
1556       case 2:  // memory_order_acquire
1557         Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
1558         break;
1559       case 3:  // memory_order_release
1560         Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
1561         break;
1562       case 4:  // memory_order_acq_rel
1563         Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
1564         break;
1565       case 5:  // memory_order_seq_cst
1566         Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
1567                             Scope);
1568         break;
1569       }
1570       return RValue::get(nullptr);
1571     }
1572 
         // Non-constant memory order: one block per fence ordering, selected by
         // a run-time switch.
1573     llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
1574     AcquireBB = createBasicBlock("acquire", CurFn);
1575     ReleaseBB = createBasicBlock("release", CurFn);
1576     AcqRelBB = createBasicBlock("acqrel", CurFn);
1577     SeqCstBB = createBasicBlock("seqcst", CurFn);
1578     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1579 
         // The default destination is the continuation block, so order values
         // without an explicit case (including 0) emit no fence.
1580     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1581     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
1582 
         // Order values 1 (consume) and 2 (acquire) share the acquire block.
1583     Builder.SetInsertPoint(AcquireBB);
1584     Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
1585     Builder.CreateBr(ContBB);
1586     SI->addCase(Builder.getInt32(1), AcquireBB);
1587     SI->addCase(Builder.getInt32(2), AcquireBB);
1588 
1589     Builder.SetInsertPoint(ReleaseBB);
1590     Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
1591     Builder.CreateBr(ContBB);
1592     SI->addCase(Builder.getInt32(3), ReleaseBB);
1593 
1594     Builder.SetInsertPoint(AcqRelBB);
1595     Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
1596     Builder.CreateBr(ContBB);
1597     SI->addCase(Builder.getInt32(4), AcqRelBB);
1598 
1599     Builder.SetInsertPoint(SeqCstBB);
1600     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Scope);
1601     Builder.CreateBr(ContBB);
1602     SI->addCase(Builder.getInt32(5), SeqCstBB);
1603 
1604     Builder.SetInsertPoint(ContBB);
1605     return RValue::get(nullptr);
1606   }
1607 
1608     // Library functions with special handling.
1609   case Builtin::BIsqrt:
1610   case Builtin::BIsqrtf:
1611   case Builtin::BIsqrtl: {
1612     // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
1613     // in finite- or unsafe-math mode (the intrinsic has different semantics
1614     // for handling negative numbers compared to the library function, so
1615     // -fmath-errno=0 is not enough).
         // A declaration without the const attribute is not rewritten; `break`
         // leaves the switch without emitting the intrinsic.
1616     if (!FD->hasAttr<ConstAttr>())
1617       break;
1618     if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
1619           CGM.getCodeGenOpts().NoNaNsFPMath))
1620       break;
         // The intrinsic is overloaded on the LLVM type of the emitted operand.
1621     Value *Arg0 = EmitScalarExpr(E->getArg(0));
1622     llvm::Type *ArgType = Arg0->getType();
1623     Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
1624     return RValue::get(Builder.CreateCall(F, Arg0));
1625   }
1626 
1627   case Builtin::BI__builtin_pow:
1628   case Builtin::BI__builtin_powf:
1629   case Builtin::BI__builtin_powl:
1630   case Builtin::BIpow:
1631   case Builtin::BIpowf:
1632   case Builtin::BIpowl: {
1633     // Transform a call to pow* into a @llvm.pow.* intrinsic call.
         // Only done when the declaration carries the const attribute; otherwise
         // `break` leaves the switch without emitting the intrinsic.
1634     if (!FD->hasAttr<ConstAttr>())
1635       break;
         // Operands are emitted base first, then exponent; the intrinsic is
         // overloaded on the base's LLVM type.
1636     Value *Base = EmitScalarExpr(E->getArg(0));
1637     Value *Exponent = EmitScalarExpr(E->getArg(1));
1638     llvm::Type *ArgType = Base->getType();
1639     Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
1640     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1641   }
1642 
1643   case Builtin::BIfma:
1644   case Builtin::BIfmaf:
1645   case Builtin::BIfmal:
1646   case Builtin::BI__builtin_fma:
1647   case Builtin::BI__builtin_fmaf:
1648   case Builtin::BI__builtin_fmal: {
1649     // Rewrite fma to intrinsic.
         // Unlike the sqrt and pow cases there is no attribute or option check
         // here. The intrinsic is overloaded on the first operand's LLVM type;
         // the other two operands are emitted inside the call's argument list.
1650     Value *FirstArg = EmitScalarExpr(E->getArg(0));
1651     llvm::Type *ArgType = FirstArg->getType();
1652     Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
1653     return RValue::get(
1654         Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
1655                                EmitScalarExpr(E->getArg(2))}));
1656   }
1657 
1658   case Builtin::BI__builtin_signbit:
1659   case Builtin::BI__builtin_signbitf:
1660   case Builtin::BI__builtin_signbitl: {
         // The value produced by EmitSignBit for the operand is zero-extended
         // to the LLVM type of the builtin's result.
1661     return RValue::get(
1662         Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
1663                            ConvertType(E->getType())));
1664   }
1665   case Builtin::BI__builtin_annotation: {
         // The annotation intrinsic is overloaded on the type of the annotated
         // value (operand 0).
1666     llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
1667     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
1668                                       AnnVal->getType());
1669 
1670     // Get the annotation string, go through casts. Sema requires this to be a
1671     // non-wide string literal, potentially casted, so the cast<> is safe.
1672     const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
1673     StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
         // The call is emitted with the value, the string and the location of
         // the builtin expression.
1674     return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
1675   }
1676   case Builtin::BI__builtin_addcb:
1677   case Builtin::BI__builtin_addcs:
1678   case Builtin::BI__builtin_addc:
1679   case Builtin::BI__builtin_addcl:
1680   case Builtin::BI__builtin_addcll:
1681   case Builtin::BI__builtin_subcb:
1682   case Builtin::BI__builtin_subcs:
1683   case Builtin::BI__builtin_subc:
1684   case Builtin::BI__builtin_subcl:
1685   case Builtin::BI__builtin_subcll: {
1686 
1687     // We translate all of these builtins from expressions of the form:
1688     //   int x = ..., y = ..., carryin = ..., carryout, result;
1689     //   result = __builtin_addc(x, y, carryin, &carryout);
1690     //
1691     // to LLVM IR of the form:
1692     //
1693     //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
1694     //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
1695     //   %carry1 = extractvalue {i32, i1} %tmp1, 1
1696     //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
1697     //                                                       i32 %carryin)
1698     //   %result = extractvalue {i32, i1} %tmp2, 0
1699     //   %carry2 = extractvalue {i32, i1} %tmp2, 1
1700     //   %tmp3 = or i1 %carry1, %carry2
1701     //   %tmp4 = zext i1 %tmp3 to i32
1702     //   store i32 %tmp4, i32* %carryout
1703 
1704     // Scalarize our inputs.
1705     llvm::Value *X = EmitScalarExpr(E->getArg(0));
1706     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1707     llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
1708     Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
1709 
1710     // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
1711     llvm::Intrinsic::ID IntrinsicId;
1712     switch (BuiltinID) {
1713     default: llvm_unreachable("Unknown multiprecision builtin id.");
1714     case Builtin::BI__builtin_addcb:
1715     case Builtin::BI__builtin_addcs:
1716     case Builtin::BI__builtin_addc:
1717     case Builtin::BI__builtin_addcl:
1718     case Builtin::BI__builtin_addcll:
1719       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1720       break;
1721     case Builtin::BI__builtin_subcb:
1722     case Builtin::BI__builtin_subcs:
1723     case Builtin::BI__builtin_subc:
1724     case Builtin::BI__builtin_subcl:
1725     case Builtin::BI__builtin_subcll:
1726       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1727       break;
1728     }
1729 
1730     // Construct our resulting LLVM IR expression.
         // First X op Y, then (that result) op Carryin; each step reports its
         // own carry flag.
1731     llvm::Value *Carry1;
1732     llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
1733                                               X, Y, Carry1);
1734     llvm::Value *Carry2;
1735     llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
1736                                               Sum1, Carryin, Carry2);
         // The stored carry-out is the OR of the two flags, zero-extended to the
         // type of X.
1737     llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
1738                                                X->getType());
1739     Builder.CreateStore(CarryOut, CarryOutPtr);
1740     return RValue::get(Sum2);
1741   }
1742 
1743   case Builtin::BI__builtin_add_overflow:
1744   case Builtin::BI__builtin_sub_overflow:
1745   case Builtin::BI__builtin_mul_overflow: {
1746     const clang::Expr *LeftArg = E->getArg(0);
1747     const clang::Expr *RightArg = E->getArg(1);
1748     const clang::Expr *ResultArg = E->getArg(2);
1749 
         // The result type is the pointee type of the third operand.
1750     clang::QualType ResultQTy =
1751         ResultArg->getType()->castAs<PointerType>()->getPointeeType();
1752 
         // Gather width/signedness for both operands and the result, and derive
         // from them the description of the type the arithmetic is performed in.
1753     WidthAndSignedness LeftInfo =
1754         getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
1755     WidthAndSignedness RightInfo =
1756         getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
1757     WidthAndSignedness ResultInfo =
1758         getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
1759     WidthAndSignedness EncompassingInfo =
1760         EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
1761 
1762     llvm::Type *EncompassingLLVMTy =
1763         llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
1764 
1765     llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
1766 
         // Pick the signed or unsigned overflow intrinsic according to the
         // signedness of the encompassing type.
1767     llvm::Intrinsic::ID IntrinsicId;
1768     switch (BuiltinID) {
1769     default:
1770       llvm_unreachable("Unknown overflow builtin id.");
1771     case Builtin::BI__builtin_add_overflow:
1772       IntrinsicId = EncompassingInfo.Signed
1773                         ? llvm::Intrinsic::sadd_with_overflow
1774                         : llvm::Intrinsic::uadd_with_overflow;
1775       break;
1776     case Builtin::BI__builtin_sub_overflow:
1777       IntrinsicId = EncompassingInfo.Signed
1778                         ? llvm::Intrinsic::ssub_with_overflow
1779                         : llvm::Intrinsic::usub_with_overflow;
1780       break;
1781     case Builtin::BI__builtin_mul_overflow:
1782       IntrinsicId = EncompassingInfo.Signed
1783                         ? llvm::Intrinsic::smul_with_overflow
1784                         : llvm::Intrinsic::umul_with_overflow;
1785       break;
1786     }
1787 
1788     llvm::Value *Left = EmitScalarExpr(LeftArg);
1789     llvm::Value *Right = EmitScalarExpr(RightArg);
1790     Address ResultPtr = EmitPointerWithAlignment(ResultArg);
1791 
1792     // Extend each operand to the encompassing type.
         // Each operand is cast using its own signedness.
1793     Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
1794     Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
1795 
1796     // Perform the operation on the extended values.
1797     llvm::Value *Overflow, *Result;
1798     Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
1799 
1800     if (EncompassingInfo.Width > ResultInfo.Width) {
1801       // The encompassing type is wider than the result type, so we need to
1802       // truncate it.
1803       llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
1804 
1805       // To see if the truncation caused an overflow, we will extend
1806       // the result and then compare it to the original result.
1807       llvm::Value *ResultTruncExt = Builder.CreateIntCast(
1808           ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
1809       llvm::Value *TruncationOverflow =
1810           Builder.CreateICmpNE(Result, ResultTruncExt);
1811 
           // The reported overflow is the intrinsic's flag OR the truncation
           // check.
1812       Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
1813       Result = ResultTrunc;
1814     }
1815 
1816     // Finally, store the result using the pointer.
         // The store is volatile when the pointee type of the result operand is
         // volatile-qualified.
1817     bool isVolatile =
1818       ResultArg->getType()->getPointeeType().isVolatileQualified();
1819     Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
1820 
1821     return RValue::get(Overflow);
1822   }
1823 
1824   case Builtin::BI__builtin_uadd_overflow:
1825   case Builtin::BI__builtin_uaddl_overflow:
1826   case Builtin::BI__builtin_uaddll_overflow:
1827   case Builtin::BI__builtin_usub_overflow:
1828   case Builtin::BI__builtin_usubl_overflow:
1829   case Builtin::BI__builtin_usubll_overflow:
1830   case Builtin::BI__builtin_umul_overflow:
1831   case Builtin::BI__builtin_umull_overflow:
1832   case Builtin::BI__builtin_umulll_overflow:
1833   case Builtin::BI__builtin_sadd_overflow:
1834   case Builtin::BI__builtin_saddl_overflow:
1835   case Builtin::BI__builtin_saddll_overflow:
1836   case Builtin::BI__builtin_ssub_overflow:
1837   case Builtin::BI__builtin_ssubl_overflow:
1838   case Builtin::BI__builtin_ssubll_overflow:
1839   case Builtin::BI__builtin_smul_overflow:
1840   case Builtin::BI__builtin_smull_overflow:
1841   case Builtin::BI__builtin_smulll_overflow: {
1842 
1843     // We translate all of these builtins directly to the relevant llvm IR node.
1844 
1845     // Scalarize our inputs.
1846     llvm::Value *X = EmitScalarExpr(E->getArg(0));
1847     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1848     Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
1849 
1850     // Decide which of the overflow intrinsics we are lowering to:
         // The int/long/long long variants of one operation share an intrinsic.
1851     llvm::Intrinsic::ID IntrinsicId;
1852     switch (BuiltinID) {
1853     default: llvm_unreachable("Unknown overflow builtin id.");
1854     case Builtin::BI__builtin_uadd_overflow:
1855     case Builtin::BI__builtin_uaddl_overflow:
1856     case Builtin::BI__builtin_uaddll_overflow:
1857       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1858       break;
1859     case Builtin::BI__builtin_usub_overflow:
1860     case Builtin::BI__builtin_usubl_overflow:
1861     case Builtin::BI__builtin_usubll_overflow:
1862       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1863       break;
1864     case Builtin::BI__builtin_umul_overflow:
1865     case Builtin::BI__builtin_umull_overflow:
1866     case Builtin::BI__builtin_umulll_overflow:
1867       IntrinsicId = llvm::Intrinsic::umul_with_overflow;
1868       break;
1869     case Builtin::BI__builtin_sadd_overflow:
1870     case Builtin::BI__builtin_saddl_overflow:
1871     case Builtin::BI__builtin_saddll_overflow:
1872       IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
1873       break;
1874     case Builtin::BI__builtin_ssub_overflow:
1875     case Builtin::BI__builtin_ssubl_overflow:
1876     case Builtin::BI__builtin_ssubll_overflow:
1877       IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
1878       break;
1879     case Builtin::BI__builtin_smul_overflow:
1880     case Builtin::BI__builtin_smull_overflow:
1881     case Builtin::BI__builtin_smulll_overflow:
1882       IntrinsicId = llvm::Intrinsic::smul_with_overflow;
1883       break;
1884     }
1885 
1886 
         // The arithmetic result is stored through operand 2; the overflow flag
         // is the builtin's value. "Sum" and "Carry" are used for all three
         // operations, not only addition.
1887     llvm::Value *Carry;
1888     llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
1889     Builder.CreateStore(Sum, SumOutPtr);
1890 
1891     return RValue::get(Carry);
1892   }
1893   case Builtin::BI__builtin_addressof:
         // The operand is emitted as an lvalue and its pointer is returned.
1894     return RValue::get(EmitLValue(E->getArg(0)).getPointer());
1895   case Builtin::BI__builtin_operator_new:
         // The new and delete forms call the same helper with the callee's
         // prototype and operand 0; only the final boolean differs (false for
         // new, true for delete).
1896     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
1897                                     E->getArg(0), false);
1898   case Builtin::BI__builtin_operator_delete:
1899     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
1900                                     E->getArg(0), true);
1901   case Builtin::BI__noop:
1902     // __noop always evaluates to an integer literal zero.
1903     return RValue::get(ConstantInt::get(IntTy, 0));
1904   case Builtin::BI__builtin_call_with_static_chain: {
         // Operand 0 must itself be a call expression (enforced by cast<>);
         // operand 1 is the chain. The inner call is emitted with the emitted
         // chain value passed as the last EmitCall argument.
1905     const CallExpr *Call = cast<CallExpr>(E->getArg(0));
1906     const Expr *Chain = E->getArg(1);
1907     return EmitCall(Call->getCallee()->getType(),
1908                     EmitScalarExpr(Call->getCallee()), Call, ReturnValue,
1909                     Call->getCalleeDecl(), EmitScalarExpr(Chain));
1910   }
1911   case Builtin::BI_InterlockedExchange:
1912   case Builtin::BI_InterlockedExchangePointer:
         // Both forms are emitted by the shared binary-atomic helper with the
         // exchange (Xchg) operation.
1913     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1914   case Builtin::BI_InterlockedCompareExchangePointer: {
         // The compare-exchange is performed on an integer as wide as the
         // builtin's result type; pointer operands are converted to that
         // integer and the result is converted back.
1915     llvm::Type *RTy;
1916     llvm::IntegerType *IntType =
1917       IntegerType::get(getLLVMContext(),
1918                        getContext().getTypeSize(E->getType()));
1919     llvm::Type *IntPtrType = IntType->getPointerTo();
1920 
1921     llvm::Value *Destination =
1922       Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
1923 
         // RTy remembers the exchange operand's original type; it is the type
         // the final result is converted back to.
1924     llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
1925     RTy = Exchange->getType();
1926     Exchange = Builder.CreatePtrToInt(Exchange, IntType);
1927 
1928     llvm::Value *Comparand =
1929       Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
1930 
         // Sequentially consistent on both success and failure, and always
         // marked volatile.
1931     auto Result =
1932         Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
1933                                     AtomicOrdering::SequentiallyConsistent,
1934                                     AtomicOrdering::SequentiallyConsistent);
1935     Result->setVolatile(true);
1936 
         // Element 0 of the cmpxchg result is returned, converted to RTy.
1937     return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
1938                                                                          0),
1939                                               RTy));
1940   }
1941   case Builtin::BI_InterlockedCompareExchange: {
         // Note the operand order: builtin operand 2 is passed in the compare
         // position and operand 1 in the new-value position, matching the
         // pointer variant (Comparand = operand 2, Exchange = operand 1).
1942     AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
1943         EmitScalarExpr(E->getArg(0)),
1944         EmitScalarExpr(E->getArg(2)),
1945         EmitScalarExpr(E->getArg(1)),
1946         AtomicOrdering::SequentiallyConsistent,
1947         AtomicOrdering::SequentiallyConsistent);
           // Always volatile; element 0 of the cmpxchg result is returned.
1948       CXI->setVolatile(true);
1949       return RValue::get(Builder.CreateExtractValue(CXI, 0));
1950   }
1951   case Builtin::BI_InterlockedIncrement: {
         // This IntTy is a local that shadows the outer IntTy used by other
         // cases; it is the LLVM type of the builtin's result.
1952     llvm::Type *IntTy = ConvertType(E->getType());
         // Sequentially consistent, volatile atomic add of 1.
1953     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1954       AtomicRMWInst::Add,
1955       EmitScalarExpr(E->getArg(0)),
1956       ConstantInt::get(IntTy, 1),
1957       llvm::AtomicOrdering::SequentiallyConsistent);
1958     RMWI->setVolatile(true);
         // 1 is added to the atomicrmw result to form the returned value.
1959     return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1)));
1960   }
1961   case Builtin::BI_InterlockedDecrement: {
         // This IntTy is a local that shadows the outer IntTy used by other
         // cases; it is the LLVM type of the builtin's result.
1962     llvm::Type *IntTy = ConvertType(E->getType());
         // Sequentially consistent, volatile atomic subtract of 1.
1963     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1964       AtomicRMWInst::Sub,
1965       EmitScalarExpr(E->getArg(0)),
1966       ConstantInt::get(IntTy, 1),
1967       llvm::AtomicOrdering::SequentiallyConsistent);
1968     RMWI->setVolatile(true);
         // 1 is subtracted from the atomicrmw result to form the returned value.
1969     return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1)));
1970   }
1971   case Builtin::BI_InterlockedExchangeAdd: {
         // Sequentially consistent, volatile atomic add of operand 1 to the
         // location given by operand 0. Unlike the increment and decrement
         // cases, the atomicrmw result is returned without adjustment.
1972     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1973       AtomicRMWInst::Add,
1974       EmitScalarExpr(E->getArg(0)),
1975       EmitScalarExpr(E->getArg(1)),
1976       llvm::AtomicOrdering::SequentiallyConsistent);
1977     RMWI->setVolatile(true);
1978     return RValue::get(RMWI);
1979   }
1980   case Builtin::BI__readfsdword: {
         // The integer operand is converted to a pointer to the result type in
         // address space 257 and read with a volatile, default-aligned load.
         // NOTE(review): 257 is presumably the x86 FS-segment address space,
         // as the builtin's name suggests -- confirm against the X86 backend.
1981     llvm::Type *IntTy = ConvertType(E->getType());
1982     Value *IntToPtr =
1983       Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
1984                              llvm::PointerType::get(IntTy, 257));
1985     LoadInst *Load =
1986         Builder.CreateDefaultAlignedLoad(IntToPtr, /*isVolatile=*/true);
1987     return RValue::get(Load);
1988   }
1989 
1990   case Builtin::BI__exception_code:
1991   case Builtin::BI_exception_code:
         // Each SEH builtin has two spellings that share one emitter; the
         // emitter's value is returned as a scalar RValue.
1992     return RValue::get(EmitSEHExceptionCode());
1993   case Builtin::BI__exception_info:
1994   case Builtin::BI_exception_info:
1995     return RValue::get(EmitSEHExceptionInfo());
1996   case Builtin::BI__abnormal_termination:
1997   case Builtin::BI_abnormal_termination:
1998     return RValue::get(EmitSEHAbnormalTermination());
1999   case Builtin::BI_setjmpex: {
         // Special lowering applies only when the target OS uses the MSVC
         // runtime; otherwise `break` leaves the switch.
2000     if (getTarget().getTriple().isOSMSVCRT()) {
           // Declare "int _setjmpex(i8*, i8*)" marked returns_twice.
2001       llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2002       llvm::AttributeSet ReturnsTwiceAttr =
2003           AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
2004                             llvm::Attribute::ReturnsTwice);
2005       llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
2006           llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2007           "_setjmpex", ReturnsTwiceAttr);
           // Arguments: the jump buffer cast to i8*, and the result of
           // llvm.frameaddress(0).
2008       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2009           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2010       llvm::Value *FrameAddr =
2011           Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2012                              ConstantInt::get(Int32Ty, 0));
2013       llvm::Value *Args[] = {Buf, FrameAddr};
           // The call site itself is also marked returns_twice.
2014       llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
2015       CS.setAttributes(ReturnsTwiceAttr);
2016       return RValue::get(CS.getInstruction());
2017     }
2018     break;
2019   }
2020   case Builtin::BI_setjmp: {
         // Special lowering applies only when the target OS uses the MSVC
         // runtime; otherwise `break` leaves the switch.
2021     if (getTarget().getTriple().isOSMSVCRT()) {
2022       llvm::AttributeSet ReturnsTwiceAttr =
2023           AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
2024                             llvm::Attribute::ReturnsTwice);
2025       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2026           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2027       llvm::CallSite CS;
           // On x86 the variadic "_setjmp3(i8*, int, ...)" is called with the
           // buffer and a count of 0.
2028       if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
2029         llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
2030         llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
2031             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
2032             "_setjmp3", ReturnsTwiceAttr);
2033         llvm::Value *Count = ConstantInt::get(IntTy, 0);
2034         llvm::Value *Args[] = {Buf, Count};
2035         CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
2036       } else {
             // On other architectures "_setjmp(i8*, i8*)" is called with the
             // buffer and the result of llvm.frameaddress(0).
2037         llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2038         llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
2039             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2040             "_setjmp", ReturnsTwiceAttr);
2041         llvm::Value *FrameAddr =
2042             Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2043                                ConstantInt::get(Int32Ty, 0));
2044         llvm::Value *Args[] = {Buf, FrameAddr};
2045         CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
2046       }
           // Whichever function was called, the call site is marked
           // returns_twice.
2047       CS.setAttributes(ReturnsTwiceAttr);
2048       return RValue::get(CS.getInstruction());
2049     }
2050     break;
2051   }
2052 
2053   case Builtin::BI__GetExceptionInfo: {
         // If the C++ ABI returns a throw-info global for the type of the
         // callee's first parameter, return it bitcast to i8*. A null result
         // falls through to `break`.
2054     if (llvm::GlobalVariable *GV =
2055             CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
2056       return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
2057     break;
2058   }
2059 
2060   // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
2061   case Builtin::BIread_pipe:
2062   case Builtin::BIwrite_pipe: {
         // Operands 0 and 1 are emitted up front for both overloads.
2063     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2064           *Arg1 = EmitScalarExpr(E->getArg(1));
2065 
2066     // Type of the generic packet parameter.
2067     unsigned GenericAS =
2068         getContext().getTargetAddressSpace(LangAS::opencl_generic);
2069     llvm::Type *I8PTy = llvm::PointerType::get(
2070         llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
2071 
2072     // Testing which overloaded version we should generate the call for.
2073     if (2U == E->getNumArgs()) {
2074       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
2075                                                              : "__write_pipe_2";
2076       // Creating a generic function type to be able to call with any builtin or
2077       // user defined type.
2078       llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy};
2079       llvm::FunctionType *FTy = llvm::FunctionType::get(
2080           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
           // Operand 1 is cast to the generic i8 pointer type before the call.
2081       Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
2082       return RValue::get(Builder.CreateCall(
2083           CGM.CreateRuntimeFunction(FTy, Name), {Arg0, BCast}));
2084     } else {
2085       assert(4 == E->getNumArgs() &&
2086              "Illegal number of parameters to pipe function");
2087       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
2088                                                              : "__write_pipe_4";
2089 
           // Four-operand form: operands 0 and 1 keep their own types, operand 2
           // is passed as i32, and operand 3 is cast to the generic i8 pointer
           // type.
2090       llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy};
2091       Value *Arg2 = EmitScalarExpr(E->getArg(2)),
2092             *Arg3 = EmitScalarExpr(E->getArg(3));
2093       llvm::FunctionType *FTy = llvm::FunctionType::get(
2094           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2095       Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
2096       // We know the third argument is an integer type, but we may need to cast
2097       // it to i32.
2098       if (Arg2->getType() != Int32Ty)
2099         Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
2100       return RValue::get(Builder.CreateCall(
2101           CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1, Arg2, BCast}));
2102     }
2103   }
2104   // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write
2105   // functions
2106   case Builtin::BIreserve_read_pipe:
2107   case Builtin::BIreserve_write_pipe:
2108   case Builtin::BIwork_group_reserve_read_pipe:
2109   case Builtin::BIwork_group_reserve_write_pipe:
2110   case Builtin::BIsub_group_reserve_read_pipe:
2111   case Builtin::BIsub_group_reserve_write_pipe: {
2112     // Composing the mangled name for the function.
2113     const char *Name;
2114     if (BuiltinID == Builtin::BIreserve_read_pipe)
2115       Name = "__reserve_read_pipe";
2116     else if (BuiltinID == Builtin::BIreserve_write_pipe)
2117       Name = "__reserve_write_pipe";
2118     else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
2119       Name = "__work_group_reserve_read_pipe";
2120     else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
2121       Name = "__work_group_reserve_write_pipe";
2122     else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
2123       Name = "__sub_group_reserve_read_pipe";
2124     else
2125       Name = "__sub_group_reserve_write_pipe";
2126 
2127     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2128           *Arg1 = EmitScalarExpr(E->getArg(1));
2129     llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
2130 
2131     // Building the generic function prototype.
2132     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty};
2133     llvm::FunctionType *FTy = llvm::FunctionType::get(
2134         ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2135     // We know the second argument is an integer type, but we may need to cast
2136     // it to i32.
2137     if (Arg1->getType() != Int32Ty)
2138       Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
2139     return RValue::get(
2140         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1}));
2141   }
2142   // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
2143   // functions
2144   case Builtin::BIcommit_read_pipe:
2145   case Builtin::BIcommit_write_pipe:
2146   case Builtin::BIwork_group_commit_read_pipe:
2147   case Builtin::BIwork_group_commit_write_pipe:
2148   case Builtin::BIsub_group_commit_read_pipe:
2149   case Builtin::BIsub_group_commit_write_pipe: {
2150     const char *Name;
2151     if (BuiltinID == Builtin::BIcommit_read_pipe)
2152       Name = "__commit_read_pipe";
2153     else if (BuiltinID == Builtin::BIcommit_write_pipe)
2154       Name = "__commit_write_pipe";
2155     else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
2156       Name = "__work_group_commit_read_pipe";
2157     else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
2158       Name = "__work_group_commit_write_pipe";
2159     else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
2160       Name = "__sub_group_commit_read_pipe";
2161     else
2162       Name = "__sub_group_commit_write_pipe";
2163 
2164     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2165           *Arg1 = EmitScalarExpr(E->getArg(1));
2166 
2167     // Building the generic function prototype.
2168     llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType()};
2169     llvm::FunctionType *FTy =
2170         llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
2171                                 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2172 
2173     return RValue::get(
2174         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1}));
2175   }
2176   // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
2177   case Builtin::BIget_pipe_num_packets:
2178   case Builtin::BIget_pipe_max_packets: {
2179     const char *Name;
2180     if (BuiltinID == Builtin::BIget_pipe_num_packets)
2181       Name = "__get_pipe_num_packets";
2182     else
2183       Name = "__get_pipe_max_packets";
2184 
2185     // Building the generic function prototype.
2186     Value *Arg0 = EmitScalarExpr(E->getArg(0));
2187     llvm::Type *ArgTys[] = {Arg0->getType()};
2188     llvm::FunctionType *FTy = llvm::FunctionType::get(
2189         Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2190 
2191     return RValue::get(
2192         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0}));
2193   }
2194 
2195   // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
2196   case Builtin::BIto_global:
2197   case Builtin::BIto_local:
2198   case Builtin::BIto_private: {
2199     auto Arg0 = EmitScalarExpr(E->getArg(0));
2200     auto NewArgT = llvm::PointerType::get(Int8Ty,
2201       CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2202     auto NewRetT = llvm::PointerType::get(Int8Ty,
2203       CGM.getContext().getTargetAddressSpace(
2204         E->getType()->getPointeeType().getAddressSpace()));
2205     auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
2206     llvm::Value *NewArg;
2207     if (Arg0->getType()->getPointerAddressSpace() !=
2208         NewArgT->getPointerAddressSpace())
2209       NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
2210     else
2211       NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
2212     auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
2213     auto NewCall =
2214         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
2215     return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
2216       ConvertType(E->getType())));
2217   }
2218 
2219   // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
2220   // It contains four different overload formats specified in Table 6.13.17.1.
2221   case Builtin::BIenqueue_kernel: {
2222     StringRef Name; // Generated function call name
2223     unsigned NumArgs = E->getNumArgs();
2224 
2225     llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
2226     llvm::Type *RangeTy = ConvertType(getContext().OCLNDRangeTy);
2227 
2228     llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
2229     llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
2230     llvm::Value *Range = EmitScalarExpr(E->getArg(2));
2231 
2232     if (NumArgs == 4) {
2233       // The most basic form of the call with parameters:
2234       // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
2235       Name = "__enqueue_kernel_basic";
2236       llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, Int8PtrTy};
2237       llvm::FunctionType *FTy = llvm::FunctionType::get(
2238           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false);
2239 
2240       llvm::Value *Block =
2241           Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy);
2242 
2243       return RValue::get(Builder.CreateCall(
2244           CGM.CreateRuntimeFunction(FTy, Name), {Queue, Flags, Range, Block}));
2245     }
2246     assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
2247 
2248     // Could have events and/or vaargs.
2249     if (E->getArg(3)->getType()->isBlockPointerType()) {
2250       // No events passed, but has variadic arguments.
2251       Name = "__enqueue_kernel_vaargs";
2252       llvm::Value *Block =
2253           Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy);
2254       // Create a vector of the arguments, as well as a constant value to
2255       // express to the runtime the number of variadic arguments.
2256       std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block,
2257                                          ConstantInt::get(IntTy, NumArgs - 4)};
2258       std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, Int8PtrTy,
2259                                           IntTy};
2260 
2261       // Add the variadics.
2262       for (unsigned I = 4; I < NumArgs; ++I) {
2263         llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I));
2264         unsigned TypeSizeInBytes =
2265             getContext()
2266                 .getTypeSizeInChars(E->getArg(I)->getType())
2267                 .getQuantity();
2268         Args.push_back(TypeSizeInBytes < 4
2269                            ? Builder.CreateZExt(ArgSize, Int32Ty)
2270                            : ArgSize);
2271       }
2272 
2273       llvm::FunctionType *FTy = llvm::FunctionType::get(
2274           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2275       return RValue::get(
2276           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2277                              llvm::ArrayRef<llvm::Value *>(Args)));
2278     }
2279     // Any calls now have event arguments passed.
2280     if (NumArgs >= 7) {
2281       llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
2282       unsigned AS4 =
2283           E->getArg(4)->getType()->isArrayType()
2284               ? E->getArg(4)->getType().getAddressSpace()
2285               : E->getArg(4)->getType()->getPointeeType().getAddressSpace();
2286       llvm::Type *EventPtrAS4Ty =
2287           EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS4));
2288       unsigned AS5 =
2289           E->getArg(5)->getType()->getPointeeType().getAddressSpace();
2290       llvm::Type *EventPtrAS5Ty =
2291           EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS5));
2292 
2293       llvm::Value *NumEvents = EmitScalarExpr(E->getArg(3));
2294       llvm::Value *EventList =
2295           E->getArg(4)->getType()->isArrayType()
2296               ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
2297               : EmitScalarExpr(E->getArg(4));
2298       llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
2299       llvm::Value *Block =
2300           Builder.CreateBitCast(EmitScalarExpr(E->getArg(6)), Int8PtrTy);
2301 
2302       std::vector<llvm::Type *> ArgTys = {
2303           QueueTy,       Int32Ty,       RangeTy,  Int32Ty,
2304           EventPtrAS4Ty, EventPtrAS5Ty, Int8PtrTy};
2305       std::vector<llvm::Value *> Args = {Queue,     Flags,    Range, NumEvents,
2306                                          EventList, ClkEvent, Block};
2307 
2308       if (NumArgs == 7) {
2309         // Has events but no variadics.
2310         Name = "__enqueue_kernel_basic_events";
2311         llvm::FunctionType *FTy = llvm::FunctionType::get(
2312             Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2313         return RValue::get(
2314             Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2315                                llvm::ArrayRef<llvm::Value *>(Args)));
2316       }
2317       // Has event info and variadics
2318       // Pass the number of variadics to the runtime function too.
2319       Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
2320       ArgTys.push_back(Int32Ty);
2321       Name = "__enqueue_kernel_events_vaargs";
2322 
2323       // Add the variadics.
2324       for (unsigned I = 7; I < NumArgs; ++I) {
2325         llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I));
2326         unsigned TypeSizeInBytes =
2327             getContext()
2328                 .getTypeSizeInChars(E->getArg(I)->getType())
2329                 .getQuantity();
2330         Args.push_back(TypeSizeInBytes < 4
2331                            ? Builder.CreateZExt(ArgSize, Int32Ty)
2332                            : ArgSize);
2333       }
2334       llvm::FunctionType *FTy = llvm::FunctionType::get(
2335           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2336       return RValue::get(
2337           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2338                              llvm::ArrayRef<llvm::Value *>(Args)));
2339     }
2340   }
2341   // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
2342   // parameter.
2343   case Builtin::BIget_kernel_work_group_size: {
2344     Value *Arg = EmitScalarExpr(E->getArg(0));
2345     Arg = Builder.CreateBitCast(Arg, Int8PtrTy);
2346     return RValue::get(
2347         Builder.CreateCall(CGM.CreateRuntimeFunction(
2348                                llvm::FunctionType::get(IntTy, Int8PtrTy, false),
2349                                "__get_kernel_work_group_size_impl"),
2350                            Arg));
2351   }
2352   case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
2353     Value *Arg = EmitScalarExpr(E->getArg(0));
2354     Arg = Builder.CreateBitCast(Arg, Int8PtrTy);
2355     return RValue::get(Builder.CreateCall(
2356         CGM.CreateRuntimeFunction(
2357             llvm::FunctionType::get(IntTy, Int8PtrTy, false),
2358             "__get_kernel_preferred_work_group_multiple_impl"),
2359         Arg));
2360   }
2361   case Builtin::BIprintf:
2362     if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice)
2363       return EmitCUDADevicePrintfCallExpr(E, ReturnValue);
2364     break;
2365   case Builtin::BI__builtin_canonicalize:
2366   case Builtin::BI__builtin_canonicalizef:
2367   case Builtin::BI__builtin_canonicalizel:
2368     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
2369 
2370   case Builtin::BI__builtin_thread_pointer: {
2371     if (!getContext().getTargetInfo().isTLSSupported())
2372       CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
2373     // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
2374     break;
2375   }
2376   }
2377 
2378   // If this is an alias for a lib function (e.g. __builtin_sin), emit
2379   // the call using the normal call path, but using the unmangled
2380   // version of the function name.
2381   if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
2382     return emitLibraryCall(*this, FD, E,
2383                            CGM.getBuiltinLibFunction(FD, BuiltinID));
2384 
2385   // If this is a predefined lib function (e.g. malloc), emit the call
2386   // using exactly the normal call path.
2387   if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
2388     return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee()));
2389 
2390   // Check that a call to a target specific builtin has the correct target
2391   // features.
2392   // This is down here to avoid non-target specific builtins, however, if
2393   // generic builtins start to require generic target features then we
2394   // can move this up to the beginning of the function.
2395   checkTargetFeatures(E, FD);
2396 
2397   // See if we have a target specific intrinsic.
2398   const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
2399   Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
2400   if (const char *Prefix =
2401           llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch())) {
2402     IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);
2403     // NOTE we dont need to perform a compatibility flag check here since the
2404     // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the
2405     // MS builtins via ALL_MS_LANGUAGES and are filtered earlier.
2406     if (IntrinsicID == Intrinsic::not_intrinsic)
2407       IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix, Name);
2408   }
2409 
2410   if (IntrinsicID != Intrinsic::not_intrinsic) {
2411     SmallVector<Value*, 16> Args;
2412 
2413     // Find out if any arguments are required to be integer constant
2414     // expressions.
2415     unsigned ICEArguments = 0;
2416     ASTContext::GetBuiltinTypeError Error;
2417     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
2418     assert(Error == ASTContext::GE_None && "Should not codegen an error");
2419 
2420     Function *F = CGM.getIntrinsic(IntrinsicID);
2421     llvm::FunctionType *FTy = F->getFunctionType();
2422 
2423     for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
2424       Value *ArgValue;
2425       // If this is a normal argument, just emit it as a scalar.
2426       if ((ICEArguments & (1 << i)) == 0) {
2427         ArgValue = EmitScalarExpr(E->getArg(i));
2428       } else {
2429         // If this is required to be a constant, constant fold it so that we
2430         // know that the generated intrinsic gets a ConstantInt.
2431         llvm::APSInt Result;
2432         bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
2433         assert(IsConst && "Constant arg isn't actually constant?");
2434         (void)IsConst;
2435         ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
2436       }
2437 
2438       // If the intrinsic arg type is different from the builtin arg type
2439       // we need to do a bit cast.
2440       llvm::Type *PTy = FTy->getParamType(i);
2441       if (PTy != ArgValue->getType()) {
2442         assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
2443                "Must be able to losslessly bit cast to param");
2444         ArgValue = Builder.CreateBitCast(ArgValue, PTy);
2445       }
2446 
2447       Args.push_back(ArgValue);
2448     }
2449 
2450     Value *V = Builder.CreateCall(F, Args);
2451     QualType BuiltinRetType = E->getType();
2452 
2453     llvm::Type *RetTy = VoidTy;
2454     if (!BuiltinRetType->isVoidType())
2455       RetTy = ConvertType(BuiltinRetType);
2456 
2457     if (RetTy != V->getType()) {
2458       assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
2459              "Must be able to losslessly bit cast result type");
2460       V = Builder.CreateBitCast(V, RetTy);
2461     }
2462 
2463     return RValue::get(V);
2464   }
2465 
2466   // See if we have a target specific builtin that needs to be lowered.
2467   if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
2468     return RValue::get(V);
2469 
2470   ErrorUnsupported(E, "builtin function");
2471 
2472   // Unknown builtin, for now just dump it out and return undef.
2473   return GetUndefRValue(E->getType());
2474 }
2475 
2476 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
2477                                         unsigned BuiltinID, const CallExpr *E,
2478                                         llvm::Triple::ArchType Arch) {
2479   switch (Arch) {
2480   case llvm::Triple::arm:
2481   case llvm::Triple::armeb:
2482   case llvm::Triple::thumb:
2483   case llvm::Triple::thumbeb:
2484     return CGF->EmitARMBuiltinExpr(BuiltinID, E);
2485   case llvm::Triple::aarch64:
2486   case llvm::Triple::aarch64_be:
2487     return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
2488   case llvm::Triple::x86:
2489   case llvm::Triple::x86_64:
2490     return CGF->EmitX86BuiltinExpr(BuiltinID, E);
2491   case llvm::Triple::ppc:
2492   case llvm::Triple::ppc64:
2493   case llvm::Triple::ppc64le:
2494     return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
2495   case llvm::Triple::r600:
2496   case llvm::Triple::amdgcn:
2497     return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
2498   case llvm::Triple::systemz:
2499     return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
2500   case llvm::Triple::nvptx:
2501   case llvm::Triple::nvptx64:
2502     return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
2503   case llvm::Triple::wasm32:
2504   case llvm::Triple::wasm64:
2505     return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
2506   default:
2507     return nullptr;
2508   }
2509 }
2510 
2511 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
2512                                               const CallExpr *E) {
2513   if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
2514     assert(getContext().getAuxTargetInfo() && "Missing aux target info");
2515     return EmitTargetArchBuiltinExpr(
2516         this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
2517         getContext().getAuxTargetInfo()->getTriple().getArch());
2518   }
2519 
2520   return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
2521                                    getTarget().getTriple().getArch());
2522 }
2523 
2524 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
2525                                      NeonTypeFlags TypeFlags,
2526                                      bool V1Ty=false) {
2527   int IsQuad = TypeFlags.isQuad();
2528   switch (TypeFlags.getEltType()) {
2529   case NeonTypeFlags::Int8:
2530   case NeonTypeFlags::Poly8:
2531     return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
2532   case NeonTypeFlags::Int16:
2533   case NeonTypeFlags::Poly16:
2534   case NeonTypeFlags::Float16:
2535     return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
2536   case NeonTypeFlags::Int32:
2537     return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
2538   case NeonTypeFlags::Int64:
2539   case NeonTypeFlags::Poly64:
2540     return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
2541   case NeonTypeFlags::Poly128:
2542     // FIXME: i128 and f128 doesn't get fully support in Clang and llvm.
2543     // There is a lot of i128 and f128 API missing.
2544     // so we use v16i8 to represent poly128 and get pattern matched.
2545     return llvm::VectorType::get(CGF->Int8Ty, 16);
2546   case NeonTypeFlags::Float32:
2547     return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
2548   case NeonTypeFlags::Float64:
2549     return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
2550   }
2551   llvm_unreachable("Unknown vector element type!");
2552 }
2553 
2554 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
2555                                           NeonTypeFlags IntTypeFlags) {
2556   int IsQuad = IntTypeFlags.isQuad();
2557   switch (IntTypeFlags.getEltType()) {
2558   case NeonTypeFlags::Int32:
2559     return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
2560   case NeonTypeFlags::Int64:
2561     return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
2562   default:
2563     llvm_unreachable("Type can't be converted to floating-point!");
2564   }
2565 }
2566 
2567 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
2568   unsigned nElts = V->getType()->getVectorNumElements();
2569   Value* SV = llvm::ConstantVector::getSplat(nElts, C);
2570   return Builder.CreateShuffleVector(V, V, SV, "lane");
2571 }
2572 
2573 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
2574                                      const char *name,
2575                                      unsigned shift, bool rightshift) {
2576   unsigned j = 0;
2577   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
2578        ai != ae; ++ai, ++j)
2579     if (shift > 0 && shift == j)
2580       Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
2581     else
2582       Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
2583 
2584   return Builder.CreateCall(F, Ops, name);
2585 }
2586 
2587 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
2588                                             bool neg) {
2589   int SV = cast<ConstantInt>(V)->getSExtValue();
2590   return ConstantInt::get(Ty, neg ? -SV : SV);
2591 }
2592 
2593 // \brief Right-shift a vector by a constant.
2594 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
2595                                           llvm::Type *Ty, bool usgn,
2596                                           const char *name) {
2597   llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
2598 
2599   int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
2600   int EltSize = VTy->getScalarSizeInBits();
2601 
2602   Vec = Builder.CreateBitCast(Vec, Ty);
2603 
2604   // lshr/ashr are undefined when the shift amount is equal to the vector
2605   // element size.
2606   if (ShiftAmt == EltSize) {
2607     if (usgn) {
2608       // Right-shifting an unsigned value by its size yields 0.
2609       return llvm::ConstantAggregateZero::get(VTy);
2610     } else {
2611       // Right-shifting a signed value by its size is equivalent
2612       // to a shift of size-1.
2613       --ShiftAmt;
2614       Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
2615     }
2616   }
2617 
2618   Shift = EmitNeonShiftVector(Shift, Ty, false);
2619   if (usgn)
2620     return Builder.CreateLShr(Vec, Shift, name);
2621   else
2622     return Builder.CreateAShr(Vec, Shift, name);
2623 }
2624 
// Bit flags that are OR-ed together to form the TypeModifier value of a
// NEON intrinsic table entry.
// NOTE(review): the code that interprets these bits is outside this part of
// the file; the grouping comments below are inferred from the names only.
enum {
  // Which types take part (presumably in selecting the intrinsic overload).
  AddRetType = 1 << 0,
  Add1ArgType = 1 << 1,
  Add2ArgTypes = 1 << 2,

  // Whether the return/argument types are turned into vectors.
  VectorizeRetType = 1 << 3,
  VectorizeArgTypes = 1 << 4,

  InventFloatType = 1 << 5,
  UnsignedAlts = 1 << 6,

  // Explicit vector width requests.
  Use64BitVectors = 1 << 7,
  Use128BitVectors = 1 << 8,

  // Frequently used combinations of the flags above.
  VectorRet = AddRetType | VectorizeRetType,
  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
  VectorRetGetArgs01 = VectorRet | Add2ArgTypes | VectorizeArgTypes,
  FpCmpzModifiers = VectorRet | Add1ArgType | InventFloatType
};
2646 
namespace {
/// One row of a NEON builtin table, as produced by the NEONMAP0/1/2 macros.
///
/// Rows are ordered by BuiltinID; the two operator< overloads allow a row to
/// be compared against another row or directly against a builtin ID.
struct NeonIntrinsicInfo {
  // Stringified base name of the builtin (the macros' #NameBase).
  const char *NameHint;
  // The NEON::BI__builtin_neon_* identifier of the builtin.
  unsigned BuiltinID;
  // Intrinsic ID for the builtin, or 0 when the row has none.
  unsigned LLVMIntrinsic;
  // Second intrinsic ID (only set by NEONMAP2), or 0.
  unsigned AltLLVMIntrinsic;
  // Mask of the type-modifier flags for this row, or 0.
  unsigned TypeModifier;

  // Order a row against a bare builtin ID.
  bool operator<(unsigned RHSBuiltinID) const {
    return BuiltinID < RHSBuiltinID;
  }

  // Order two rows by their builtin IDs.
  bool operator<(const NeonIntrinsicInfo &TE) const {
    return *this < TE.BuiltinID;
  }
};
} // end anonymous namespace
2663 
// Table row for a builtin that records no intrinsic: the name hint and the
// NEON::BI__builtin_neon_<Base> ID are filled in, and both intrinsic slots
// and the type modifier are 0.
#define NEONMAP0(Base)                                                         \
  { #Base, NEON::BI__builtin_neon_##Base, 0, 0, 0 }
2666 
// Table row for a builtin with a single intrinsic: Intrinsic::<Intr> goes in
// the first intrinsic slot, the alternate slot is 0, and Modifier is stored
// as the type modifier.
#define NEONMAP1(Base, Intr, Modifier)                                         \
  { #Base, NEON::BI__builtin_neon_##Base, Intrinsic::Intr, 0, Modifier }
2670 
// Table row for a builtin with two intrinsics: Intrinsic::<Intr> goes in the
// first intrinsic slot, Intrinsic::<AltIntr> in the alternate slot, and
// Modifier is stored as the type modifier.
#define NEONMAP2(Base, Intr, AltIntr, Modifier)                                \
  { #Base, NEON::BI__builtin_neon_##Base, Intrinsic::Intr,                    \
      Intrinsic::AltIntr, Modifier }
2675 
2676 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
2677   NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
2678   NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
2679   NEONMAP1(vabs_v, arm_neon_vabs, 0),
2680   NEONMAP1(vabsq_v, arm_neon_vabs, 0),
2681   NEONMAP0(vaddhn_v),
2682   NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
2683   NEONMAP1(vaeseq_v, arm_neon_aese, 0),
2684   NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
2685   NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
2686   NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
2687   NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
2688   NEONMAP1(vcage_v, arm_neon_vacge, 0),
2689   NEONMAP1(vcageq_v, arm_neon_vacge, 0),
2690   NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
2691   NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
2692   NEONMAP1(vcale_v, arm_neon_vacge, 0),
2693   NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
2694   NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
2695   NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
2696   NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
2697   NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
2698   NEONMAP1(vclz_v, ctlz, Add1ArgType),
2699   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
2700   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
2701   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
2702   NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
2703   NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
2704   NEONMAP0(vcvt_f32_v),
2705   NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
2706   NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
2707   NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
2708   NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
2709   NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
2710   NEONMAP0(vcvt_s32_v),
2711   NEONMAP0(vcvt_s64_v),
2712   NEONMAP0(vcvt_u32_v),
2713   NEONMAP0(vcvt_u64_v),
2714   NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
2715   NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
2716   NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
2717   NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
2718   NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
2719   NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
2720   NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
2721   NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
2722   NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
2723   NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
2724   NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
2725   NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
2726   NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
2727   NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
2728   NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
2729   NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
2730   NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
2731   NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
2732   NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
2733   NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
2734   NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
2735   NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
2736   NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
2737   NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
2738   NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
2739   NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
2740   NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
2741   NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
2742   NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
2743   NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
2744   NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
2745   NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
2746   NEONMAP0(vcvtq_f32_v),
2747   NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
2748   NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
2749   NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
2750   NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
2751   NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
2752   NEONMAP0(vcvtq_s32_v),
2753   NEONMAP0(vcvtq_s64_v),
2754   NEONMAP0(vcvtq_u32_v),
2755   NEONMAP0(vcvtq_u64_v),
2756   NEONMAP0(vext_v),
2757   NEONMAP0(vextq_v),
2758   NEONMAP0(vfma_v),
2759   NEONMAP0(vfmaq_v),
2760   NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
2761   NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
2762   NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
2763   NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
2764   NEONMAP0(vld1_dup_v),
2765   NEONMAP1(vld1_v, arm_neon_vld1, 0),
2766   NEONMAP0(vld1q_dup_v),
2767   NEONMAP1(vld1q_v, arm_neon_vld1, 0),
2768   NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
2769   NEONMAP1(vld2_v, arm_neon_vld2, 0),
2770   NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
2771   NEONMAP1(vld2q_v, arm_neon_vld2, 0),
2772   NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
2773   NEONMAP1(vld3_v, arm_neon_vld3, 0),
2774   NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
2775   NEONMAP1(vld3q_v, arm_neon_vld3, 0),
2776   NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
2777   NEONMAP1(vld4_v, arm_neon_vld4, 0),
2778   NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
2779   NEONMAP1(vld4q_v, arm_neon_vld4, 0),
2780   NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
2781   NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
2782   NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
2783   NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
2784   NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
2785   NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
2786   NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
2787   NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
2788   NEONMAP0(vmovl_v),
2789   NEONMAP0(vmovn_v),
2790   NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
2791   NEONMAP0(vmull_v),
2792   NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
2793   NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
2794   NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
2795   NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
2796   NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
2797   NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
2798   NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
2799   NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
2800   NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
2801   NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
2802   NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
2803   NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
2804   NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
2805   NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
2806   NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
2807   NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
2808   NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
2809   NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
2810   NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
2811   NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
2812   NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
2813   NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
2814   NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
2815   NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
2816   NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
2817   NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
2818   NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
2819   NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
2820   NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
2821   NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
2822   NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
2823   NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
2824   NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
2825   NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
2826   NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
2827   NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
2828   NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
2829   NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
2830   NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
2831   NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
2832   NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
2833   NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
2834   NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
2835   NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
2836   NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
2837   NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
2838   NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
2839   NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
2840   NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
2841   NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
2842   NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
2843   NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
2844   NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
2845   NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
2846   NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
2847   NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
2848   NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
2849   NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
2850   NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
2851   NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
2852   NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
2853   NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
2854   NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
2855   NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
2856   NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
2857   NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
2858   NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
2859   NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
2860   NEONMAP0(vshl_n_v),
2861   NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
2862   NEONMAP0(vshll_n_v),
2863   NEONMAP0(vshlq_n_v),
2864   NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
2865   NEONMAP0(vshr_n_v),
2866   NEONMAP0(vshrn_n_v),
2867   NEONMAP0(vshrq_n_v),
2868   NEONMAP1(vst1_v, arm_neon_vst1, 0),
2869   NEONMAP1(vst1q_v, arm_neon_vst1, 0),
2870   NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
2871   NEONMAP1(vst2_v, arm_neon_vst2, 0),
2872   NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
2873   NEONMAP1(vst2q_v, arm_neon_vst2, 0),
2874   NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
2875   NEONMAP1(vst3_v, arm_neon_vst3, 0),
2876   NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
2877   NEONMAP1(vst3q_v, arm_neon_vst3, 0),
2878   NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
2879   NEONMAP1(vst4_v, arm_neon_vst4, 0),
2880   NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
2881   NEONMAP1(vst4q_v, arm_neon_vst4, 0),
2882   NEONMAP0(vsubhn_v),
2883   NEONMAP0(vtrn_v),
2884   NEONMAP0(vtrnq_v),
2885   NEONMAP0(vtst_v),
2886   NEONMAP0(vtstq_v),
2887   NEONMAP0(vuzp_v),
2888   NEONMAP0(vuzpq_v),
2889   NEONMAP0(vzip_v),
2890   NEONMAP0(vzipq_v)
2891 };
2892 
// Lookup table for the AArch64 vector ("SIMD") NEON builtins.
//
// Every row is produced by one of the NEONMAP* helper macros, which are
// defined earlier in the file and #undef'd once the tables are complete.
// NOTE(review): the macro bodies are not visible here; judging by the
// arguments, the rows appear to have these shapes - confirm against the
// macro definitions:
//   NEONMAP0(builtin)                        - no LLVM intrinsic listed
//   NEONMAP1(builtin, intrinsic, flags)      - one LLVM intrinsic
//   NEONMAP2(builtin, intrinsic, alt, flags) - intrinsic plus an alternate
// For rows flagged UnsignedAlts, EmitCommonNeonBuiltinExpr switches to the
// alternate intrinsic when the element type is not unsigned, so presumably
// the first intrinsic listed is the unsigned flavour and the second the
// signed one.
//
// Keep the rows in sorted order when adding entries: findNeonIntrinsicInMap
// locates rows with std::lower_bound and asserts std::is_sorted in
// asserts-enabled builds, and this table is presumably searched through it.
static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
  NEONMAP0(vaddhn_v),
  NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
  NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
  NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
};
3007 
// Lookup table for the AArch64 scalar ("SISD") NEON builtins, i.e. the
// builtins whose names carry an explicit element type (vqaddb_s8,
// vaddvq_f32, ...). Each NEONMAP1 row names the builtin, the LLVM intrinsic
// listed for it and a set of type-modifier flags.
// NOTE(review): the NEONMAP1 macro body is not visible here - confirm the
// row layout against its definition.
//
// The flags are consumed by LookupNeonLLVMIntrinsic to build the intrinsic's
// overload type list: AddRetType / Add1ArgType add the return / argument type,
// the Vectorize* flags wrap the scalar type in a vector first, and
// Use64BitVectors / Use128BitVectors choose that vector's total width.
//
// Keep the rows in sorted order when adding entries: findNeonIntrinsicInMap
// locates rows with std::lower_bound and asserts std::is_sorted in
// asserts-enabled builds, and this table is presumably searched through it.
static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  // The "le"/"lt" absolute compares reuse facge/facgt;
  // EmitCommonNeonSISDBuiltinExpr swaps their two operands before the call.
  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
  NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
};
3202 
// The NEONMAP* helpers exist only to build the intrinsic tables above; drop
// them here so they cannot leak into the rest of the file.
#undef NEONMAP0
#undef NEONMAP1
#undef NEONMAP2
3206 
// "Already verified as sorted" flags, one per intrinsic table. They match the
// MapProvenSorted parameter of findNeonIntrinsicInMap, which sets the flag it
// is given once its asserts-only std::is_sorted check has run, so the check
// is performed at most once per flag.
// NOTE(review): the call sites are outside this excerpt - confirm there that
// each flag is paired with the table its name suggests.
static bool NEONSIMDIntrinsicsProvenSorted = false;

static bool AArch64SIMDIntrinsicsProvenSorted = false;
static bool AArch64SISDIntrinsicsProvenSorted = false;
3211 
3212 
3213 static const NeonIntrinsicInfo *
3214 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
3215                        unsigned BuiltinID, bool &MapProvenSorted) {
3216 
3217 #ifndef NDEBUG
3218   if (!MapProvenSorted) {
3219     assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
3220     MapProvenSorted = true;
3221   }
3222 #endif
3223 
3224   const NeonIntrinsicInfo *Builtin =
3225       std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
3226 
3227   if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
3228     return Builtin;
3229 
3230   return nullptr;
3231 }
3232 
3233 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
3234                                                    unsigned Modifier,
3235                                                    llvm::Type *ArgType,
3236                                                    const CallExpr *E) {
3237   int VectorSize = 0;
3238   if (Modifier & Use64BitVectors)
3239     VectorSize = 64;
3240   else if (Modifier & Use128BitVectors)
3241     VectorSize = 128;
3242 
3243   // Return type.
3244   SmallVector<llvm::Type *, 3> Tys;
3245   if (Modifier & AddRetType) {
3246     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
3247     if (Modifier & VectorizeRetType)
3248       Ty = llvm::VectorType::get(
3249           Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
3250 
3251     Tys.push_back(Ty);
3252   }
3253 
3254   // Arguments.
3255   if (Modifier & VectorizeArgTypes) {
3256     int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
3257     ArgType = llvm::VectorType::get(ArgType, Elts);
3258   }
3259 
3260   if (Modifier & (Add1ArgType | Add2ArgTypes))
3261     Tys.push_back(ArgType);
3262 
3263   if (Modifier & Add2ArgTypes)
3264     Tys.push_back(ArgType);
3265 
3266   if (Modifier & InventFloatType)
3267     Tys.push_back(FloatTy);
3268 
3269   return CGM.getIntrinsic(IntrinsicID, Tys);
3270 }
3271 
3272 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
3273                                             const NeonIntrinsicInfo &SISDInfo,
3274                                             SmallVectorImpl<Value *> &Ops,
3275                                             const CallExpr *E) {
3276   unsigned BuiltinID = SISDInfo.BuiltinID;
3277   unsigned int Int = SISDInfo.LLVMIntrinsic;
3278   unsigned Modifier = SISDInfo.TypeModifier;
3279   const char *s = SISDInfo.NameHint;
3280 
3281   switch (BuiltinID) {
3282   case NEON::BI__builtin_neon_vcled_s64:
3283   case NEON::BI__builtin_neon_vcled_u64:
3284   case NEON::BI__builtin_neon_vcles_f32:
3285   case NEON::BI__builtin_neon_vcled_f64:
3286   case NEON::BI__builtin_neon_vcltd_s64:
3287   case NEON::BI__builtin_neon_vcltd_u64:
3288   case NEON::BI__builtin_neon_vclts_f32:
3289   case NEON::BI__builtin_neon_vcltd_f64:
3290   case NEON::BI__builtin_neon_vcales_f32:
3291   case NEON::BI__builtin_neon_vcaled_f64:
3292   case NEON::BI__builtin_neon_vcalts_f32:
3293   case NEON::BI__builtin_neon_vcaltd_f64:
3294     // Only one direction of comparisons actually exist, cmle is actually a cmge
3295     // with swapped operands. The table gives us the right intrinsic but we
3296     // still need to do the swap.
3297     std::swap(Ops[0], Ops[1]);
3298     break;
3299   }
3300 
3301   assert(Int && "Generic code assumes a valid intrinsic");
3302 
3303   // Determine the type(s) of this overloaded AArch64 intrinsic.
3304   const Expr *Arg = E->getArg(0);
3305   llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
3306   Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
3307 
3308   int j = 0;
3309   ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
3310   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3311        ai != ae; ++ai, ++j) {
3312     llvm::Type *ArgTy = ai->getType();
3313     if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
3314              ArgTy->getPrimitiveSizeInBits())
3315       continue;
3316 
3317     assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
3318     // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
3319     // it before inserting.
3320     Ops[j] =
3321         CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
3322     Ops[j] =
3323         CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
3324   }
3325 
3326   Value *Result = CGF.EmitNeonCall(F, Ops, s);
3327   llvm::Type *ResultType = CGF.ConvertType(E->getType());
3328   if (ResultType->getPrimitiveSizeInBits() <
3329       Result->getType()->getPrimitiveSizeInBits())
3330     return CGF.Builder.CreateExtractElement(Result, C0);
3331 
3332   return CGF.Builder.CreateBitCast(Result, ResultType, s);
3333 }
3334 
/// Emit LLVM IR for a NEON builtin call described by an
/// (LLVMIntrinsic, AltLLVMIntrinsic, Modifier) triple.
///
/// The last argument of \p E must be an integer constant encoding the
/// NeonTypeFlags of the overloaded vector type. If it is not a constant, or
/// if GetNeonType yields no LLVM vector type for it, nullptr is returned.
///
/// Builtins listed in the switch below get dedicated lowering and return
/// directly. Every other builtin (and the cases that \c break) is emitted as a
/// call to the intrinsic selected in \c Int, resolved through
/// LookupNeonLLVMIntrinsic, with the result bitcast to the converted type of
/// \p E.
///
/// \param BuiltinID the NEON builtin being emitted.
/// \param LLVMIntrinsic the primary intrinsic ID for this builtin.
/// \param AltLLVMIntrinsic the alternate intrinsic ID; which of the two is
///        used depends on \p Modifier, signedness and the individual case.
/// \param NameHint name given to the emitted call in the generic cases.
/// \param Modifier type-modifier flags forwarded to LookupNeonLLVMIntrinsic.
/// \param E the call expression being lowered.
/// \param Ops the already-emitted operand values; several cases rewrite or
///        extend this vector in place.
/// \param PtrOp0, PtrOp1 addresses used by the load/store cases (for their
///        alignment, and PtrOp0 as the load source of vld1_dup).
Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
    unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
    const char *NameHint, unsigned Modifier, const CallExpr *E,
    SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
  // Get the last argument, which specifies the vector type.
  llvm::APSInt NeonTypeConst;
  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
    return nullptr;

  // Determine the type of this overloaded NEON intrinsic.
  NeonTypeFlags Type(NeonTypeConst.getZExtValue());
  bool Usgn = Type.isUnsigned();
  bool Quad = Type.isQuad();

  // VTy keeps the original vector type; Ty starts out equal to it but is
  // reassigned by some of the cases below.
  llvm::VectorType *VTy = GetNeonType(this, Type);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  // Helper: the alignment quantity of \p addr as an i32 constant.
  auto getAlignmentValue32 = [&](Address addr) -> Value* {
    return Builder.getInt32(addr.getAlignment().getQuantity());
  };

  // When the UnsignedAlts modifier bit is set and the element type is not
  // unsigned, use the alternate intrinsic.
  unsigned Int = LLVMIntrinsic;
  if ((Modifier & UnsignedAlts) && !Usgn)
    Int = AltLLVMIntrinsic;

  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_vabs_v:
  case NEON::BI__builtin_neon_vabsq_v:
    // Floating-point element types use the generic fabs intrinsic.
    if (VTy->getElementType()->isFloatingPointTy())
      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
  case NEON::BI__builtin_neon_vaddhn_v: {
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %sum = add <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");

    // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
  }
  case NEON::BI__builtin_neon_vcale_v:
  case NEON::BI__builtin_neon_vcaleq_v:
  case NEON::BI__builtin_neon_vcalt_v:
  case NEON::BI__builtin_neon_vcaltq_v:
    // Intentional fall through: after swapping the operands, the call is
    // emitted the same way as for vcage/vcagt below.
    std::swap(Ops[0], Ops[1]);
  case NEON::BI__builtin_neon_vcage_v:
  case NEON::BI__builtin_neon_vcageq_v:
  case NEON::BI__builtin_neon_vcagt_v:
  case NEON::BI__builtin_neon_vcagtq_v: {
    // The intrinsic is overloaded on both VTy and a floating-point vector
    // with the same element width and element count.
    llvm::Type *VecFlt = llvm::VectorType::get(
        VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
        VTy->getNumElements());
    llvm::Type *Tys[] = { VTy, VecFlt };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vclz_v:
  case NEON::BI__builtin_neon_vclzq_v:
    // We generate target-independent intrinsic, which needs a second argument
    // for whether or not clz of zero is undefined; on ARM it isn't.
    Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
    break;
  case NEON::BI__builtin_neon_vcvt_f32_v:
  case NEON::BI__builtin_neon_vcvtq_f32_v:
    // Integer to float conversion; Ty is switched to the Float32 vector type
    // of matching quad-ness for the destination.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_n_f32_v:
  case NEON::BI__builtin_neon_vcvt_n_f64_v:
  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
    // Here the primary intrinsic is the one used for unsigned sources.
    Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_n_s32_v:
  case NEON::BI__builtin_neon_vcvt_n_u32_v:
  case NEON::BI__builtin_neon_vcvt_n_s64_v:
  case NEON::BI__builtin_neon_vcvt_n_u64_v:
  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_s32_v:
  case NEON::BI__builtin_neon_vcvt_u32_v:
  case NEON::BI__builtin_neon_vcvt_s64_v:
  case NEON::BI__builtin_neon_vcvt_u64_v:
  case NEON::BI__builtin_neon_vcvtq_s32_v:
  case NEON::BI__builtin_neon_vcvtq_u32_v:
  case NEON::BI__builtin_neon_vcvtq_s64_v:
  case NEON::BI__builtin_neon_vcvtq_u64_v: {
    // Float to integer conversion emitted as plain fptoui/fptosi.
    Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
    return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v:
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v:
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v:
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vext_v:
  case NEON::BI__builtin_neon_vextq_v: {
    // Select getNumElements() consecutive lanes, starting at lane CV, from
    // the concatenation of Ops[0] and Ops[1].
    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
    SmallVector<uint32_t, 16> Indices;
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
      Indices.push_back(i+CV);

    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
  }
  case NEON::BI__builtin_neon_vfma_v:
  case NEON::BI__builtin_neon_vfmaq_v: {
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);

    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
  }
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    // The alignment of PtrOp0 is appended as the trailing operand.
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
  }
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v:
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v:
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v: {
    // Call the intrinsic on Ops[1] (with PtrOp1's alignment) and store its
    // result through the pointer in Ops[0].
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    Value *Align = getAlignmentValue32(PtrOp1);
    Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    // Load one element from PtrOp0, insert it at index 0 of an undef vector,
    // and splat it.
    Value *V = UndefValue::get(Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
    LoadInst *Ld = Builder.CreateLoad(PtrOp0);
    llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v:
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v:
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    // Bitcast Ops[2] up to, but not including, the last operand to Ty.
    for (unsigned I = 2; I < Ops.size() - 1; ++I)
      Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
    Ops.push_back(getAlignmentValue32(PtrOp1));
    // Ops[0] is the destination pointer; everything after it is passed to
    // the intrinsic, whose result is then stored through Ops[0].
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vmovl_v: {
    // Widen from the truncated-element vector type: zext for unsigned,
    // sext otherwise.
    llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
    if (Usgn)
      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
  }
  case NEON::BI__builtin_neon_vmovn_v: {
    // Narrow from the extended-element vector type with a plain trunc.
    llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
  }
  case NEON::BI__builtin_neon_vmull_v:
    // FIXME: the integer vmull operations could be emitted in terms of pure
    // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
    // hoisting the exts outside loops. Until global ISel comes along that can
    // see through such movement this leads to bad CodeGen. So we need an
    // intrinsic for now.
    Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
    // Polynomial types override the signed/unsigned choice.
    Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  case NEON::BI__builtin_neon_vpadal_v:
  case NEON::BI__builtin_neon_vpadalq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy =
      llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vpaddl_v:
  case NEON::BI__builtin_neon_vpaddlq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
  }
  case NEON::BI__builtin_neon_vqdmlal_v:
  case NEON::BI__builtin_neon_vqdmlsl_v: {
    // Two-step lowering: first call LLVMIntrinsic on every operand except
    // Ops[0], then call AltLLVMIntrinsic on Ops[0] and that result.
    SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
    Ops.resize(2);
    return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vqshl_n_v:
  case NEON::BI__builtin_neon_vqshlq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
                        1, false);
  case NEON::BI__builtin_neon_vqshlu_n_v:
  case NEON::BI__builtin_neon_vqshluq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
                        1, false);
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:
  case NEON::BI__builtin_neon_vrsqrte_v:
  case NEON::BI__builtin_neon_vrsqrteq_v:
    // Floating-point types use the primary intrinsic, all others the
    // alternate one.
    Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);

  case NEON::BI__builtin_neon_vrshr_n_v:
  case NEON::BI__builtin_neon_vrshrq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
                        1, true);
  case NEON::BI__builtin_neon_vshl_n_v:
  case NEON::BI__builtin_neon_vshlq_n_v:
    // Immediate left shift emitted as a plain shl by a shift vector.
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
    return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
                             "vshl_n");
  case NEON::BI__builtin_neon_vshll_n_v: {
    // Extend from the truncated-element type (zext if unsigned, else sext),
    // then shift left in the wide type.
    llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    if (Usgn)
      Ops[0] = Builder.CreateZExt(Ops[0], VTy);
    else
      Ops[0] = Builder.CreateSExt(Ops[0], VTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
    return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
  }
  case NEON::BI__builtin_neon_vshrn_n_v: {
    // Shift right in the extended-element type (lshr if unsigned, else
    // ashr), then truncate to the narrow result type.
    llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
    if (Usgn)
      Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
    return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
  }
  case NEON::BI__builtin_neon_vshr_n_v:
  case NEON::BI__builtin_neon_vshrq_n_v:
    return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v:
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v:
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v:
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v:
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v:
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v: {
    llvm::Type *Tys[] = {Int8PtrTy, Ty};
    // The alignment of PtrOp0 is appended as the trailing operand.
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
  }
  case NEON::BI__builtin_neon_vsubhn_v: {
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %diff = sub <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");

    // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
  }
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    // Ops[0] is the destination pointer; two shuffles of Ops[1] and Ops[2]
    // are stored to consecutive Ty-sized slots behind it. The last store is
    // returned.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      // Result vi pairs lane (i+vi) of the first input with lane (i+vi) of
      // the second input, for every even i.
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(i+vi);
        Indices.push_back(i+e+vi);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vtst_v:
  case NEON::BI__builtin_neon_vtstq_v: {
    // (a & b) != 0 per lane, sign-extended back to Ty.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                ConstantAggregateZero::get(Ty));
    return Builder.CreateSExt(Ops[0], Ty, "vtst");
  }
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    // Same store-through-Ops[0] scheme as vtrn above.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      // Result 0 takes the even lanes of the concatenated inputs, result 1
      // the odd lanes.
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(2*i+vi);

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    // Same store-through-Ops[0] scheme as vtrn above.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      // Result 0 interleaves the low halves of the two inputs, result 1 the
      // high halves.
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back((i + vi*e) >> 1);
        Indices.push_back(((i + vi*e) >> 1)+e);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  }

  // Generic path: reached by builtins without a dedicated case and by the
  // cases above that 'break' after adjusting Ops.
  assert(Int && "Expected valid intrinsic number");

  // Determine the type(s) of this overloaded AArch64 intrinsic.
  Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);

  Value *Result = EmitNeonCall(F, Ops, NameHint);
  llvm::Type *ResultType = ConvertType(E->getType());
  // AArch64 intrinsic one-element vector type cast to
  // scalar type expected by the builtin
  return Builder.CreateBitCast(Result, ResultType, NameHint);
}
3760 
3761 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
3762     Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
3763     const CmpInst::Predicate Ip, const Twine &Name) {
3764   llvm::Type *OTy = Op->getType();
3765 
3766   // FIXME: this is utterly horrific. We should not be looking at previous
3767   // codegen context to find out what needs doing. Unfortunately TableGen
3768   // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
3769   // (etc).
3770   if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
3771     OTy = BI->getOperand(0)->getType();
3772 
3773   Op = Builder.CreateBitCast(Op, OTy);
3774   if (OTy->getScalarType()->isFloatingPointTy()) {
3775     Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
3776   } else {
3777     Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
3778   }
3779   return Builder.CreateSExt(Op, Ty, Name);
3780 }
3781 
3782 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
3783                                  Value *ExtOp, Value *IndexOp,
3784                                  llvm::Type *ResTy, unsigned IntID,
3785                                  const char *Name) {
3786   SmallVector<Value *, 2> TblOps;
3787   if (ExtOp)
3788     TblOps.push_back(ExtOp);
3789 
3790   // Build a vector containing sequential number like (0, 1, 2, ..., 15)
3791   SmallVector<uint32_t, 16> Indices;
3792   llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
3793   for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
3794     Indices.push_back(2*i);
3795     Indices.push_back(2*i+1);
3796   }
3797 
3798   int PairPos = 0, End = Ops.size() - 1;
3799   while (PairPos < End) {
3800     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
3801                                                      Ops[PairPos+1], Indices,
3802                                                      Name));
3803     PairPos += 2;
3804   }
3805 
3806   // If there's an odd number of 64-bit lookup table, fill the high 64-bit
3807   // of the 128-bit lookup table with zero.
3808   if (PairPos == End) {
3809     Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
3810     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
3811                                                      ZeroTbl, Indices, Name));
3812   }
3813 
3814   Function *TblF;
3815   TblOps.push_back(IndexOp);
3816   TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
3817 
3818   return CGF.EmitNeonCall(TblF, TblOps, Name);
3819 }
3820 
3821 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
3822   unsigned Value;
3823   switch (BuiltinID) {
3824   default:
3825     return nullptr;
3826   case ARM::BI__builtin_arm_nop:
3827     Value = 0;
3828     break;
3829   case ARM::BI__builtin_arm_yield:
3830   case ARM::BI__yield:
3831     Value = 1;
3832     break;
3833   case ARM::BI__builtin_arm_wfe:
3834   case ARM::BI__wfe:
3835     Value = 2;
3836     break;
3837   case ARM::BI__builtin_arm_wfi:
3838   case ARM::BI__wfi:
3839     Value = 3;
3840     break;
3841   case ARM::BI__builtin_arm_sev:
3842   case ARM::BI__sev:
3843     Value = 4;
3844     break;
3845   case ARM::BI__builtin_arm_sevl:
3846   case ARM::BI__sevl:
3847     Value = 5;
3848     break;
3849   }
3850 
3851   return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
3852                             llvm::ConstantInt::get(Int32Ty, Value));
3853 }
3854 
3855 // Generates the IR for the read/write special register builtin,
3856 // ValueType is the type of the value that is to be written or read,
3857 // RegisterType is the type of the register being written to or read from.
3858 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
3859                                          const CallExpr *E,
3860                                          llvm::Type *RegisterType,
3861                                          llvm::Type *ValueType,
3862                                          bool IsRead,
3863                                          StringRef SysReg = "") {
3864   // write and register intrinsics only support 32 and 64 bit operations.
3865   assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
3866           && "Unsupported size for register.");
3867 
3868   CodeGen::CGBuilderTy &Builder = CGF.Builder;
3869   CodeGen::CodeGenModule &CGM = CGF.CGM;
3870   LLVMContext &Context = CGM.getLLVMContext();
3871 
3872   if (SysReg.empty()) {
3873     const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
3874     SysReg = cast<StringLiteral>(SysRegStrExpr)->getString();
3875   }
3876 
3877   llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
3878   llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
3879   llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
3880 
3881   llvm::Type *Types[] = { RegisterType };
3882 
3883   bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
3884   assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
3885             && "Can't fit 64-bit value in 32-bit register");
3886 
3887   if (IsRead) {
3888     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
3889     llvm::Value *Call = Builder.CreateCall(F, Metadata);
3890 
3891     if (MixedTypes)
3892       // Read into 64 bit register and then truncate result to 32 bit.
3893       return Builder.CreateTrunc(Call, ValueType);
3894 
3895     if (ValueType->isPointerTy())
3896       // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
3897       return Builder.CreateIntToPtr(Call, ValueType);
3898 
3899     return Call;
3900   }
3901 
3902   llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
3903   llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
3904   if (MixedTypes) {
3905     // Extend 32 bit write value to 64 bit to pass to write.
3906     ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
3907     return Builder.CreateCall(F, { Metadata, ArgValue });
3908   }
3909 
3910   if (ValueType->isPointerTy()) {
3911     // Have VoidPtrTy ArgValue but want to return an i32/i64.
3912     ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
3913     return Builder.CreateCall(F, { Metadata, ArgValue });
3914   }
3915 
3916   return Builder.CreateCall(F, { Metadata, ArgValue });
3917 }
3918 
3919 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
3920 /// argument that specifies the vector type.
3921 static bool HasExtraNeonArgument(unsigned BuiltinID) {
3922   switch (BuiltinID) {
3923   default: break;
3924   case NEON::BI__builtin_neon_vget_lane_i8:
3925   case NEON::BI__builtin_neon_vget_lane_i16:
3926   case NEON::BI__builtin_neon_vget_lane_i32:
3927   case NEON::BI__builtin_neon_vget_lane_i64:
3928   case NEON::BI__builtin_neon_vget_lane_f32:
3929   case NEON::BI__builtin_neon_vgetq_lane_i8:
3930   case NEON::BI__builtin_neon_vgetq_lane_i16:
3931   case NEON::BI__builtin_neon_vgetq_lane_i32:
3932   case NEON::BI__builtin_neon_vgetq_lane_i64:
3933   case NEON::BI__builtin_neon_vgetq_lane_f32:
3934   case NEON::BI__builtin_neon_vset_lane_i8:
3935   case NEON::BI__builtin_neon_vset_lane_i16:
3936   case NEON::BI__builtin_neon_vset_lane_i32:
3937   case NEON::BI__builtin_neon_vset_lane_i64:
3938   case NEON::BI__builtin_neon_vset_lane_f32:
3939   case NEON::BI__builtin_neon_vsetq_lane_i8:
3940   case NEON::BI__builtin_neon_vsetq_lane_i16:
3941   case NEON::BI__builtin_neon_vsetq_lane_i32:
3942   case NEON::BI__builtin_neon_vsetq_lane_i64:
3943   case NEON::BI__builtin_neon_vsetq_lane_f32:
3944   case NEON::BI__builtin_neon_vsha1h_u32:
3945   case NEON::BI__builtin_neon_vsha1cq_u32:
3946   case NEON::BI__builtin_neon_vsha1pq_u32:
3947   case NEON::BI__builtin_neon_vsha1mq_u32:
3948   case ARM::BI_MoveToCoprocessor:
3949   case ARM::BI_MoveToCoprocessor2:
3950     return false;
3951   }
3952   return true;
3953 }
3954 
3955 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
3956                                            const CallExpr *E) {
3957   if (auto Hint = GetValueForARMHint(BuiltinID))
3958     return Hint;
3959 
3960   if (BuiltinID == ARM::BI__emit) {
3961     bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
3962     llvm::FunctionType *FTy =
3963         llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
3964 
3965     APSInt Value;
3966     if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
3967       llvm_unreachable("Sema will ensure that the parameter is constant");
3968 
3969     uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
3970 
3971     llvm::InlineAsm *Emit =
3972         IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
3973                                  /*SideEffects=*/true)
3974                 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
3975                                  /*SideEffects=*/true);
3976 
3977     return Builder.CreateCall(Emit);
3978   }
3979 
3980   if (BuiltinID == ARM::BI__builtin_arm_dbg) {
3981     Value *Option = EmitScalarExpr(E->getArg(0));
3982     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
3983   }
3984 
3985   if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
3986     Value *Address = EmitScalarExpr(E->getArg(0));
3987     Value *RW      = EmitScalarExpr(E->getArg(1));
3988     Value *IsData  = EmitScalarExpr(E->getArg(2));
3989 
3990     // Locality is not supported on ARM target
3991     Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
3992 
3993     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
3994     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
3995   }
3996 
3997   if (BuiltinID == ARM::BI__builtin_arm_rbit) {
3998     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_rbit),
3999                                                EmitScalarExpr(E->getArg(0)),
4000                               "rbit");
4001   }
4002 
4003   if (BuiltinID == ARM::BI__clear_cache) {
4004     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4005     const FunctionDecl *FD = E->getDirectCallee();
4006     Value *Ops[2];
4007     for (unsigned i = 0; i < 2; i++)
4008       Ops[i] = EmitScalarExpr(E->getArg(i));
4009     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4010     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4011     StringRef Name = FD->getName();
4012     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4013   }
4014 
4015   if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
4016       BuiltinID == ARM::BI__builtin_arm_mcrr2) {
4017     Function *F;
4018 
4019     switch (BuiltinID) {
4020     default: llvm_unreachable("unexpected builtin");
4021     case ARM::BI__builtin_arm_mcrr:
4022       F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
4023       break;
4024     case ARM::BI__builtin_arm_mcrr2:
4025       F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
4026       break;
4027     }
4028 
    // The MCRR{2} instruction has 5 operands, but the builtin takes only 4
    // arguments because Rt and Rt2 are passed together as a single unsigned
    // 64-bit integer. The LLVM intrinsic called below takes them as two
    // separate 32-bit integers, so split the 64-bit value into its low
    // half (Rt) and high half (Rt2) here.

4036     Value *Coproc = EmitScalarExpr(E->getArg(0));
4037     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4038     Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
4039     Value *CRm = EmitScalarExpr(E->getArg(3));
4040 
4041     Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4042     Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
4043     Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
4044     Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
4045 
4046     return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
4047   }
4048 
4049   if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
4050       BuiltinID == ARM::BI__builtin_arm_mrrc2) {
4051     Function *F;
4052 
4053     switch (BuiltinID) {
4054     default: llvm_unreachable("unexpected builtin");
4055     case ARM::BI__builtin_arm_mrrc:
4056       F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
4057       break;
4058     case ARM::BI__builtin_arm_mrrc2:
4059       F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
4060       break;
4061     }
4062 
4063     Value *Coproc = EmitScalarExpr(E->getArg(0));
4064     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4065     Value *CRm  = EmitScalarExpr(E->getArg(2));
4066     Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
4067 
4068     // Returns an unsigned 64 bit integer, represented
4069     // as two 32 bit integers.
4070 
4071     Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
4072     Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
4073     Rt = Builder.CreateZExt(Rt, Int64Ty);
4074     Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
4075 
4076     Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
4077     RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
4078     RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
4079 
4080     return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
4081   }
4082 
4083   if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
4084       ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
4085         BuiltinID == ARM::BI__builtin_arm_ldaex) &&
4086        getContext().getTypeSize(E->getType()) == 64) ||
4087       BuiltinID == ARM::BI__ldrexd) {
4088     Function *F;
4089 
4090     switch (BuiltinID) {
4091     default: llvm_unreachable("unexpected builtin");
4092     case ARM::BI__builtin_arm_ldaex:
4093       F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
4094       break;
4095     case ARM::BI__builtin_arm_ldrexd:
4096     case ARM::BI__builtin_arm_ldrex:
4097     case ARM::BI__ldrexd:
4098       F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
4099       break;
4100     }
4101 
4102     Value *LdPtr = EmitScalarExpr(E->getArg(0));
4103     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4104                                     "ldrexd");
4105 
4106     Value *Val0 = Builder.CreateExtractValue(Val, 1);
4107     Value *Val1 = Builder.CreateExtractValue(Val, 0);
4108     Val0 = Builder.CreateZExt(Val0, Int64Ty);
4109     Val1 = Builder.CreateZExt(Val1, Int64Ty);
4110 
4111     Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
4112     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4113     Val = Builder.CreateOr(Val, Val1);
4114     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4115   }
4116 
4117   if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
4118       BuiltinID == ARM::BI__builtin_arm_ldaex) {
4119     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4120 
4121     QualType Ty = E->getType();
4122     llvm::Type *RealResTy = ConvertType(Ty);
4123     llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
4124                                                   getContext().getTypeSize(Ty));
4125     LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
4126 
4127     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
4128                                        ? Intrinsic::arm_ldaex
4129                                        : Intrinsic::arm_ldrex,
4130                                    LoadAddr->getType());
4131     Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
4132 
4133     if (RealResTy->isPointerTy())
4134       return Builder.CreateIntToPtr(Val, RealResTy);
4135     else {
4136       Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4137       return Builder.CreateBitCast(Val, RealResTy);
4138     }
4139   }
4140 
4141   if (BuiltinID == ARM::BI__builtin_arm_strexd ||
4142       ((BuiltinID == ARM::BI__builtin_arm_stlex ||
4143         BuiltinID == ARM::BI__builtin_arm_strex) &&
4144        getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
4145     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4146                                        ? Intrinsic::arm_stlexd
4147                                        : Intrinsic::arm_strexd);
4148     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, nullptr);
4149 
4150     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4151     Value *Val = EmitScalarExpr(E->getArg(0));
4152     Builder.CreateStore(Val, Tmp);
4153 
4154     Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
4155     Val = Builder.CreateLoad(LdPtr);
4156 
4157     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4158     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4159     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
4160     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
4161   }
4162 
4163   if (BuiltinID == ARM::BI__builtin_arm_strex ||
4164       BuiltinID == ARM::BI__builtin_arm_stlex) {
4165     Value *StoreVal = EmitScalarExpr(E->getArg(0));
4166     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4167 
4168     QualType Ty = E->getArg(0)->getType();
4169     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4170                                                  getContext().getTypeSize(Ty));
4171     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4172 
4173     if (StoreVal->getType()->isPointerTy())
4174       StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
4175     else {
4176       StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
4177       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
4178     }
4179 
4180     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4181                                        ? Intrinsic::arm_stlex
4182                                        : Intrinsic::arm_strex,
4183                                    StoreAddr->getType());
4184     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
4185   }
4186 
4187   if (BuiltinID == ARM::BI__builtin_arm_clrex) {
4188     Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
4189     return Builder.CreateCall(F);
4190   }
4191 
4192   // CRC32
4193   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4194   switch (BuiltinID) {
4195   case ARM::BI__builtin_arm_crc32b:
4196     CRCIntrinsicID = Intrinsic::arm_crc32b; break;
4197   case ARM::BI__builtin_arm_crc32cb:
4198     CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
4199   case ARM::BI__builtin_arm_crc32h:
4200     CRCIntrinsicID = Intrinsic::arm_crc32h; break;
4201   case ARM::BI__builtin_arm_crc32ch:
4202     CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
4203   case ARM::BI__builtin_arm_crc32w:
4204   case ARM::BI__builtin_arm_crc32d:
4205     CRCIntrinsicID = Intrinsic::arm_crc32w; break;
4206   case ARM::BI__builtin_arm_crc32cw:
4207   case ARM::BI__builtin_arm_crc32cd:
4208     CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
4209   }
4210 
4211   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4212     Value *Arg0 = EmitScalarExpr(E->getArg(0));
4213     Value *Arg1 = EmitScalarExpr(E->getArg(1));
4214 
    // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
4216     // intrinsics, hence we need different codegen for these cases.
4217     if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
4218         BuiltinID == ARM::BI__builtin_arm_crc32cd) {
4219       Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4220       Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
4221       Value *Arg1b = Builder.CreateLShr(Arg1, C1);
4222       Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
4223 
4224       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4225       Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
4226       return Builder.CreateCall(F, {Res, Arg1b});
4227     } else {
4228       Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
4229 
4230       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4231       return Builder.CreateCall(F, {Arg0, Arg1});
4232     }
4233   }
4234 
4235   if (BuiltinID == ARM::BI__builtin_arm_rsr ||
4236       BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4237       BuiltinID == ARM::BI__builtin_arm_rsrp ||
4238       BuiltinID == ARM::BI__builtin_arm_wsr ||
4239       BuiltinID == ARM::BI__builtin_arm_wsr64 ||
4240       BuiltinID == ARM::BI__builtin_arm_wsrp) {
4241 
4242     bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
4243                   BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4244                   BuiltinID == ARM::BI__builtin_arm_rsrp;
4245 
4246     bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
4247                             BuiltinID == ARM::BI__builtin_arm_wsrp;
4248 
4249     bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4250                    BuiltinID == ARM::BI__builtin_arm_wsr64;
4251 
4252     llvm::Type *ValueType;
4253     llvm::Type *RegisterType;
4254     if (IsPointerBuiltin) {
4255       ValueType = VoidPtrTy;
4256       RegisterType = Int32Ty;
4257     } else if (Is64Bit) {
4258       ValueType = RegisterType = Int64Ty;
4259     } else {
4260       ValueType = RegisterType = Int32Ty;
4261     }
4262 
4263     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
4264   }
4265 
4266   // Find out if any arguments are required to be integer constant
4267   // expressions.
4268   unsigned ICEArguments = 0;
4269   ASTContext::GetBuiltinTypeError Error;
4270   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4271   assert(Error == ASTContext::GE_None && "Should not codegen an error");
4272 
4273   auto getAlignmentValue32 = [&](Address addr) -> Value* {
4274     return Builder.getInt32(addr.getAlignment().getQuantity());
4275   };
4276 
4277   Address PtrOp0 = Address::invalid();
4278   Address PtrOp1 = Address::invalid();
4279   SmallVector<Value*, 4> Ops;
4280   bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
4281   unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
4282   for (unsigned i = 0, e = NumArgs; i != e; i++) {
4283     if (i == 0) {
4284       switch (BuiltinID) {
4285       case NEON::BI__builtin_neon_vld1_v:
4286       case NEON::BI__builtin_neon_vld1q_v:
4287       case NEON::BI__builtin_neon_vld1q_lane_v:
4288       case NEON::BI__builtin_neon_vld1_lane_v:
4289       case NEON::BI__builtin_neon_vld1_dup_v:
4290       case NEON::BI__builtin_neon_vld1q_dup_v:
4291       case NEON::BI__builtin_neon_vst1_v:
4292       case NEON::BI__builtin_neon_vst1q_v:
4293       case NEON::BI__builtin_neon_vst1q_lane_v:
4294       case NEON::BI__builtin_neon_vst1_lane_v:
4295       case NEON::BI__builtin_neon_vst2_v:
4296       case NEON::BI__builtin_neon_vst2q_v:
4297       case NEON::BI__builtin_neon_vst2_lane_v:
4298       case NEON::BI__builtin_neon_vst2q_lane_v:
4299       case NEON::BI__builtin_neon_vst3_v:
4300       case NEON::BI__builtin_neon_vst3q_v:
4301       case NEON::BI__builtin_neon_vst3_lane_v:
4302       case NEON::BI__builtin_neon_vst3q_lane_v:
4303       case NEON::BI__builtin_neon_vst4_v:
4304       case NEON::BI__builtin_neon_vst4q_v:
4305       case NEON::BI__builtin_neon_vst4_lane_v:
4306       case NEON::BI__builtin_neon_vst4q_lane_v:
4307         // Get the alignment for the argument in addition to the value;
4308         // we'll use it later.
4309         PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
4310         Ops.push_back(PtrOp0.getPointer());
4311         continue;
4312       }
4313     }
4314     if (i == 1) {
4315       switch (BuiltinID) {
4316       case NEON::BI__builtin_neon_vld2_v:
4317       case NEON::BI__builtin_neon_vld2q_v:
4318       case NEON::BI__builtin_neon_vld3_v:
4319       case NEON::BI__builtin_neon_vld3q_v:
4320       case NEON::BI__builtin_neon_vld4_v:
4321       case NEON::BI__builtin_neon_vld4q_v:
4322       case NEON::BI__builtin_neon_vld2_lane_v:
4323       case NEON::BI__builtin_neon_vld2q_lane_v:
4324       case NEON::BI__builtin_neon_vld3_lane_v:
4325       case NEON::BI__builtin_neon_vld3q_lane_v:
4326       case NEON::BI__builtin_neon_vld4_lane_v:
4327       case NEON::BI__builtin_neon_vld4q_lane_v:
4328       case NEON::BI__builtin_neon_vld2_dup_v:
4329       case NEON::BI__builtin_neon_vld3_dup_v:
4330       case NEON::BI__builtin_neon_vld4_dup_v:
4331         // Get the alignment for the argument in addition to the value;
4332         // we'll use it later.
4333         PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
4334         Ops.push_back(PtrOp1.getPointer());
4335         continue;
4336       }
4337     }
4338 
4339     if ((ICEArguments & (1 << i)) == 0) {
4340       Ops.push_back(EmitScalarExpr(E->getArg(i)));
4341     } else {
4342       // If this is required to be a constant, constant fold it so that we know
4343       // that the generated intrinsic gets a ConstantInt.
4344       llvm::APSInt Result;
4345       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
4346       assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
4347       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
4348     }
4349   }
4350 
4351   switch (BuiltinID) {
4352   default: break;
4353 
4354   case NEON::BI__builtin_neon_vget_lane_i8:
4355   case NEON::BI__builtin_neon_vget_lane_i16:
4356   case NEON::BI__builtin_neon_vget_lane_i32:
4357   case NEON::BI__builtin_neon_vget_lane_i64:
4358   case NEON::BI__builtin_neon_vget_lane_f32:
4359   case NEON::BI__builtin_neon_vgetq_lane_i8:
4360   case NEON::BI__builtin_neon_vgetq_lane_i16:
4361   case NEON::BI__builtin_neon_vgetq_lane_i32:
4362   case NEON::BI__builtin_neon_vgetq_lane_i64:
4363   case NEON::BI__builtin_neon_vgetq_lane_f32:
4364     return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
4365 
4366   case NEON::BI__builtin_neon_vset_lane_i8:
4367   case NEON::BI__builtin_neon_vset_lane_i16:
4368   case NEON::BI__builtin_neon_vset_lane_i32:
4369   case NEON::BI__builtin_neon_vset_lane_i64:
4370   case NEON::BI__builtin_neon_vset_lane_f32:
4371   case NEON::BI__builtin_neon_vsetq_lane_i8:
4372   case NEON::BI__builtin_neon_vsetq_lane_i16:
4373   case NEON::BI__builtin_neon_vsetq_lane_i32:
4374   case NEON::BI__builtin_neon_vsetq_lane_i64:
4375   case NEON::BI__builtin_neon_vsetq_lane_f32:
4376     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4377 
4378   case NEON::BI__builtin_neon_vsha1h_u32:
4379     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
4380                         "vsha1h");
4381   case NEON::BI__builtin_neon_vsha1cq_u32:
4382     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
4383                         "vsha1h");
4384   case NEON::BI__builtin_neon_vsha1pq_u32:
4385     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
4386                         "vsha1h");
4387   case NEON::BI__builtin_neon_vsha1mq_u32:
4388     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
4389                         "vsha1h");
4390 
4391   // The ARM _MoveToCoprocessor builtins put the input register value as
4392   // the first argument, but the LLVM intrinsic expects it as the third one.
4393   case ARM::BI_MoveToCoprocessor:
4394   case ARM::BI_MoveToCoprocessor2: {
4395     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
4396                                    Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
4397     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
4398                                   Ops[3], Ops[4], Ops[5]});
4399   }
4400   }
4401 
4402   // Get the last argument, which specifies the vector type.
4403   assert(HasExtraArg);
4404   llvm::APSInt Result;
4405   const Expr *Arg = E->getArg(E->getNumArgs()-1);
4406   if (!Arg->isIntegerConstantExpr(Result, getContext()))
4407     return nullptr;
4408 
4409   if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
4410       BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
4411     // Determine the overloaded type of this builtin.
4412     llvm::Type *Ty;
4413     if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
4414       Ty = FloatTy;
4415     else
4416       Ty = DoubleTy;
4417 
4418     // Determine whether this is an unsigned conversion or not.
4419     bool usgn = Result.getZExtValue() == 1;
4420     unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
4421 
4422     // Call the appropriate intrinsic.
4423     Function *F = CGM.getIntrinsic(Int, Ty);
4424     return Builder.CreateCall(F, Ops, "vcvtr");
4425   }
4426 
4427   // Determine the type of this overloaded NEON intrinsic.
4428   NeonTypeFlags Type(Result.getZExtValue());
4429   bool usgn = Type.isUnsigned();
4430   bool rightShift = false;
4431 
4432   llvm::VectorType *VTy = GetNeonType(this, Type);
4433   llvm::Type *Ty = VTy;
4434   if (!Ty)
4435     return nullptr;
4436 
4437   // Many NEON builtins have identical semantics and uses in ARM and
4438   // AArch64. Emit these in a single function.
4439   auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
4440   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
4441       IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
4442   if (Builtin)
4443     return EmitCommonNeonBuiltinExpr(
4444         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
4445         Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);
4446 
4447   unsigned Int;
4448   switch (BuiltinID) {
4449   default: return nullptr;
4450   case NEON::BI__builtin_neon_vld1q_lane_v:
4451     // Handle 64-bit integer elements as a special case.  Use shuffles of
4452     // one-element vectors to avoid poor code for i64 in the backend.
4453     if (VTy->getElementType()->isIntegerTy(64)) {
4454       // Extract the other lane.
4455       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4456       uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
4457       Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
4458       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
4459       // Load the value as a one-element vector.
4460       Ty = llvm::VectorType::get(VTy->getElementType(), 1);
4461       llvm::Type *Tys[] = {Ty, Int8PtrTy};
4462       Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
4463       Value *Align = getAlignmentValue32(PtrOp0);
4464       Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
4465       // Combine them.
4466       uint32_t Indices[] = {1 - Lane, Lane};
4467       SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
4468       return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
4469     }
4470     // fall through
4471   case NEON::BI__builtin_neon_vld1_lane_v: {
4472     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4473     PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
4474     Value *Ld = Builder.CreateLoad(PtrOp0);
4475     return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
4476   }
4477   case NEON::BI__builtin_neon_vld2_dup_v:
4478   case NEON::BI__builtin_neon_vld3_dup_v:
4479   case NEON::BI__builtin_neon_vld4_dup_v: {
4480     // Handle 64-bit elements as a special-case.  There is no "dup" needed.
4481     if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
4482       switch (BuiltinID) {
4483       case NEON::BI__builtin_neon_vld2_dup_v:
4484         Int = Intrinsic::arm_neon_vld2;
4485         break;
4486       case NEON::BI__builtin_neon_vld3_dup_v:
4487         Int = Intrinsic::arm_neon_vld3;
4488         break;
4489       case NEON::BI__builtin_neon_vld4_dup_v:
4490         Int = Intrinsic::arm_neon_vld4;
4491         break;
4492       default: llvm_unreachable("unknown vld_dup intrinsic?");
4493       }
4494       llvm::Type *Tys[] = {Ty, Int8PtrTy};
4495       Function *F = CGM.getIntrinsic(Int, Tys);
4496       llvm::Value *Align = getAlignmentValue32(PtrOp1);
4497       Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
4498       Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4499       Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4500       return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4501     }
4502     switch (BuiltinID) {
4503     case NEON::BI__builtin_neon_vld2_dup_v:
4504       Int = Intrinsic::arm_neon_vld2lane;
4505       break;
4506     case NEON::BI__builtin_neon_vld3_dup_v:
4507       Int = Intrinsic::arm_neon_vld3lane;
4508       break;
4509     case NEON::BI__builtin_neon_vld4_dup_v:
4510       Int = Intrinsic::arm_neon_vld4lane;
4511       break;
4512     default: llvm_unreachable("unknown vld_dup intrinsic?");
4513     }
4514     llvm::Type *Tys[] = {Ty, Int8PtrTy};
4515     Function *F = CGM.getIntrinsic(Int, Tys);
4516     llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
4517 
4518     SmallVector<Value*, 6> Args;
4519     Args.push_back(Ops[1]);
4520     Args.append(STy->getNumElements(), UndefValue::get(Ty));
4521 
4522     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
4523     Args.push_back(CI);
4524     Args.push_back(getAlignmentValue32(PtrOp1));
4525 
4526     Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
4527     // splat lane 0 to all elts in each vector of the result.
4528     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
4529       Value *Val = Builder.CreateExtractValue(Ops[1], i);
4530       Value *Elt = Builder.CreateBitCast(Val, Ty);
4531       Elt = EmitNeonSplat(Elt, CI);
4532       Elt = Builder.CreateBitCast(Elt, Val->getType());
4533       Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
4534     }
4535     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4536     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4537     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4538   }
4539   case NEON::BI__builtin_neon_vqrshrn_n_v:
4540     Int =
4541       usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
4542     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
4543                         1, true);
4544   case NEON::BI__builtin_neon_vqrshrun_n_v:
4545     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
4546                         Ops, "vqrshrun_n", 1, true);
4547   case NEON::BI__builtin_neon_vqshrn_n_v:
4548     Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
4549     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
4550                         1, true);
4551   case NEON::BI__builtin_neon_vqshrun_n_v:
4552     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
4553                         Ops, "vqshrun_n", 1, true);
4554   case NEON::BI__builtin_neon_vrecpe_v:
4555   case NEON::BI__builtin_neon_vrecpeq_v:
4556     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
4557                         Ops, "vrecpe");
4558   case NEON::BI__builtin_neon_vrshrn_n_v:
4559     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
4560                         Ops, "vrshrn_n", 1, true);
4561   case NEON::BI__builtin_neon_vrsra_n_v:
4562   case NEON::BI__builtin_neon_vrsraq_n_v:
4563     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4564     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4565     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
4566     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
4567     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
4568     return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
4569   case NEON::BI__builtin_neon_vsri_n_v:
4570   case NEON::BI__builtin_neon_vsriq_n_v:
4571     rightShift = true;
4572   case NEON::BI__builtin_neon_vsli_n_v:
4573   case NEON::BI__builtin_neon_vsliq_n_v:
4574     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
4575     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
4576                         Ops, "vsli_n");
4577   case NEON::BI__builtin_neon_vsra_n_v:
4578   case NEON::BI__builtin_neon_vsraq_n_v:
4579     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4580     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
4581     return Builder.CreateAdd(Ops[0], Ops[1]);
4582   case NEON::BI__builtin_neon_vst1q_lane_v:
4583     // Handle 64-bit integer elements as a special case.  Use a shuffle to get
4584     // a one-element vector and avoid poor code for i64 in the backend.
4585     if (VTy->getElementType()->isIntegerTy(64)) {
4586       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4587       Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
4588       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
4589       Ops[2] = getAlignmentValue32(PtrOp0);
4590       llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
4591       return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
4592                                                  Tys), Ops);
4593     }
4594     // fall through
4595   case NEON::BI__builtin_neon_vst1_lane_v: {
4596     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4597     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
4598     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4599     auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
4600     return St;
4601   }
4602   case NEON::BI__builtin_neon_vtbl1_v:
4603     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
4604                         Ops, "vtbl1");
4605   case NEON::BI__builtin_neon_vtbl2_v:
4606     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
4607                         Ops, "vtbl2");
4608   case NEON::BI__builtin_neon_vtbl3_v:
4609     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
4610                         Ops, "vtbl3");
4611   case NEON::BI__builtin_neon_vtbl4_v:
4612     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
4613                         Ops, "vtbl4");
4614   case NEON::BI__builtin_neon_vtbx1_v:
4615     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
4616                         Ops, "vtbx1");
4617   case NEON::BI__builtin_neon_vtbx2_v:
4618     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
4619                         Ops, "vtbx2");
4620   case NEON::BI__builtin_neon_vtbx3_v:
4621     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
4622                         Ops, "vtbx3");
4623   case NEON::BI__builtin_neon_vtbx4_v:
4624     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
4625                         Ops, "vtbx4");
4626   }
4627 }
4628 
4629 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
4630                                       const CallExpr *E,
4631                                       SmallVectorImpl<Value *> &Ops) {
4632   unsigned int Int = 0;
4633   const char *s = nullptr;
4634 
4635   switch (BuiltinID) {
4636   default:
4637     return nullptr;
4638   case NEON::BI__builtin_neon_vtbl1_v:
4639   case NEON::BI__builtin_neon_vqtbl1_v:
4640   case NEON::BI__builtin_neon_vqtbl1q_v:
4641   case NEON::BI__builtin_neon_vtbl2_v:
4642   case NEON::BI__builtin_neon_vqtbl2_v:
4643   case NEON::BI__builtin_neon_vqtbl2q_v:
4644   case NEON::BI__builtin_neon_vtbl3_v:
4645   case NEON::BI__builtin_neon_vqtbl3_v:
4646   case NEON::BI__builtin_neon_vqtbl3q_v:
4647   case NEON::BI__builtin_neon_vtbl4_v:
4648   case NEON::BI__builtin_neon_vqtbl4_v:
4649   case NEON::BI__builtin_neon_vqtbl4q_v:
4650     break;
4651   case NEON::BI__builtin_neon_vtbx1_v:
4652   case NEON::BI__builtin_neon_vqtbx1_v:
4653   case NEON::BI__builtin_neon_vqtbx1q_v:
4654   case NEON::BI__builtin_neon_vtbx2_v:
4655   case NEON::BI__builtin_neon_vqtbx2_v:
4656   case NEON::BI__builtin_neon_vqtbx2q_v:
4657   case NEON::BI__builtin_neon_vtbx3_v:
4658   case NEON::BI__builtin_neon_vqtbx3_v:
4659   case NEON::BI__builtin_neon_vqtbx3q_v:
4660   case NEON::BI__builtin_neon_vtbx4_v:
4661   case NEON::BI__builtin_neon_vqtbx4_v:
4662   case NEON::BI__builtin_neon_vqtbx4q_v:
4663     break;
4664   }
4665 
4666   assert(E->getNumArgs() >= 3);
4667 
4668   // Get the last argument, which specifies the vector type.
4669   llvm::APSInt Result;
4670   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
4671   if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
4672     return nullptr;
4673 
4674   // Determine the type of this overloaded NEON intrinsic.
4675   NeonTypeFlags Type(Result.getZExtValue());
4676   llvm::VectorType *Ty = GetNeonType(&CGF, Type);
4677   if (!Ty)
4678     return nullptr;
4679 
4680   CodeGen::CGBuilderTy &Builder = CGF.Builder;
4681 
4682   // AArch64 scalar builtins are not overloaded, they do not have an extra
4683   // argument that specifies the vector type, need to handle each case.
4684   switch (BuiltinID) {
4685   case NEON::BI__builtin_neon_vtbl1_v: {
4686     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
4687                               Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
4688                               "vtbl1");
4689   }
4690   case NEON::BI__builtin_neon_vtbl2_v: {
4691     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
4692                               Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
4693                               "vtbl1");
4694   }
4695   case NEON::BI__builtin_neon_vtbl3_v: {
4696     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
4697                               Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
4698                               "vtbl2");
4699   }
4700   case NEON::BI__builtin_neon_vtbl4_v: {
4701     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
4702                               Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
4703                               "vtbl2");
4704   }
4705   case NEON::BI__builtin_neon_vtbx1_v: {
4706     Value *TblRes =
4707         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
4708                            Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
4709 
4710     llvm::Constant *EightV = ConstantInt::get(Ty, 8);
4711     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
4712     CmpRes = Builder.CreateSExt(CmpRes, Ty);
4713 
4714     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
4715     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
4716     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
4717   }
4718   case NEON::BI__builtin_neon_vtbx2_v: {
4719     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
4720                               Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
4721                               "vtbx1");
4722   }
4723   case NEON::BI__builtin_neon_vtbx3_v: {
4724     Value *TblRes =
4725         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
4726                            Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
4727 
4728     llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
4729     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
4730                                            TwentyFourV);
4731     CmpRes = Builder.CreateSExt(CmpRes, Ty);
4732 
4733     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
4734     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
4735     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
4736   }
4737   case NEON::BI__builtin_neon_vtbx4_v: {
4738     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
4739                               Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
4740                               "vtbx2");
4741   }
4742   case NEON::BI__builtin_neon_vqtbl1_v:
4743   case NEON::BI__builtin_neon_vqtbl1q_v:
4744     Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
4745   case NEON::BI__builtin_neon_vqtbl2_v:
4746   case NEON::BI__builtin_neon_vqtbl2q_v: {
4747     Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
4748   case NEON::BI__builtin_neon_vqtbl3_v:
4749   case NEON::BI__builtin_neon_vqtbl3q_v:
4750     Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
4751   case NEON::BI__builtin_neon_vqtbl4_v:
4752   case NEON::BI__builtin_neon_vqtbl4q_v:
4753     Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
4754   case NEON::BI__builtin_neon_vqtbx1_v:
4755   case NEON::BI__builtin_neon_vqtbx1q_v:
4756     Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
4757   case NEON::BI__builtin_neon_vqtbx2_v:
4758   case NEON::BI__builtin_neon_vqtbx2q_v:
4759     Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
4760   case NEON::BI__builtin_neon_vqtbx3_v:
4761   case NEON::BI__builtin_neon_vqtbx3q_v:
4762     Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
4763   case NEON::BI__builtin_neon_vqtbx4_v:
4764   case NEON::BI__builtin_neon_vqtbx4q_v:
4765     Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
4766   }
4767   }
4768 
4769   if (!Int)
4770     return nullptr;
4771 
4772   Function *F = CGF.CGM.getIntrinsic(Int, Ty);
4773   return CGF.EmitNeonCall(F, Ops, s);
4774 }
4775 
4776 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
4777   llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
4778   Op = Builder.CreateBitCast(Op, Int16Ty);
4779   Value *V = UndefValue::get(VTy);
4780   llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
4781   Op = Builder.CreateInsertElement(V, Op, CI);
4782   return Op;
4783 }
4784 
4785 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
4786                                                const CallExpr *E) {
4787   unsigned HintID = static_cast<unsigned>(-1);
4788   switch (BuiltinID) {
4789   default: break;
4790   case AArch64::BI__builtin_arm_nop:
4791     HintID = 0;
4792     break;
4793   case AArch64::BI__builtin_arm_yield:
4794     HintID = 1;
4795     break;
4796   case AArch64::BI__builtin_arm_wfe:
4797     HintID = 2;
4798     break;
4799   case AArch64::BI__builtin_arm_wfi:
4800     HintID = 3;
4801     break;
4802   case AArch64::BI__builtin_arm_sev:
4803     HintID = 4;
4804     break;
4805   case AArch64::BI__builtin_arm_sevl:
4806     HintID = 5;
4807     break;
4808   }
4809 
4810   if (HintID != static_cast<unsigned>(-1)) {
4811     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
4812     return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
4813   }
4814 
4815   if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
4816     Value *Address         = EmitScalarExpr(E->getArg(0));
4817     Value *RW              = EmitScalarExpr(E->getArg(1));
4818     Value *CacheLevel      = EmitScalarExpr(E->getArg(2));
4819     Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
4820     Value *IsData          = EmitScalarExpr(E->getArg(4));
4821 
4822     Value *Locality = nullptr;
4823     if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
4824       // Temporal fetch, needs to convert cache level to locality.
4825       Locality = llvm::ConstantInt::get(Int32Ty,
4826         -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
4827     } else {
4828       // Streaming fetch.
4829       Locality = llvm::ConstantInt::get(Int32Ty, 0);
4830     }
4831 
4832     // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
4833     // PLDL3STRM or PLDL2STRM.
4834     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
4835     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
4836   }
4837 
4838   if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
4839     assert((getContext().getTypeSize(E->getType()) == 32) &&
4840            "rbit of unusual size!");
4841     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4842     return Builder.CreateCall(
4843         CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit");
4844   }
4845   if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
4846     assert((getContext().getTypeSize(E->getType()) == 64) &&
4847            "rbit of unusual size!");
4848     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4849     return Builder.CreateCall(
4850         CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit");
4851   }
4852 
4853   if (BuiltinID == AArch64::BI__clear_cache) {
4854     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4855     const FunctionDecl *FD = E->getDirectCallee();
4856     Value *Ops[2];
4857     for (unsigned i = 0; i < 2; i++)
4858       Ops[i] = EmitScalarExpr(E->getArg(i));
4859     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4860     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4861     StringRef Name = FD->getName();
4862     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4863   }
4864 
4865   if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
4866       BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
4867       getContext().getTypeSize(E->getType()) == 128) {
4868     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
4869                                        ? Intrinsic::aarch64_ldaxp
4870                                        : Intrinsic::aarch64_ldxp);
4871 
4872     Value *LdPtr = EmitScalarExpr(E->getArg(0));
4873     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4874                                     "ldxp");
4875 
4876     Value *Val0 = Builder.CreateExtractValue(Val, 1);
4877     Value *Val1 = Builder.CreateExtractValue(Val, 0);
4878     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
4879     Val0 = Builder.CreateZExt(Val0, Int128Ty);
4880     Val1 = Builder.CreateZExt(Val1, Int128Ty);
4881 
4882     Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
4883     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4884     Val = Builder.CreateOr(Val, Val1);
4885     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4886   } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
4887              BuiltinID == AArch64::BI__builtin_arm_ldaex) {
4888     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4889 
4890     QualType Ty = E->getType();
4891     llvm::Type *RealResTy = ConvertType(Ty);
4892     llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
4893                                                   getContext().getTypeSize(Ty));
4894     LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
4895 
4896     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
4897                                        ? Intrinsic::aarch64_ldaxr
4898                                        : Intrinsic::aarch64_ldxr,
4899                                    LoadAddr->getType());
4900     Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
4901 
4902     if (RealResTy->isPointerTy())
4903       return Builder.CreateIntToPtr(Val, RealResTy);
4904 
4905     Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4906     return Builder.CreateBitCast(Val, RealResTy);
4907   }
4908 
4909   if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
4910        BuiltinID == AArch64::BI__builtin_arm_stlex) &&
4911       getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
4912     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
4913                                        ? Intrinsic::aarch64_stlxp
4914                                        : Intrinsic::aarch64_stxp);
4915     llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, nullptr);
4916 
4917     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4918     EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
4919 
4920     Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
4921     llvm::Value *Val = Builder.CreateLoad(Tmp);
4922 
4923     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4924     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4925     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
4926                                          Int8PtrTy);
4927     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
4928   }
4929 
4930   if (BuiltinID == AArch64::BI__builtin_arm_strex ||
4931       BuiltinID == AArch64::BI__builtin_arm_stlex) {
4932     Value *StoreVal = EmitScalarExpr(E->getArg(0));
4933     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4934 
4935     QualType Ty = E->getArg(0)->getType();
4936     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4937                                                  getContext().getTypeSize(Ty));
4938     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4939 
4940     if (StoreVal->getType()->isPointerTy())
4941       StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
4942     else {
4943       StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
4944       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
4945     }
4946 
4947     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
4948                                        ? Intrinsic::aarch64_stlxr
4949                                        : Intrinsic::aarch64_stxr,
4950                                    StoreAddr->getType());
4951     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
4952   }
4953 
4954   if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
4955     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
4956     return Builder.CreateCall(F);
4957   }
4958 
4959   // CRC32
4960   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4961   switch (BuiltinID) {
4962   case AArch64::BI__builtin_arm_crc32b:
4963     CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
4964   case AArch64::BI__builtin_arm_crc32cb:
4965     CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
4966   case AArch64::BI__builtin_arm_crc32h:
4967     CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
4968   case AArch64::BI__builtin_arm_crc32ch:
4969     CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
4970   case AArch64::BI__builtin_arm_crc32w:
4971     CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
4972   case AArch64::BI__builtin_arm_crc32cw:
4973     CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
4974   case AArch64::BI__builtin_arm_crc32d:
4975     CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
4976   case AArch64::BI__builtin_arm_crc32cd:
4977     CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
4978   }
4979 
4980   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4981     Value *Arg0 = EmitScalarExpr(E->getArg(0));
4982     Value *Arg1 = EmitScalarExpr(E->getArg(1));
4983     Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4984 
4985     llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
4986     Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
4987 
4988     return Builder.CreateCall(F, {Arg0, Arg1});
4989   }
4990 
4991   if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
4992       BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
4993       BuiltinID == AArch64::BI__builtin_arm_rsrp ||
4994       BuiltinID == AArch64::BI__builtin_arm_wsr ||
4995       BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
4996       BuiltinID == AArch64::BI__builtin_arm_wsrp) {
4997 
4998     bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
4999                   BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5000                   BuiltinID == AArch64::BI__builtin_arm_rsrp;
5001 
5002     bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5003                             BuiltinID == AArch64::BI__builtin_arm_wsrp;
5004 
5005     bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
5006                    BuiltinID != AArch64::BI__builtin_arm_wsr;
5007 
5008     llvm::Type *ValueType;
5009     llvm::Type *RegisterType = Int64Ty;
5010     if (IsPointerBuiltin) {
5011       ValueType = VoidPtrTy;
5012     } else if (Is64Bit) {
5013       ValueType = Int64Ty;
5014     } else {
5015       ValueType = Int32Ty;
5016     }
5017 
5018     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
5019   }
5020 
5021   // Find out if any arguments are required to be integer constant
5022   // expressions.
5023   unsigned ICEArguments = 0;
5024   ASTContext::GetBuiltinTypeError Error;
5025   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5026   assert(Error == ASTContext::GE_None && "Should not codegen an error");
5027 
5028   llvm::SmallVector<Value*, 4> Ops;
5029   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
5030     if ((ICEArguments & (1 << i)) == 0) {
5031       Ops.push_back(EmitScalarExpr(E->getArg(i)));
5032     } else {
5033       // If this is required to be a constant, constant fold it so that we know
5034       // that the generated intrinsic gets a ConstantInt.
5035       llvm::APSInt Result;
5036       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
5037       assert(IsConst && "Constant arg isn't actually constant?");
5038       (void)IsConst;
5039       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
5040     }
5041   }
5042 
5043   auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
5044   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
5045       SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5046 
5047   if (Builtin) {
5048     Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
5049     Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
5050     assert(Result && "SISD intrinsic should have been handled");
5051     return Result;
5052   }
5053 
5054   llvm::APSInt Result;
5055   const Expr *Arg = E->getArg(E->getNumArgs()-1);
5056   NeonTypeFlags Type(0);
5057   if (Arg->isIntegerConstantExpr(Result, getContext()))
5058     // Determine the type of this overloaded NEON intrinsic.
5059     Type = NeonTypeFlags(Result.getZExtValue());
5060 
5061   bool usgn = Type.isUnsigned();
5062   bool quad = Type.isQuad();
5063 
5064   // Handle non-overloaded intrinsics first.
5065   switch (BuiltinID) {
5066   default: break;
5067   case NEON::BI__builtin_neon_vldrq_p128: {
5068     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
5069     Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
5070     return Builder.CreateDefaultAlignedLoad(Ptr);
5071   }
5072   case NEON::BI__builtin_neon_vstrq_p128: {
5073     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
5074     Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
5075     return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
5076   }
5077   case NEON::BI__builtin_neon_vcvts_u32_f32:
5078   case NEON::BI__builtin_neon_vcvtd_u64_f64:
5079     usgn = true;
5080     // FALL THROUGH
5081   case NEON::BI__builtin_neon_vcvts_s32_f32:
5082   case NEON::BI__builtin_neon_vcvtd_s64_f64: {
5083     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5084     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5085     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5086     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5087     Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
5088     if (usgn)
5089       return Builder.CreateFPToUI(Ops[0], InTy);
5090     return Builder.CreateFPToSI(Ops[0], InTy);
5091   }
5092   case NEON::BI__builtin_neon_vcvts_f32_u32:
5093   case NEON::BI__builtin_neon_vcvtd_f64_u64:
5094     usgn = true;
5095     // FALL THROUGH
5096   case NEON::BI__builtin_neon_vcvts_f32_s32:
5097   case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5098     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5099     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5100     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5101     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5102     Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5103     if (usgn)
5104       return Builder.CreateUIToFP(Ops[0], FTy);
5105     return Builder.CreateSIToFP(Ops[0], FTy);
5106   }
5107   case NEON::BI__builtin_neon_vpaddd_s64: {
5108     llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
5109     Value *Vec = EmitScalarExpr(E->getArg(0));
5110     // The vector is v2i64, so make sure it's bitcast to that.
5111     Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
5112     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5113     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5114     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5115     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5116     // Pairwise addition of a v2i64 into a scalar i64.
5117     return Builder.CreateAdd(Op0, Op1, "vpaddd");
5118   }
5119   case NEON::BI__builtin_neon_vpaddd_f64: {
5120     llvm::Type *Ty =
5121       llvm::VectorType::get(DoubleTy, 2);
5122     Value *Vec = EmitScalarExpr(E->getArg(0));
5123     // The vector is v2f64, so make sure it's bitcast to that.
5124     Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
5125     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5126     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5127     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5128     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5129     // Pairwise addition of a v2f64 into a scalar f64.
5130     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5131   }
5132   case NEON::BI__builtin_neon_vpadds_f32: {
5133     llvm::Type *Ty =
5134       llvm::VectorType::get(FloatTy, 2);
5135     Value *Vec = EmitScalarExpr(E->getArg(0));
5136     // The vector is v2f32, so make sure it's bitcast to that.
5137     Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
5138     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5139     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5140     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5141     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5142     // Pairwise addition of a v2f32 into a scalar f32.
5143     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5144   }
5145   case NEON::BI__builtin_neon_vceqzd_s64:
5146   case NEON::BI__builtin_neon_vceqzd_f64:
5147   case NEON::BI__builtin_neon_vceqzs_f32:
5148     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5149     return EmitAArch64CompareBuiltinExpr(
5150         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5151         ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
5152   case NEON::BI__builtin_neon_vcgezd_s64:
5153   case NEON::BI__builtin_neon_vcgezd_f64:
5154   case NEON::BI__builtin_neon_vcgezs_f32:
5155     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5156     return EmitAArch64CompareBuiltinExpr(
5157         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5158         ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
5159   case NEON::BI__builtin_neon_vclezd_s64:
5160   case NEON::BI__builtin_neon_vclezd_f64:
5161   case NEON::BI__builtin_neon_vclezs_f32:
5162     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5163     return EmitAArch64CompareBuiltinExpr(
5164         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5165         ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
5166   case NEON::BI__builtin_neon_vcgtzd_s64:
5167   case NEON::BI__builtin_neon_vcgtzd_f64:
5168   case NEON::BI__builtin_neon_vcgtzs_f32:
5169     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5170     return EmitAArch64CompareBuiltinExpr(
5171         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5172         ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
5173   case NEON::BI__builtin_neon_vcltzd_s64:
5174   case NEON::BI__builtin_neon_vcltzd_f64:
5175   case NEON::BI__builtin_neon_vcltzs_f32:
5176     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5177     return EmitAArch64CompareBuiltinExpr(
5178         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5179         ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
5180 
5181   case NEON::BI__builtin_neon_vceqzd_u64: {
5182     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5183     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5184     Ops[0] =
5185         Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
5186     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
5187   }
5188   case NEON::BI__builtin_neon_vceqd_f64:
5189   case NEON::BI__builtin_neon_vcled_f64:
5190   case NEON::BI__builtin_neon_vcltd_f64:
5191   case NEON::BI__builtin_neon_vcged_f64:
5192   case NEON::BI__builtin_neon_vcgtd_f64: {
5193     llvm::CmpInst::Predicate P;
5194     switch (BuiltinID) {
5195     default: llvm_unreachable("missing builtin ID in switch!");
5196     case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
5197     case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
5198     case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
5199     case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
5200     case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
5201     }
5202     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5203     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5204     Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5205     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5206     return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
5207   }
5208   case NEON::BI__builtin_neon_vceqs_f32:
5209   case NEON::BI__builtin_neon_vcles_f32:
5210   case NEON::BI__builtin_neon_vclts_f32:
5211   case NEON::BI__builtin_neon_vcges_f32:
5212   case NEON::BI__builtin_neon_vcgts_f32: {
5213     llvm::CmpInst::Predicate P;
5214     switch (BuiltinID) {
5215     default: llvm_unreachable("missing builtin ID in switch!");
5216     case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
5217     case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
5218     case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
5219     case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
5220     case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
5221     }
5222     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5223     Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
5224     Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
5225     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5226     return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
5227   }
5228   case NEON::BI__builtin_neon_vceqd_s64:
5229   case NEON::BI__builtin_neon_vceqd_u64:
5230   case NEON::BI__builtin_neon_vcgtd_s64:
5231   case NEON::BI__builtin_neon_vcgtd_u64:
5232   case NEON::BI__builtin_neon_vcltd_s64:
5233   case NEON::BI__builtin_neon_vcltd_u64:
5234   case NEON::BI__builtin_neon_vcged_u64:
5235   case NEON::BI__builtin_neon_vcged_s64:
5236   case NEON::BI__builtin_neon_vcled_u64:
5237   case NEON::BI__builtin_neon_vcled_s64: {
5238     llvm::CmpInst::Predicate P;
5239     switch (BuiltinID) {
5240     default: llvm_unreachable("missing builtin ID in switch!");
5241     case NEON::BI__builtin_neon_vceqd_s64:
5242     case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
5243     case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
5244     case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
5245     case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
5246     case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
5247     case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
5248     case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
5249     case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
5250     case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
5251     }
5252     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5253     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5254     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5255     Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
5256     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
5257   }
5258   case NEON::BI__builtin_neon_vtstd_s64:
5259   case NEON::BI__builtin_neon_vtstd_u64: {
5260     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5261     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5262     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5263     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
5264     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
5265                                 llvm::Constant::getNullValue(Int64Ty));
5266     return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
5267   }
5268   case NEON::BI__builtin_neon_vset_lane_i8:
5269   case NEON::BI__builtin_neon_vset_lane_i16:
5270   case NEON::BI__builtin_neon_vset_lane_i32:
5271   case NEON::BI__builtin_neon_vset_lane_i64:
5272   case NEON::BI__builtin_neon_vset_lane_f32:
5273   case NEON::BI__builtin_neon_vsetq_lane_i8:
5274   case NEON::BI__builtin_neon_vsetq_lane_i16:
5275   case NEON::BI__builtin_neon_vsetq_lane_i32:
5276   case NEON::BI__builtin_neon_vsetq_lane_i64:
5277   case NEON::BI__builtin_neon_vsetq_lane_f32:
5278     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5279     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5280   case NEON::BI__builtin_neon_vset_lane_f64:
5281     // The vector type needs a cast for the v1f64 variant.
5282     Ops[1] = Builder.CreateBitCast(Ops[1],
5283                                    llvm::VectorType::get(DoubleTy, 1));
5284     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5285     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5286   case NEON::BI__builtin_neon_vsetq_lane_f64:
5287     // The vector type needs a cast for the v2f64 variant.
5288     Ops[1] = Builder.CreateBitCast(Ops[1],
5289         llvm::VectorType::get(DoubleTy, 2));
5290     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5291     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5292 
5293   case NEON::BI__builtin_neon_vget_lane_i8:
5294   case NEON::BI__builtin_neon_vdupb_lane_i8:
5295     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
5296     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5297                                         "vget_lane");
5298   case NEON::BI__builtin_neon_vgetq_lane_i8:
5299   case NEON::BI__builtin_neon_vdupb_laneq_i8:
5300     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
5301     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5302                                         "vgetq_lane");
5303   case NEON::BI__builtin_neon_vget_lane_i16:
5304   case NEON::BI__builtin_neon_vduph_lane_i16:
5305     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
5306     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5307                                         "vget_lane");
5308   case NEON::BI__builtin_neon_vgetq_lane_i16:
5309   case NEON::BI__builtin_neon_vduph_laneq_i16:
5310     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
5311     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5312                                         "vgetq_lane");
5313   case NEON::BI__builtin_neon_vget_lane_i32:
5314   case NEON::BI__builtin_neon_vdups_lane_i32:
5315     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
5316     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5317                                         "vget_lane");
5318   case NEON::BI__builtin_neon_vdups_lane_f32:
5319     Ops[0] = Builder.CreateBitCast(Ops[0],
5320         llvm::VectorType::get(FloatTy, 2));
5321     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5322                                         "vdups_lane");
5323   case NEON::BI__builtin_neon_vgetq_lane_i32:
5324   case NEON::BI__builtin_neon_vdups_laneq_i32:
5325     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
5326     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5327                                         "vgetq_lane");
5328   case NEON::BI__builtin_neon_vget_lane_i64:
5329   case NEON::BI__builtin_neon_vdupd_lane_i64:
5330     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
5331     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5332                                         "vget_lane");
5333   case NEON::BI__builtin_neon_vdupd_lane_f64:
5334     Ops[0] = Builder.CreateBitCast(Ops[0],
5335         llvm::VectorType::get(DoubleTy, 1));
5336     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5337                                         "vdupd_lane");
5338   case NEON::BI__builtin_neon_vgetq_lane_i64:
5339   case NEON::BI__builtin_neon_vdupd_laneq_i64:
5340     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
5341     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5342                                         "vgetq_lane");
5343   case NEON::BI__builtin_neon_vget_lane_f32:
5344     Ops[0] = Builder.CreateBitCast(Ops[0],
5345         llvm::VectorType::get(FloatTy, 2));
5346     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5347                                         "vget_lane");
5348   case NEON::BI__builtin_neon_vget_lane_f64:
5349     Ops[0] = Builder.CreateBitCast(Ops[0],
5350         llvm::VectorType::get(DoubleTy, 1));
5351     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5352                                         "vget_lane");
5353   case NEON::BI__builtin_neon_vgetq_lane_f32:
5354   case NEON::BI__builtin_neon_vdups_laneq_f32:
5355     Ops[0] = Builder.CreateBitCast(Ops[0],
5356         llvm::VectorType::get(FloatTy, 4));
5357     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5358                                         "vgetq_lane");
5359   case NEON::BI__builtin_neon_vgetq_lane_f64:
5360   case NEON::BI__builtin_neon_vdupd_laneq_f64:
5361     Ops[0] = Builder.CreateBitCast(Ops[0],
5362         llvm::VectorType::get(DoubleTy, 2));
5363     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5364                                         "vgetq_lane");
5365   case NEON::BI__builtin_neon_vaddd_s64:
5366   case NEON::BI__builtin_neon_vaddd_u64:
5367     return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
5368   case NEON::BI__builtin_neon_vsubd_s64:
5369   case NEON::BI__builtin_neon_vsubd_u64:
5370     return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
5371   case NEON::BI__builtin_neon_vqdmlalh_s16:
5372   case NEON::BI__builtin_neon_vqdmlslh_s16: {
5373     SmallVector<Value *, 2> ProductOps;
5374     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5375     ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
5376     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5377     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5378                           ProductOps, "vqdmlXl");
5379     Constant *CI = ConstantInt::get(SizeTy, 0);
5380     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5381 
5382     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
5383                                         ? Intrinsic::aarch64_neon_sqadd
5384                                         : Intrinsic::aarch64_neon_sqsub;
5385     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
5386   }
5387   case NEON::BI__builtin_neon_vqshlud_n_s64: {
5388     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5389     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5390     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
5391                         Ops, "vqshlu_n");
5392   }
5393   case NEON::BI__builtin_neon_vqshld_n_u64:
5394   case NEON::BI__builtin_neon_vqshld_n_s64: {
5395     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
5396                                    ? Intrinsic::aarch64_neon_uqshl
5397                                    : Intrinsic::aarch64_neon_sqshl;
5398     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5399     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5400     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
5401   }
5402   case NEON::BI__builtin_neon_vrshrd_n_u64:
5403   case NEON::BI__builtin_neon_vrshrd_n_s64: {
5404     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
5405                                    ? Intrinsic::aarch64_neon_urshl
5406                                    : Intrinsic::aarch64_neon_srshl;
5407     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5408     int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
5409     Ops[1] = ConstantInt::get(Int64Ty, -SV);
5410     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
5411   }
5412   case NEON::BI__builtin_neon_vrsrad_n_u64:
5413   case NEON::BI__builtin_neon_vrsrad_n_s64: {
5414     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
5415                                    ? Intrinsic::aarch64_neon_urshl
5416                                    : Intrinsic::aarch64_neon_srshl;
5417     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5418     Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
5419     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
5420                                 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
5421     return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
5422   }
5423   case NEON::BI__builtin_neon_vshld_n_s64:
5424   case NEON::BI__builtin_neon_vshld_n_u64: {
5425     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5426     return Builder.CreateShl(
5427         Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
5428   }
5429   case NEON::BI__builtin_neon_vshrd_n_s64: {
5430     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5431     return Builder.CreateAShr(
5432         Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5433                                                    Amt->getZExtValue())),
5434         "shrd_n");
5435   }
5436   case NEON::BI__builtin_neon_vshrd_n_u64: {
5437     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5438     uint64_t ShiftAmt = Amt->getZExtValue();
5439     // Right-shifting an unsigned value by its size yields 0.
5440     if (ShiftAmt == 64)
5441       return ConstantInt::get(Int64Ty, 0);
5442     return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
5443                               "shrd_n");
5444   }
5445   case NEON::BI__builtin_neon_vsrad_n_s64: {
5446     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
5447     Ops[1] = Builder.CreateAShr(
5448         Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5449                                                    Amt->getZExtValue())),
5450         "shrd_n");
5451     return Builder.CreateAdd(Ops[0], Ops[1]);
5452   }
5453   case NEON::BI__builtin_neon_vsrad_n_u64: {
5454     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
5455     uint64_t ShiftAmt = Amt->getZExtValue();
5456     // Right-shifting an unsigned value by its size yields 0.
5457     // As Op + 0 = Op, return Ops[0] directly.
5458     if (ShiftAmt == 64)
5459       return Ops[0];
5460     Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
5461                                 "shrd_n");
5462     return Builder.CreateAdd(Ops[0], Ops[1]);
5463   }
5464   case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
5465   case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
5466   case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
5467   case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
5468     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
5469                                           "lane");
5470     SmallVector<Value *, 2> ProductOps;
5471     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5472     ProductOps.push_back(vectorWrapScalar16(Ops[2]));
5473     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5474     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5475                           ProductOps, "vqdmlXl");
5476     Constant *CI = ConstantInt::get(SizeTy, 0);
5477     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5478     Ops.pop_back();
5479 
5480     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
5481                        BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
5482                           ? Intrinsic::aarch64_neon_sqadd
5483                           : Intrinsic::aarch64_neon_sqsub;
5484     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
5485   }
5486   case NEON::BI__builtin_neon_vqdmlals_s32:
5487   case NEON::BI__builtin_neon_vqdmlsls_s32: {
5488     SmallVector<Value *, 2> ProductOps;
5489     ProductOps.push_back(Ops[1]);
5490     ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
5491     Ops[1] =
5492         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5493                      ProductOps, "vqdmlXl");
5494 
5495     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
5496                                         ? Intrinsic::aarch64_neon_sqadd
5497                                         : Intrinsic::aarch64_neon_sqsub;
5498     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
5499   }
5500   case NEON::BI__builtin_neon_vqdmlals_lane_s32:
5501   case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
5502   case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
5503   case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
5504     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
5505                                           "lane");
5506     SmallVector<Value *, 2> ProductOps;
5507     ProductOps.push_back(Ops[1]);
5508     ProductOps.push_back(Ops[2]);
5509     Ops[1] =
5510         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5511                      ProductOps, "vqdmlXl");
5512     Ops.pop_back();
5513 
5514     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
5515                        BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
5516                           ? Intrinsic::aarch64_neon_sqadd
5517                           : Intrinsic::aarch64_neon_sqsub;
5518     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
5519   }
5520   }
5521 
5522   llvm::VectorType *VTy = GetNeonType(this, Type);
5523   llvm::Type *Ty = VTy;
5524   if (!Ty)
5525     return nullptr;
5526 
5527   // Not all intrinsics handled by the common case work for AArch64 yet, so only
5528   // defer to common code if it's been added to our special map.
5529   Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
5530                                    AArch64SIMDIntrinsicsProvenSorted);
5531 
5532   if (Builtin)
5533     return EmitCommonNeonBuiltinExpr(
5534         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
5535         Builtin->NameHint, Builtin->TypeModifier, E, Ops,
5536         /*never use addresses*/ Address::invalid(), Address::invalid());
5537 
5538   if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops))
5539     return V;
5540 
5541   unsigned Int;
5542   switch (BuiltinID) {
5543   default: return nullptr;
5544   case NEON::BI__builtin_neon_vbsl_v:
5545   case NEON::BI__builtin_neon_vbslq_v: {
5546     llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
5547     Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
5548     Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
5549     Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
5550 
5551     Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
5552     Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
5553     Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
5554     return Builder.CreateBitCast(Ops[0], Ty);
5555   }
5556   case NEON::BI__builtin_neon_vfma_lane_v:
5557   case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
5558     // The ARM builtins (and instructions) have the addend as the first
5559     // operand, but the 'fma' intrinsics have it last. Swap it around here.
5560     Value *Addend = Ops[0];
5561     Value *Multiplicand = Ops[1];
5562     Value *LaneSource = Ops[2];
5563     Ops[0] = Multiplicand;
5564     Ops[1] = LaneSource;
5565     Ops[2] = Addend;
5566 
5567     // Now adjust things to handle the lane access.
5568     llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
5569       llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
5570       VTy;
5571     llvm::Constant *cst = cast<Constant>(Ops[3]);
5572     Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
5573     Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
5574     Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
5575 
5576     Ops.pop_back();
5577     Int = Intrinsic::fma;
5578     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
5579   }
5580   case NEON::BI__builtin_neon_vfma_laneq_v: {
5581     llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
5582     // v1f64 fma should be mapped to Neon scalar f64 fma
5583     if (VTy && VTy->getElementType() == DoubleTy) {
5584       Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5585       Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5586       llvm::Type *VTy = GetNeonType(this,
5587         NeonTypeFlags(NeonTypeFlags::Float64, false, true));
5588       Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
5589       Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
5590       Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
5591       Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
5592       return Builder.CreateBitCast(Result, Ty);
5593     }
5594     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5595     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5596     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5597 
5598     llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
5599                                             VTy->getNumElements() * 2);
5600     Ops[2] = Builder.CreateBitCast(Ops[2], STy);
5601     Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
5602                                                cast<ConstantInt>(Ops[3]));
5603     Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
5604 
5605     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
5606   }
5607   case NEON::BI__builtin_neon_vfmaq_laneq_v: {
5608     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5609     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5610     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5611 
5612     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5613     Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
5614     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
5615   }
5616   case NEON::BI__builtin_neon_vfmas_lane_f32:
5617   case NEON::BI__builtin_neon_vfmas_laneq_f32:
5618   case NEON::BI__builtin_neon_vfmad_lane_f64:
5619   case NEON::BI__builtin_neon_vfmad_laneq_f64: {
5620     Ops.push_back(EmitScalarExpr(E->getArg(3)));
5621     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
5622     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5623     Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
5624     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
5625   }
5626   case NEON::BI__builtin_neon_vmull_v:
5627     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5628     Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
5629     if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
5630     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
5631   case NEON::BI__builtin_neon_vmax_v:
5632   case NEON::BI__builtin_neon_vmaxq_v:
5633     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5634     Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
5635     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
5636     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
5637   case NEON::BI__builtin_neon_vmin_v:
5638   case NEON::BI__builtin_neon_vminq_v:
5639     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5640     Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
5641     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
5642     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
5643   case NEON::BI__builtin_neon_vabd_v:
5644   case NEON::BI__builtin_neon_vabdq_v:
5645     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5646     Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
5647     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
5648     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
5649   case NEON::BI__builtin_neon_vpadal_v:
5650   case NEON::BI__builtin_neon_vpadalq_v: {
5651     unsigned ArgElts = VTy->getNumElements();
5652     llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
5653     unsigned BitWidth = EltTy->getBitWidth();
5654     llvm::Type *ArgTy = llvm::VectorType::get(
5655         llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
5656     llvm::Type* Tys[2] = { VTy, ArgTy };
5657     Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
5658     SmallVector<llvm::Value*, 1> TmpOps;
5659     TmpOps.push_back(Ops[1]);
5660     Function *F = CGM.getIntrinsic(Int, Tys);
5661     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
5662     llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
5663     return Builder.CreateAdd(tmp, addend);
5664   }
5665   case NEON::BI__builtin_neon_vpmin_v:
5666   case NEON::BI__builtin_neon_vpminq_v:
5667     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5668     Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
5669     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
5670     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
5671   case NEON::BI__builtin_neon_vpmax_v:
5672   case NEON::BI__builtin_neon_vpmaxq_v:
5673     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5674     Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
5675     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
5676     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
5677   case NEON::BI__builtin_neon_vminnm_v:
5678   case NEON::BI__builtin_neon_vminnmq_v:
5679     Int = Intrinsic::aarch64_neon_fminnm;
5680     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
5681   case NEON::BI__builtin_neon_vmaxnm_v:
5682   case NEON::BI__builtin_neon_vmaxnmq_v:
5683     Int = Intrinsic::aarch64_neon_fmaxnm;
5684     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
5685   case NEON::BI__builtin_neon_vrecpss_f32: {
5686     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5687     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
5688                         Ops, "vrecps");
5689   }
5690   case NEON::BI__builtin_neon_vrecpsd_f64: {
5691     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5692     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
5693                         Ops, "vrecps");
5694   }
5695   case NEON::BI__builtin_neon_vqshrun_n_v:
5696     Int = Intrinsic::aarch64_neon_sqshrun;
5697     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
5698   case NEON::BI__builtin_neon_vqrshrun_n_v:
5699     Int = Intrinsic::aarch64_neon_sqrshrun;
5700     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
5701   case NEON::BI__builtin_neon_vqshrn_n_v:
5702     Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
5703     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
5704   case NEON::BI__builtin_neon_vrshrn_n_v:
5705     Int = Intrinsic::aarch64_neon_rshrn;
5706     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
5707   case NEON::BI__builtin_neon_vqrshrn_n_v:
5708     Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
5709     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
5710   case NEON::BI__builtin_neon_vrnda_v:
5711   case NEON::BI__builtin_neon_vrndaq_v: {
5712     Int = Intrinsic::round;
5713     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
5714   }
5715   case NEON::BI__builtin_neon_vrndi_v:
5716   case NEON::BI__builtin_neon_vrndiq_v: {
5717     Int = Intrinsic::nearbyint;
5718     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
5719   }
5720   case NEON::BI__builtin_neon_vrndm_v:
5721   case NEON::BI__builtin_neon_vrndmq_v: {
5722     Int = Intrinsic::floor;
5723     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
5724   }
5725   case NEON::BI__builtin_neon_vrndn_v:
5726   case NEON::BI__builtin_neon_vrndnq_v: {
5727     Int = Intrinsic::aarch64_neon_frintn;
5728     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
5729   }
5730   case NEON::BI__builtin_neon_vrndp_v:
5731   case NEON::BI__builtin_neon_vrndpq_v: {
5732     Int = Intrinsic::ceil;
5733     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
5734   }
5735   case NEON::BI__builtin_neon_vrndx_v:
5736   case NEON::BI__builtin_neon_vrndxq_v: {
5737     Int = Intrinsic::rint;
5738     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
5739   }
5740   case NEON::BI__builtin_neon_vrnd_v:
5741   case NEON::BI__builtin_neon_vrndq_v: {
5742     Int = Intrinsic::trunc;
5743     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
5744   }
5745   case NEON::BI__builtin_neon_vceqz_v:
5746   case NEON::BI__builtin_neon_vceqzq_v:
5747     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
5748                                          ICmpInst::ICMP_EQ, "vceqz");
5749   case NEON::BI__builtin_neon_vcgez_v:
5750   case NEON::BI__builtin_neon_vcgezq_v:
5751     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
5752                                          ICmpInst::ICMP_SGE, "vcgez");
5753   case NEON::BI__builtin_neon_vclez_v:
5754   case NEON::BI__builtin_neon_vclezq_v:
5755     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
5756                                          ICmpInst::ICMP_SLE, "vclez");
5757   case NEON::BI__builtin_neon_vcgtz_v:
5758   case NEON::BI__builtin_neon_vcgtzq_v:
5759     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
5760                                          ICmpInst::ICMP_SGT, "vcgtz");
5761   case NEON::BI__builtin_neon_vcltz_v:
5762   case NEON::BI__builtin_neon_vcltzq_v:
5763     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
5764                                          ICmpInst::ICMP_SLT, "vcltz");
5765   case NEON::BI__builtin_neon_vcvt_f64_v:
5766   case NEON::BI__builtin_neon_vcvtq_f64_v:
5767     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5768     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
5769     return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
5770                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
5771   case NEON::BI__builtin_neon_vcvt_f64_f32: {
5772     assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
5773            "unexpected vcvt_f64_f32 builtin");
5774     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
5775     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
5776 
5777     return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
5778   }
5779   case NEON::BI__builtin_neon_vcvt_f32_f64: {
5780     assert(Type.getEltType() == NeonTypeFlags::Float32 &&
5781            "unexpected vcvt_f32_f64 builtin");
5782     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
5783     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
5784 
5785     return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
5786   }
5787   case NEON::BI__builtin_neon_vcvt_s32_v:
5788   case NEON::BI__builtin_neon_vcvt_u32_v:
5789   case NEON::BI__builtin_neon_vcvt_s64_v:
5790   case NEON::BI__builtin_neon_vcvt_u64_v:
5791   case NEON::BI__builtin_neon_vcvtq_s32_v:
5792   case NEON::BI__builtin_neon_vcvtq_u32_v:
5793   case NEON::BI__builtin_neon_vcvtq_s64_v:
5794   case NEON::BI__builtin_neon_vcvtq_u64_v: {
5795     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
5796     if (usgn)
5797       return Builder.CreateFPToUI(Ops[0], Ty);
5798     return Builder.CreateFPToSI(Ops[0], Ty);
5799   }
5800   case NEON::BI__builtin_neon_vcvta_s32_v:
5801   case NEON::BI__builtin_neon_vcvtaq_s32_v:
5802   case NEON::BI__builtin_neon_vcvta_u32_v:
5803   case NEON::BI__builtin_neon_vcvtaq_u32_v:
5804   case NEON::BI__builtin_neon_vcvta_s64_v:
5805   case NEON::BI__builtin_neon_vcvtaq_s64_v:
5806   case NEON::BI__builtin_neon_vcvta_u64_v:
5807   case NEON::BI__builtin_neon_vcvtaq_u64_v: {
5808     Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
5809     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5810     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
5811   }
5812   case NEON::BI__builtin_neon_vcvtm_s32_v:
5813   case NEON::BI__builtin_neon_vcvtmq_s32_v:
5814   case NEON::BI__builtin_neon_vcvtm_u32_v:
5815   case NEON::BI__builtin_neon_vcvtmq_u32_v:
5816   case NEON::BI__builtin_neon_vcvtm_s64_v:
5817   case NEON::BI__builtin_neon_vcvtmq_s64_v:
5818   case NEON::BI__builtin_neon_vcvtm_u64_v:
5819   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
5820     Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
5821     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5822     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
5823   }
5824   case NEON::BI__builtin_neon_vcvtn_s32_v:
5825   case NEON::BI__builtin_neon_vcvtnq_s32_v:
5826   case NEON::BI__builtin_neon_vcvtn_u32_v:
5827   case NEON::BI__builtin_neon_vcvtnq_u32_v:
5828   case NEON::BI__builtin_neon_vcvtn_s64_v:
5829   case NEON::BI__builtin_neon_vcvtnq_s64_v:
5830   case NEON::BI__builtin_neon_vcvtn_u64_v:
5831   case NEON::BI__builtin_neon_vcvtnq_u64_v: {
5832     Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
5833     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5834     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
5835   }
5836   case NEON::BI__builtin_neon_vcvtp_s32_v:
5837   case NEON::BI__builtin_neon_vcvtpq_s32_v:
5838   case NEON::BI__builtin_neon_vcvtp_u32_v:
5839   case NEON::BI__builtin_neon_vcvtpq_u32_v:
5840   case NEON::BI__builtin_neon_vcvtp_s64_v:
5841   case NEON::BI__builtin_neon_vcvtpq_s64_v:
5842   case NEON::BI__builtin_neon_vcvtp_u64_v:
5843   case NEON::BI__builtin_neon_vcvtpq_u64_v: {
5844     Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
5845     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5846     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
5847   }
5848   case NEON::BI__builtin_neon_vmulx_v:
5849   case NEON::BI__builtin_neon_vmulxq_v: {
5850     Int = Intrinsic::aarch64_neon_fmulx;
5851     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
5852   }
5853   case NEON::BI__builtin_neon_vmul_lane_v:
5854   case NEON::BI__builtin_neon_vmul_laneq_v: {
5855     // v1f64 vmul_lane should be mapped to Neon scalar mul lane
5856     bool Quad = false;
5857     if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
5858       Quad = true;
5859     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5860     llvm::Type *VTy = GetNeonType(this,
5861       NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
5862     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
5863     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
5864     Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
5865     return Builder.CreateBitCast(Result, Ty);
5866   }
5867   case NEON::BI__builtin_neon_vnegd_s64:
5868     return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
5869   case NEON::BI__builtin_neon_vpmaxnm_v:
5870   case NEON::BI__builtin_neon_vpmaxnmq_v: {
5871     Int = Intrinsic::aarch64_neon_fmaxnmp;
5872     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
5873   }
5874   case NEON::BI__builtin_neon_vpminnm_v:
5875   case NEON::BI__builtin_neon_vpminnmq_v: {
5876     Int = Intrinsic::aarch64_neon_fminnmp;
5877     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
5878   }
5879   case NEON::BI__builtin_neon_vsqrt_v:
5880   case NEON::BI__builtin_neon_vsqrtq_v: {
5881     Int = Intrinsic::sqrt;
5882     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5883     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
5884   }
5885   case NEON::BI__builtin_neon_vrbit_v:
5886   case NEON::BI__builtin_neon_vrbitq_v: {
5887     Int = Intrinsic::aarch64_neon_rbit;
5888     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
5889   }
5890   case NEON::BI__builtin_neon_vaddv_u8:
5891     // FIXME: These are handled by the AArch64 scalar code.
5892     usgn = true;
5893     // FALLTHROUGH
5894   case NEON::BI__builtin_neon_vaddv_s8: {
5895     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5896     Ty = Int32Ty;
5897     VTy = llvm::VectorType::get(Int8Ty, 8);
5898     llvm::Type *Tys[2] = { Ty, VTy };
5899     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5900     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5901     return Builder.CreateTrunc(Ops[0], Int8Ty);
5902   }
5903   case NEON::BI__builtin_neon_vaddv_u16:
5904     usgn = true;
5905     // FALLTHROUGH
5906   case NEON::BI__builtin_neon_vaddv_s16: {
5907     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5908     Ty = Int32Ty;
5909     VTy = llvm::VectorType::get(Int16Ty, 4);
5910     llvm::Type *Tys[2] = { Ty, VTy };
5911     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5912     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5913     return Builder.CreateTrunc(Ops[0], Int16Ty);
5914   }
5915   case NEON::BI__builtin_neon_vaddvq_u8:
5916     usgn = true;
5917     // FALLTHROUGH
5918   case NEON::BI__builtin_neon_vaddvq_s8: {
5919     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5920     Ty = Int32Ty;
5921     VTy = llvm::VectorType::get(Int8Ty, 16);
5922     llvm::Type *Tys[2] = { Ty, VTy };
5923     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5924     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5925     return Builder.CreateTrunc(Ops[0], Int8Ty);
5926   }
5927   case NEON::BI__builtin_neon_vaddvq_u16:
5928     usgn = true;
5929     // FALLTHROUGH
5930   case NEON::BI__builtin_neon_vaddvq_s16: {
5931     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5932     Ty = Int32Ty;
5933     VTy = llvm::VectorType::get(Int16Ty, 8);
5934     llvm::Type *Tys[2] = { Ty, VTy };
5935     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5936     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5937     return Builder.CreateTrunc(Ops[0], Int16Ty);
5938   }
5939   case NEON::BI__builtin_neon_vmaxv_u8: {
5940     Int = Intrinsic::aarch64_neon_umaxv;
5941     Ty = Int32Ty;
5942     VTy = llvm::VectorType::get(Int8Ty, 8);
5943     llvm::Type *Tys[2] = { Ty, VTy };
5944     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5945     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5946     return Builder.CreateTrunc(Ops[0], Int8Ty);
5947   }
5948   case NEON::BI__builtin_neon_vmaxv_u16: {
5949     Int = Intrinsic::aarch64_neon_umaxv;
5950     Ty = Int32Ty;
5951     VTy = llvm::VectorType::get(Int16Ty, 4);
5952     llvm::Type *Tys[2] = { Ty, VTy };
5953     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5954     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5955     return Builder.CreateTrunc(Ops[0], Int16Ty);
5956   }
5957   case NEON::BI__builtin_neon_vmaxvq_u8: {
5958     Int = Intrinsic::aarch64_neon_umaxv;
5959     Ty = Int32Ty;
5960     VTy = llvm::VectorType::get(Int8Ty, 16);
5961     llvm::Type *Tys[2] = { Ty, VTy };
5962     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5963     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5964     return Builder.CreateTrunc(Ops[0], Int8Ty);
5965   }
5966   case NEON::BI__builtin_neon_vmaxvq_u16: {
5967     Int = Intrinsic::aarch64_neon_umaxv;
5968     Ty = Int32Ty;
5969     VTy = llvm::VectorType::get(Int16Ty, 8);
5970     llvm::Type *Tys[2] = { Ty, VTy };
5971     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5972     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5973     return Builder.CreateTrunc(Ops[0], Int16Ty);
5974   }
5975   case NEON::BI__builtin_neon_vmaxv_s8: {
5976     Int = Intrinsic::aarch64_neon_smaxv;
5977     Ty = Int32Ty;
5978     VTy = llvm::VectorType::get(Int8Ty, 8);
5979     llvm::Type *Tys[2] = { Ty, VTy };
5980     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5981     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5982     return Builder.CreateTrunc(Ops[0], Int8Ty);
5983   }
5984   case NEON::BI__builtin_neon_vmaxv_s16: {
5985     Int = Intrinsic::aarch64_neon_smaxv;
5986     Ty = Int32Ty;
5987     VTy = llvm::VectorType::get(Int16Ty, 4);
5988     llvm::Type *Tys[2] = { Ty, VTy };
5989     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5990     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5991     return Builder.CreateTrunc(Ops[0], Int16Ty);
5992   }
5993   case NEON::BI__builtin_neon_vmaxvq_s8: {
5994     Int = Intrinsic::aarch64_neon_smaxv;
5995     Ty = Int32Ty;
5996     VTy = llvm::VectorType::get(Int8Ty, 16);
5997     llvm::Type *Tys[2] = { Ty, VTy };
5998     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5999     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6000     return Builder.CreateTrunc(Ops[0], Int8Ty);
6001   }
6002   case NEON::BI__builtin_neon_vmaxvq_s16: {
6003     Int = Intrinsic::aarch64_neon_smaxv;
6004     Ty = Int32Ty;
6005     VTy = llvm::VectorType::get(Int16Ty, 8);
6006     llvm::Type *Tys[2] = { Ty, VTy };
6007     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6008     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6009     return Builder.CreateTrunc(Ops[0], Int16Ty);
6010   }
6011   case NEON::BI__builtin_neon_vminv_u8: {
6012     Int = Intrinsic::aarch64_neon_uminv;
6013     Ty = Int32Ty;
6014     VTy = llvm::VectorType::get(Int8Ty, 8);
6015     llvm::Type *Tys[2] = { Ty, VTy };
6016     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6017     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6018     return Builder.CreateTrunc(Ops[0], Int8Ty);
6019   }
6020   case NEON::BI__builtin_neon_vminv_u16: {
6021     Int = Intrinsic::aarch64_neon_uminv;
6022     Ty = Int32Ty;
6023     VTy = llvm::VectorType::get(Int16Ty, 4);
6024     llvm::Type *Tys[2] = { Ty, VTy };
6025     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6026     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6027     return Builder.CreateTrunc(Ops[0], Int16Ty);
6028   }
6029   case NEON::BI__builtin_neon_vminvq_u8: {
6030     Int = Intrinsic::aarch64_neon_uminv;
6031     Ty = Int32Ty;
6032     VTy = llvm::VectorType::get(Int8Ty, 16);
6033     llvm::Type *Tys[2] = { Ty, VTy };
6034     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6035     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6036     return Builder.CreateTrunc(Ops[0], Int8Ty);
6037   }
6038   case NEON::BI__builtin_neon_vminvq_u16: {
6039     Int = Intrinsic::aarch64_neon_uminv;
6040     Ty = Int32Ty;
6041     VTy = llvm::VectorType::get(Int16Ty, 8);
6042     llvm::Type *Tys[2] = { Ty, VTy };
6043     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6044     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6045     return Builder.CreateTrunc(Ops[0], Int16Ty);
6046   }
6047   case NEON::BI__builtin_neon_vminv_s8: {
6048     Int = Intrinsic::aarch64_neon_sminv;
6049     Ty = Int32Ty;
6050     VTy = llvm::VectorType::get(Int8Ty, 8);
6051     llvm::Type *Tys[2] = { Ty, VTy };
6052     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6053     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6054     return Builder.CreateTrunc(Ops[0], Int8Ty);
6055   }
6056   case NEON::BI__builtin_neon_vminv_s16: {
6057     Int = Intrinsic::aarch64_neon_sminv;
6058     Ty = Int32Ty;
6059     VTy = llvm::VectorType::get(Int16Ty, 4);
6060     llvm::Type *Tys[2] = { Ty, VTy };
6061     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6062     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6063     return Builder.CreateTrunc(Ops[0], Int16Ty);
6064   }
6065   case NEON::BI__builtin_neon_vminvq_s8: {
6066     Int = Intrinsic::aarch64_neon_sminv;
6067     Ty = Int32Ty;
6068     VTy = llvm::VectorType::get(Int8Ty, 16);
6069     llvm::Type *Tys[2] = { Ty, VTy };
6070     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6071     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6072     return Builder.CreateTrunc(Ops[0], Int8Ty);
6073   }
6074   case NEON::BI__builtin_neon_vminvq_s16: {
6075     Int = Intrinsic::aarch64_neon_sminv;
6076     Ty = Int32Ty;
6077     VTy = llvm::VectorType::get(Int16Ty, 8);
6078     llvm::Type *Tys[2] = { Ty, VTy };
6079     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6080     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6081     return Builder.CreateTrunc(Ops[0], Int16Ty);
6082   }
6083   case NEON::BI__builtin_neon_vmul_n_f64: {
6084     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6085     Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
6086     return Builder.CreateFMul(Ops[0], RHS);
6087   }
6088   case NEON::BI__builtin_neon_vaddlv_u8: {
6089     Int = Intrinsic::aarch64_neon_uaddlv;
6090     Ty = Int32Ty;
6091     VTy = llvm::VectorType::get(Int8Ty, 8);
6092     llvm::Type *Tys[2] = { Ty, VTy };
6093     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6094     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6095     return Builder.CreateTrunc(Ops[0], Int16Ty);
6096   }
6097   case NEON::BI__builtin_neon_vaddlv_u16: {
6098     Int = Intrinsic::aarch64_neon_uaddlv;
6099     Ty = Int32Ty;
6100     VTy = llvm::VectorType::get(Int16Ty, 4);
6101     llvm::Type *Tys[2] = { Ty, VTy };
6102     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6103     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6104   }
6105   case NEON::BI__builtin_neon_vaddlvq_u8: {
6106     Int = Intrinsic::aarch64_neon_uaddlv;
6107     Ty = Int32Ty;
6108     VTy = llvm::VectorType::get(Int8Ty, 16);
6109     llvm::Type *Tys[2] = { Ty, VTy };
6110     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6111     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6112     return Builder.CreateTrunc(Ops[0], Int16Ty);
6113   }
6114   case NEON::BI__builtin_neon_vaddlvq_u16: {
6115     Int = Intrinsic::aarch64_neon_uaddlv;
6116     Ty = Int32Ty;
6117     VTy = llvm::VectorType::get(Int16Ty, 8);
6118     llvm::Type *Tys[2] = { Ty, VTy };
6119     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6120     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6121   }
6122   case NEON::BI__builtin_neon_vaddlv_s8: {
6123     Int = Intrinsic::aarch64_neon_saddlv;
6124     Ty = Int32Ty;
6125     VTy = llvm::VectorType::get(Int8Ty, 8);
6126     llvm::Type *Tys[2] = { Ty, VTy };
6127     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6128     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6129     return Builder.CreateTrunc(Ops[0], Int16Ty);
6130   }
6131   case NEON::BI__builtin_neon_vaddlv_s16: {
6132     Int = Intrinsic::aarch64_neon_saddlv;
6133     Ty = Int32Ty;
6134     VTy = llvm::VectorType::get(Int16Ty, 4);
6135     llvm::Type *Tys[2] = { Ty, VTy };
6136     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6137     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6138   }
6139   case NEON::BI__builtin_neon_vaddlvq_s8: {
6140     Int = Intrinsic::aarch64_neon_saddlv;
6141     Ty = Int32Ty;
6142     VTy = llvm::VectorType::get(Int8Ty, 16);
6143     llvm::Type *Tys[2] = { Ty, VTy };
6144     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6145     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6146     return Builder.CreateTrunc(Ops[0], Int16Ty);
6147   }
6148   case NEON::BI__builtin_neon_vaddlvq_s16: {
6149     Int = Intrinsic::aarch64_neon_saddlv;
6150     Ty = Int32Ty;
6151     VTy = llvm::VectorType::get(Int16Ty, 8);
6152     llvm::Type *Tys[2] = { Ty, VTy };
6153     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6154     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6155   }
6156   case NEON::BI__builtin_neon_vsri_n_v:
6157   case NEON::BI__builtin_neon_vsriq_n_v: {
6158     Int = Intrinsic::aarch64_neon_vsri;
6159     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6160     return EmitNeonCall(Intrin, Ops, "vsri_n");
6161   }
6162   case NEON::BI__builtin_neon_vsli_n_v:
6163   case NEON::BI__builtin_neon_vsliq_n_v: {
6164     Int = Intrinsic::aarch64_neon_vsli;
6165     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6166     return EmitNeonCall(Intrin, Ops, "vsli_n");
6167   }
6168   case NEON::BI__builtin_neon_vsra_n_v:
6169   case NEON::BI__builtin_neon_vsraq_n_v:
6170     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6171     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
6172     return Builder.CreateAdd(Ops[0], Ops[1]);
6173   case NEON::BI__builtin_neon_vrsra_n_v:
6174   case NEON::BI__builtin_neon_vrsraq_n_v: {
6175     Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
6176     SmallVector<llvm::Value*,2> TmpOps;
6177     TmpOps.push_back(Ops[1]);
6178     TmpOps.push_back(Ops[2]);
6179     Function* F = CGM.getIntrinsic(Int, Ty);
6180     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
6181     Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
6182     return Builder.CreateAdd(Ops[0], tmp);
6183   }
6184     // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
6185     // of an Align parameter here.
6186   case NEON::BI__builtin_neon_vld1_x2_v:
6187   case NEON::BI__builtin_neon_vld1q_x2_v:
6188   case NEON::BI__builtin_neon_vld1_x3_v:
6189   case NEON::BI__builtin_neon_vld1q_x3_v:
6190   case NEON::BI__builtin_neon_vld1_x4_v:
6191   case NEON::BI__builtin_neon_vld1q_x4_v: {
6192     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6193     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6194     llvm::Type *Tys[2] = { VTy, PTy };
6195     unsigned Int;
6196     switch (BuiltinID) {
6197     case NEON::BI__builtin_neon_vld1_x2_v:
6198     case NEON::BI__builtin_neon_vld1q_x2_v:
6199       Int = Intrinsic::aarch64_neon_ld1x2;
6200       break;
6201     case NEON::BI__builtin_neon_vld1_x3_v:
6202     case NEON::BI__builtin_neon_vld1q_x3_v:
6203       Int = Intrinsic::aarch64_neon_ld1x3;
6204       break;
6205     case NEON::BI__builtin_neon_vld1_x4_v:
6206     case NEON::BI__builtin_neon_vld1q_x4_v:
6207       Int = Intrinsic::aarch64_neon_ld1x4;
6208       break;
6209     }
6210     Function *F = CGM.getIntrinsic(Int, Tys);
6211     Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
6212     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6213     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6214     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6215   }
6216   case NEON::BI__builtin_neon_vst1_x2_v:
6217   case NEON::BI__builtin_neon_vst1q_x2_v:
6218   case NEON::BI__builtin_neon_vst1_x3_v:
6219   case NEON::BI__builtin_neon_vst1q_x3_v:
6220   case NEON::BI__builtin_neon_vst1_x4_v:
6221   case NEON::BI__builtin_neon_vst1q_x4_v: {
6222     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6223     llvm::Type *Tys[2] = { VTy, PTy };
6224     unsigned Int;
6225     switch (BuiltinID) {
6226     case NEON::BI__builtin_neon_vst1_x2_v:
6227     case NEON::BI__builtin_neon_vst1q_x2_v:
6228       Int = Intrinsic::aarch64_neon_st1x2;
6229       break;
6230     case NEON::BI__builtin_neon_vst1_x3_v:
6231     case NEON::BI__builtin_neon_vst1q_x3_v:
6232       Int = Intrinsic::aarch64_neon_st1x3;
6233       break;
6234     case NEON::BI__builtin_neon_vst1_x4_v:
6235     case NEON::BI__builtin_neon_vst1q_x4_v:
6236       Int = Intrinsic::aarch64_neon_st1x4;
6237       break;
6238     }
6239     std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6240     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
6241   }
6242   case NEON::BI__builtin_neon_vld1_v:
6243   case NEON::BI__builtin_neon_vld1q_v:
6244     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6245     return Builder.CreateDefaultAlignedLoad(Ops[0]);
6246   case NEON::BI__builtin_neon_vst1_v:
6247   case NEON::BI__builtin_neon_vst1q_v:
6248     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6249     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6250     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6251   case NEON::BI__builtin_neon_vld1_lane_v:
6252   case NEON::BI__builtin_neon_vld1q_lane_v:
6253     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6254     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6255     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6256     Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]);
6257     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
6258   case NEON::BI__builtin_neon_vld1_dup_v:
6259   case NEON::BI__builtin_neon_vld1q_dup_v: {
6260     Value *V = UndefValue::get(Ty);
6261     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6262     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6263     Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]);
6264     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
6265     Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
6266     return EmitNeonSplat(Ops[0], CI);
6267   }
6268   case NEON::BI__builtin_neon_vst1_lane_v:
6269   case NEON::BI__builtin_neon_vst1q_lane_v:
6270     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6271     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6272     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6273     return Builder.CreateDefaultAlignedStore(Ops[1],
6274                                              Builder.CreateBitCast(Ops[0], Ty));
6275   case NEON::BI__builtin_neon_vld2_v:
6276   case NEON::BI__builtin_neon_vld2q_v: {
6277     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6278     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6279     llvm::Type *Tys[2] = { VTy, PTy };
6280     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
6281     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6282     Ops[0] = Builder.CreateBitCast(Ops[0],
6283                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6284     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6285   }
6286   case NEON::BI__builtin_neon_vld3_v:
6287   case NEON::BI__builtin_neon_vld3q_v: {
6288     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6289     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6290     llvm::Type *Tys[2] = { VTy, PTy };
6291     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
6292     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6293     Ops[0] = Builder.CreateBitCast(Ops[0],
6294                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6295     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6296   }
6297   case NEON::BI__builtin_neon_vld4_v:
6298   case NEON::BI__builtin_neon_vld4q_v: {
6299     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6300     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6301     llvm::Type *Tys[2] = { VTy, PTy };
6302     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
6303     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6304     Ops[0] = Builder.CreateBitCast(Ops[0],
6305                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6306     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6307   }
6308   case NEON::BI__builtin_neon_vld2_dup_v:
6309   case NEON::BI__builtin_neon_vld2q_dup_v: {
6310     llvm::Type *PTy =
6311       llvm::PointerType::getUnqual(VTy->getElementType());
6312     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6313     llvm::Type *Tys[2] = { VTy, PTy };
6314     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
6315     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6316     Ops[0] = Builder.CreateBitCast(Ops[0],
6317                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6318     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6319   }
6320   case NEON::BI__builtin_neon_vld3_dup_v:
6321   case NEON::BI__builtin_neon_vld3q_dup_v: {
6322     llvm::Type *PTy =
6323       llvm::PointerType::getUnqual(VTy->getElementType());
6324     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6325     llvm::Type *Tys[2] = { VTy, PTy };
6326     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
6327     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6328     Ops[0] = Builder.CreateBitCast(Ops[0],
6329                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6330     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6331   }
6332   case NEON::BI__builtin_neon_vld4_dup_v:
6333   case NEON::BI__builtin_neon_vld4q_dup_v: {
6334     llvm::Type *PTy =
6335       llvm::PointerType::getUnqual(VTy->getElementType());
6336     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6337     llvm::Type *Tys[2] = { VTy, PTy };
6338     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
6339     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6340     Ops[0] = Builder.CreateBitCast(Ops[0],
6341                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6342     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6343   }
6344   case NEON::BI__builtin_neon_vld2_lane_v:
6345   case NEON::BI__builtin_neon_vld2q_lane_v: {
6346     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6347     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
6348     Ops.push_back(Ops[1]);
6349     Ops.erase(Ops.begin()+1);
6350     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6351     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6352     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6353     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
6354     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6355     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6356     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6357   }
6358   case NEON::BI__builtin_neon_vld3_lane_v:
6359   case NEON::BI__builtin_neon_vld3q_lane_v: {
6360     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6361     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
6362     Ops.push_back(Ops[1]);
6363     Ops.erase(Ops.begin()+1);
6364     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6365     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6366     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6367     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6368     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
6369     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6370     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6371     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6372   }
6373   case NEON::BI__builtin_neon_vld4_lane_v:
6374   case NEON::BI__builtin_neon_vld4q_lane_v: {
6375     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6376     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
6377     Ops.push_back(Ops[1]);
6378     Ops.erase(Ops.begin()+1);
6379     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6380     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6381     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6382     Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
6383     Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
6384     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
6385     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6386     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6387     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6388   }
6389   case NEON::BI__builtin_neon_vst2_v:
6390   case NEON::BI__builtin_neon_vst2q_v: {
6391     Ops.push_back(Ops[0]);
6392     Ops.erase(Ops.begin());
6393     llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
6394     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
6395                         Ops, "");
6396   }
6397   case NEON::BI__builtin_neon_vst2_lane_v:
6398   case NEON::BI__builtin_neon_vst2q_lane_v: {
6399     Ops.push_back(Ops[0]);
6400     Ops.erase(Ops.begin());
6401     Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
6402     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6403     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
6404                         Ops, "");
6405   }
6406   case NEON::BI__builtin_neon_vst3_v:
6407   case NEON::BI__builtin_neon_vst3q_v: {
6408     Ops.push_back(Ops[0]);
6409     Ops.erase(Ops.begin());
6410     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6411     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
6412                         Ops, "");
6413   }
6414   case NEON::BI__builtin_neon_vst3_lane_v:
6415   case NEON::BI__builtin_neon_vst3q_lane_v: {
6416     Ops.push_back(Ops[0]);
6417     Ops.erase(Ops.begin());
6418     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6419     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6420     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
6421                         Ops, "");
6422   }
6423   case NEON::BI__builtin_neon_vst4_v:
6424   case NEON::BI__builtin_neon_vst4q_v: {
6425     Ops.push_back(Ops[0]);
6426     Ops.erase(Ops.begin());
6427     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6428     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
6429                         Ops, "");
6430   }
6431   case NEON::BI__builtin_neon_vst4_lane_v:
6432   case NEON::BI__builtin_neon_vst4q_lane_v: {
6433     Ops.push_back(Ops[0]);
6434     Ops.erase(Ops.begin());
6435     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6436     llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
6437     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
6438                         Ops, "");
6439   }
6440   case NEON::BI__builtin_neon_vtrn_v:
6441   case NEON::BI__builtin_neon_vtrnq_v: {
6442     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6443     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6444     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6445     Value *SV = nullptr;
6446 
6447     for (unsigned vi = 0; vi != 2; ++vi) {
6448       SmallVector<uint32_t, 16> Indices;
6449       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6450         Indices.push_back(i+vi);
6451         Indices.push_back(i+e+vi);
6452       }
6453       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6454       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
6455       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6456     }
6457     return SV;
6458   }
6459   case NEON::BI__builtin_neon_vuzp_v:
6460   case NEON::BI__builtin_neon_vuzpq_v: {
6461     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6462     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6463     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6464     Value *SV = nullptr;
6465 
6466     for (unsigned vi = 0; vi != 2; ++vi) {
6467       SmallVector<uint32_t, 16> Indices;
6468       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
6469         Indices.push_back(2*i+vi);
6470 
6471       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6472       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
6473       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6474     }
6475     return SV;
6476   }
6477   case NEON::BI__builtin_neon_vzip_v:
6478   case NEON::BI__builtin_neon_vzipq_v: {
6479     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6480     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6481     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6482     Value *SV = nullptr;
6483 
6484     for (unsigned vi = 0; vi != 2; ++vi) {
6485       SmallVector<uint32_t, 16> Indices;
6486       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6487         Indices.push_back((i + vi*e) >> 1);
6488         Indices.push_back(((i + vi*e) >> 1)+e);
6489       }
6490       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6491       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
6492       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6493     }
6494     return SV;
6495   }
6496   case NEON::BI__builtin_neon_vqtbl1q_v: {
6497     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
6498                         Ops, "vtbl1");
6499   }
6500   case NEON::BI__builtin_neon_vqtbl2q_v: {
6501     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
6502                         Ops, "vtbl2");
6503   }
6504   case NEON::BI__builtin_neon_vqtbl3q_v: {
6505     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
6506                         Ops, "vtbl3");
6507   }
6508   case NEON::BI__builtin_neon_vqtbl4q_v: {
6509     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
6510                         Ops, "vtbl4");
6511   }
6512   case NEON::BI__builtin_neon_vqtbx1q_v: {
6513     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
6514                         Ops, "vtbx1");
6515   }
6516   case NEON::BI__builtin_neon_vqtbx2q_v: {
6517     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
6518                         Ops, "vtbx2");
6519   }
6520   case NEON::BI__builtin_neon_vqtbx3q_v: {
6521     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
6522                         Ops, "vtbx3");
6523   }
6524   case NEON::BI__builtin_neon_vqtbx4q_v: {
6525     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
6526                         Ops, "vtbx4");
6527   }
6528   case NEON::BI__builtin_neon_vsqadd_v:
6529   case NEON::BI__builtin_neon_vsqaddq_v: {
6530     Int = Intrinsic::aarch64_neon_usqadd;
6531     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
6532   }
6533   case NEON::BI__builtin_neon_vuqadd_v:
6534   case NEON::BI__builtin_neon_vuqaddq_v: {
6535     Int = Intrinsic::aarch64_neon_suqadd;
6536     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
6537   }
6538   }
6539 }
6540 
6541 llvm::Value *CodeGenFunction::
6542 BuildVector(ArrayRef<llvm::Value*> Ops) {
6543   assert((Ops.size() & (Ops.size() - 1)) == 0 &&
6544          "Not a power-of-two sized vector!");
6545   bool AllConstants = true;
6546   for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
6547     AllConstants &= isa<Constant>(Ops[i]);
6548 
6549   // If this is a constant vector, create a ConstantVector.
6550   if (AllConstants) {
6551     SmallVector<llvm::Constant*, 16> CstOps;
6552     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
6553       CstOps.push_back(cast<Constant>(Ops[i]));
6554     return llvm::ConstantVector::get(CstOps);
6555   }
6556 
6557   // Otherwise, insertelement the values to build the vector.
6558   Value *Result =
6559     llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
6560 
6561   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
6562     Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
6563 
6564   return Result;
6565 }
6566 
6567 // Convert the mask from an integer type to a vector of i1.
6568 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
6569                               unsigned NumElts) {
6570 
6571   llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
6572                          cast<IntegerType>(Mask->getType())->getBitWidth());
6573   Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
6574 
6575   // If we have less than 8 elements, then the starting mask was an i8 and
6576   // we need to extract down to the right number of elements.
6577   if (NumElts < 8) {
6578     uint32_t Indices[4];
6579     for (unsigned i = 0; i != NumElts; ++i)
6580       Indices[i] = i;
6581     MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
6582                                              makeArrayRef(Indices, NumElts),
6583                                              "extract");
6584   }
6585   return MaskVec;
6586 }
6587 
6588 static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
6589                                  SmallVectorImpl<Value *> &Ops,
6590                                  unsigned Align) {
6591   // Cast the pointer to right type.
6592   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
6593                                llvm::PointerType::getUnqual(Ops[1]->getType()));
6594 
6595   // If the mask is all ones just emit a regular store.
6596   if (const auto *C = dyn_cast<Constant>(Ops[2]))
6597     if (C->isAllOnesValue())
6598       return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align);
6599 
6600   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
6601                                    Ops[1]->getType()->getVectorNumElements());
6602 
6603   return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec);
6604 }
6605 
6606 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
6607                                 SmallVectorImpl<Value *> &Ops, unsigned Align) {
6608   // Cast the pointer to right type.
6609   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
6610                                llvm::PointerType::getUnqual(Ops[1]->getType()));
6611 
6612   // If the mask is all ones just emit a regular store.
6613   if (const auto *C = dyn_cast<Constant>(Ops[2]))
6614     if (C->isAllOnesValue())
6615       return CGF.Builder.CreateAlignedLoad(Ops[0], Align);
6616 
6617   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
6618                                    Ops[1]->getType()->getVectorNumElements());
6619 
6620   return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
6621 }
6622 
6623 static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF,
6624                                         SmallVectorImpl<Value *> &Ops,
6625                                         llvm::Type *DstTy,
6626                                         unsigned SrcSizeInBits,
6627                                         unsigned Align) {
6628   // Load the subvector.
6629   Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align);
6630 
6631   // Create broadcast mask.
6632   unsigned NumDstElts = DstTy->getVectorNumElements();
6633   unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits();
6634 
6635   SmallVector<uint32_t, 8> Mask;
6636   for (unsigned i = 0; i != NumDstElts; i += NumSrcElts)
6637     for (unsigned j = 0; j != NumSrcElts; ++j)
6638       Mask.push_back(j);
6639 
6640   return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst");
6641 }
6642 
6643 static Value *EmitX86Select(CodeGenFunction &CGF,
6644                             Value *Mask, Value *Op0, Value *Op1) {
6645 
6646   // If the mask is all ones just return first argument.
6647   if (const auto *C = dyn_cast<Constant>(Mask))
6648     if (C->isAllOnesValue())
6649       return Op0;
6650 
6651   Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());
6652 
6653   return CGF.Builder.CreateSelect(Mask, Op0, Op1);
6654 }
6655 
6656 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
6657                                    bool Signed, SmallVectorImpl<Value *> &Ops) {
6658   unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
6659   Value *Cmp;
6660 
6661   if (CC == 3) {
6662     Cmp = Constant::getNullValue(
6663                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
6664   } else if (CC == 7) {
6665     Cmp = Constant::getAllOnesValue(
6666                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
6667   } else {
6668     ICmpInst::Predicate Pred;
6669     switch (CC) {
6670     default: llvm_unreachable("Unknown condition code");
6671     case 0: Pred = ICmpInst::ICMP_EQ;  break;
6672     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
6673     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
6674     case 4: Pred = ICmpInst::ICMP_NE;  break;
6675     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
6676     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
6677     }
6678     Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
6679   }
6680 
6681   const auto *C = dyn_cast<Constant>(Ops.back());
6682   if (!C || !C->isAllOnesValue())
6683     Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts));
6684 
6685   if (NumElts < 8) {
6686     uint32_t Indices[8];
6687     for (unsigned i = 0; i != NumElts; ++i)
6688       Indices[i] = i;
6689     for (unsigned i = NumElts; i != 8; ++i)
6690       Indices[i] = i % NumElts + NumElts;
6691     Cmp = CGF.Builder.CreateShuffleVector(
6692         Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
6693   }
6694   return CGF.Builder.CreateBitCast(Cmp,
6695                                    IntegerType::get(CGF.getLLVMContext(),
6696                                                     std::max(NumElts, 8U)));
6697 }
6698 
6699 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
6700                                            const CallExpr *E) {
6701   if (BuiltinID == X86::BI__builtin_ms_va_start ||
6702       BuiltinID == X86::BI__builtin_ms_va_end)
6703     return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
6704                           BuiltinID == X86::BI__builtin_ms_va_start);
6705   if (BuiltinID == X86::BI__builtin_ms_va_copy) {
6706     // Lower this manually. We can't reliably determine whether or not any
6707     // given va_copy() is for a Win64 va_list from the calling convention
6708     // alone, because it's legal to do this from a System V ABI function.
6709     // With opaque pointer types, we won't have enough information in LLVM
6710     // IR to determine this from the argument types, either. Best to do it
6711     // now, while we have enough information.
6712     Address DestAddr = EmitMSVAListRef(E->getArg(0));
6713     Address SrcAddr = EmitMSVAListRef(E->getArg(1));
6714 
6715     llvm::Type *BPP = Int8PtrPtrTy;
6716 
6717     DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
6718                        DestAddr.getAlignment());
6719     SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
6720                       SrcAddr.getAlignment());
6721 
6722     Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
6723     return Builder.CreateStore(ArgPtr, DestAddr);
6724   }
6725 
6726   SmallVector<Value*, 4> Ops;
6727 
6728   // Find out if any arguments are required to be integer constant expressions.
6729   unsigned ICEArguments = 0;
6730   ASTContext::GetBuiltinTypeError Error;
6731   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
6732   assert(Error == ASTContext::GE_None && "Should not codegen an error");
6733 
6734   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
6735     // If this is a normal argument, just emit it as a scalar.
6736     if ((ICEArguments & (1 << i)) == 0) {
6737       Ops.push_back(EmitScalarExpr(E->getArg(i)));
6738       continue;
6739     }
6740 
6741     // If this is required to be a constant, constant fold it so that we know
6742     // that the generated intrinsic gets a ConstantInt.
6743     llvm::APSInt Result;
6744     bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
6745     assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
6746     Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
6747   }
6748 
6749   // These exist so that the builtin that takes an immediate can be bounds
6750   // checked by clang to avoid passing bad immediates to the backend. Since
6751   // AVX has a larger immediate than SSE we would need separate builtins to
6752   // do the different bounds checking. Rather than create a clang specific
6753   // SSE only builtin, this implements eight separate builtins to match gcc
6754   // implementation.
6755   auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
6756     Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
6757     llvm::Function *F = CGM.getIntrinsic(ID);
6758     return Builder.CreateCall(F, Ops);
6759   };
6760 
6761   // For the vector forms of FP comparisons, translate the builtins directly to
6762   // IR.
6763   // TODO: The builtins could be removed if the SSE header files used vector
6764   // extension comparisons directly (vector ordered/unordered may need
6765   // additional support via __builtin_isnan()).
6766   auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
6767     Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
6768     llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
6769     llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
6770     Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
6771     return Builder.CreateBitCast(Sext, FPVecTy);
6772   };
6773 
6774   switch (BuiltinID) {
6775   default: return nullptr;
6776   case X86::BI__builtin_cpu_supports: {
6777     const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
6778     StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
6779 
6780     // TODO: When/if this becomes more than x86 specific then use a TargetInfo
6781     // based mapping.
6782     // Processor features and mapping to processor feature value.
6783     enum X86Features {
6784       CMOV = 0,
6785       MMX,
6786       POPCNT,
6787       SSE,
6788       SSE2,
6789       SSE3,
6790       SSSE3,
6791       SSE4_1,
6792       SSE4_2,
6793       AVX,
6794       AVX2,
6795       SSE4_A,
6796       FMA4,
6797       XOP,
6798       FMA,
6799       AVX512F,
6800       BMI,
6801       BMI2,
6802       AES,
6803       PCLMUL,
6804       AVX512VL,
6805       AVX512BW,
6806       AVX512DQ,
6807       AVX512CD,
6808       AVX512ER,
6809       AVX512PF,
6810       AVX512VBMI,
6811       AVX512IFMA,
6812       MAX
6813     };
6814 
6815     X86Features Feature = StringSwitch<X86Features>(FeatureStr)
6816                               .Case("cmov", X86Features::CMOV)
6817                               .Case("mmx", X86Features::MMX)
6818                               .Case("popcnt", X86Features::POPCNT)
6819                               .Case("sse", X86Features::SSE)
6820                               .Case("sse2", X86Features::SSE2)
6821                               .Case("sse3", X86Features::SSE3)
6822                               .Case("ssse3", X86Features::SSSE3)
6823                               .Case("sse4.1", X86Features::SSE4_1)
6824                               .Case("sse4.2", X86Features::SSE4_2)
6825                               .Case("avx", X86Features::AVX)
6826                               .Case("avx2", X86Features::AVX2)
6827                               .Case("sse4a", X86Features::SSE4_A)
6828                               .Case("fma4", X86Features::FMA4)
6829                               .Case("xop", X86Features::XOP)
6830                               .Case("fma", X86Features::FMA)
6831                               .Case("avx512f", X86Features::AVX512F)
6832                               .Case("bmi", X86Features::BMI)
6833                               .Case("bmi2", X86Features::BMI2)
6834                               .Case("aes", X86Features::AES)
6835                               .Case("pclmul", X86Features::PCLMUL)
6836                               .Case("avx512vl", X86Features::AVX512VL)
6837                               .Case("avx512bw", X86Features::AVX512BW)
6838                               .Case("avx512dq", X86Features::AVX512DQ)
6839                               .Case("avx512cd", X86Features::AVX512CD)
6840                               .Case("avx512er", X86Features::AVX512ER)
6841                               .Case("avx512pf", X86Features::AVX512PF)
6842                               .Case("avx512vbmi", X86Features::AVX512VBMI)
6843                               .Case("avx512ifma", X86Features::AVX512IFMA)
6844                               .Default(X86Features::MAX);
6845     assert(Feature != X86Features::MAX && "Invalid feature!");
6846 
6847     // Matching the struct layout from the compiler-rt/libgcc structure that is
6848     // filled in:
6849     // unsigned int __cpu_vendor;
6850     // unsigned int __cpu_type;
6851     // unsigned int __cpu_subtype;
6852     // unsigned int __cpu_features[1];
6853     llvm::Type *STy = llvm::StructType::get(
6854         Int32Ty, Int32Ty, Int32Ty, llvm::ArrayType::get(Int32Ty, 1), nullptr);
6855 
6856     // Grab the global __cpu_model.
6857     llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
6858 
6859     // Grab the first (0th) element from the field __cpu_features off of the
6860     // global in the struct STy.
6861     Value *Idxs[] = {
6862       ConstantInt::get(Int32Ty, 0),
6863       ConstantInt::get(Int32Ty, 3),
6864       ConstantInt::get(Int32Ty, 0)
6865     };
6866     Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
6867     Value *Features = Builder.CreateAlignedLoad(CpuFeatures,
6868                                                 CharUnits::fromQuantity(4));
6869 
6870     // Check the value of the bit corresponding to the feature requested.
6871     Value *Bitset = Builder.CreateAnd(
6872         Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature));
6873     return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
6874   }
6875   case X86::BI_mm_prefetch: {
6876     Value *Address = Ops[0];
6877     Value *RW = ConstantInt::get(Int32Ty, 0);
6878     Value *Locality = Ops[1];
6879     Value *Data = ConstantInt::get(Int32Ty, 1);
6880     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
6881     return Builder.CreateCall(F, {Address, RW, Locality, Data});
6882   }
6883   case X86::BI__builtin_ia32_undef128:
6884   case X86::BI__builtin_ia32_undef256:
6885   case X86::BI__builtin_ia32_undef512:
6886     return UndefValue::get(ConvertType(E->getType()));
6887   case X86::BI__builtin_ia32_vec_init_v8qi:
6888   case X86::BI__builtin_ia32_vec_init_v4hi:
6889   case X86::BI__builtin_ia32_vec_init_v2si:
6890     return Builder.CreateBitCast(BuildVector(Ops),
6891                                  llvm::Type::getX86_MMXTy(getLLVMContext()));
6892   case X86::BI__builtin_ia32_vec_ext_v2si:
6893     return Builder.CreateExtractElement(Ops[0],
6894                                   llvm::ConstantInt::get(Ops[1]->getType(), 0));
6895   case X86::BI__builtin_ia32_ldmxcsr: {
6896     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
6897     Builder.CreateStore(Ops[0], Tmp);
6898     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
6899                           Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
6900   }
6901   case X86::BI__builtin_ia32_stmxcsr: {
6902     Address Tmp = CreateMemTemp(E->getType());
6903     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
6904                        Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
6905     return Builder.CreateLoad(Tmp, "stmxcsr");
6906   }
6907   case X86::BI__builtin_ia32_xsave:
6908   case X86::BI__builtin_ia32_xsave64:
6909   case X86::BI__builtin_ia32_xrstor:
6910   case X86::BI__builtin_ia32_xrstor64:
6911   case X86::BI__builtin_ia32_xsaveopt:
6912   case X86::BI__builtin_ia32_xsaveopt64:
6913   case X86::BI__builtin_ia32_xrstors:
6914   case X86::BI__builtin_ia32_xrstors64:
6915   case X86::BI__builtin_ia32_xsavec:
6916   case X86::BI__builtin_ia32_xsavec64:
6917   case X86::BI__builtin_ia32_xsaves:
6918   case X86::BI__builtin_ia32_xsaves64: {
6919     Intrinsic::ID ID;
6920 #define INTRINSIC_X86_XSAVE_ID(NAME) \
6921     case X86::BI__builtin_ia32_##NAME: \
6922       ID = Intrinsic::x86_##NAME; \
6923       break
6924     switch (BuiltinID) {
6925     default: llvm_unreachable("Unsupported intrinsic!");
6926     INTRINSIC_X86_XSAVE_ID(xsave);
6927     INTRINSIC_X86_XSAVE_ID(xsave64);
6928     INTRINSIC_X86_XSAVE_ID(xrstor);
6929     INTRINSIC_X86_XSAVE_ID(xrstor64);
6930     INTRINSIC_X86_XSAVE_ID(xsaveopt);
6931     INTRINSIC_X86_XSAVE_ID(xsaveopt64);
6932     INTRINSIC_X86_XSAVE_ID(xrstors);
6933     INTRINSIC_X86_XSAVE_ID(xrstors64);
6934     INTRINSIC_X86_XSAVE_ID(xsavec);
6935     INTRINSIC_X86_XSAVE_ID(xsavec64);
6936     INTRINSIC_X86_XSAVE_ID(xsaves);
6937     INTRINSIC_X86_XSAVE_ID(xsaves64);
6938     }
6939 #undef INTRINSIC_X86_XSAVE_ID
6940     Value *Mhi = Builder.CreateTrunc(
6941       Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
6942     Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
6943     Ops[1] = Mhi;
6944     Ops.push_back(Mlo);
6945     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
6946   }
6947   case X86::BI__builtin_ia32_storedqudi128_mask:
6948   case X86::BI__builtin_ia32_storedqusi128_mask:
6949   case X86::BI__builtin_ia32_storedquhi128_mask:
6950   case X86::BI__builtin_ia32_storedquqi128_mask:
6951   case X86::BI__builtin_ia32_storeupd128_mask:
6952   case X86::BI__builtin_ia32_storeups128_mask:
6953   case X86::BI__builtin_ia32_storedqudi256_mask:
6954   case X86::BI__builtin_ia32_storedqusi256_mask:
6955   case X86::BI__builtin_ia32_storedquhi256_mask:
6956   case X86::BI__builtin_ia32_storedquqi256_mask:
6957   case X86::BI__builtin_ia32_storeupd256_mask:
6958   case X86::BI__builtin_ia32_storeups256_mask:
6959   case X86::BI__builtin_ia32_storedqudi512_mask:
6960   case X86::BI__builtin_ia32_storedqusi512_mask:
6961   case X86::BI__builtin_ia32_storedquhi512_mask:
6962   case X86::BI__builtin_ia32_storedquqi512_mask:
6963   case X86::BI__builtin_ia32_storeupd512_mask:
6964   case X86::BI__builtin_ia32_storeups512_mask:
6965     return EmitX86MaskedStore(*this, Ops, 1);
6966 
6967   case X86::BI__builtin_ia32_movdqa32store128_mask:
6968   case X86::BI__builtin_ia32_movdqa64store128_mask:
6969   case X86::BI__builtin_ia32_storeaps128_mask:
6970   case X86::BI__builtin_ia32_storeapd128_mask:
6971   case X86::BI__builtin_ia32_movdqa32store256_mask:
6972   case X86::BI__builtin_ia32_movdqa64store256_mask:
6973   case X86::BI__builtin_ia32_storeaps256_mask:
6974   case X86::BI__builtin_ia32_storeapd256_mask:
6975   case X86::BI__builtin_ia32_movdqa32store512_mask:
6976   case X86::BI__builtin_ia32_movdqa64store512_mask:
6977   case X86::BI__builtin_ia32_storeaps512_mask:
6978   case X86::BI__builtin_ia32_storeapd512_mask: {
6979     unsigned Align =
6980       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
6981     return EmitX86MaskedStore(*this, Ops, Align);
6982   }
6983   case X86::BI__builtin_ia32_loadups128_mask:
6984   case X86::BI__builtin_ia32_loadups256_mask:
6985   case X86::BI__builtin_ia32_loadups512_mask:
6986   case X86::BI__builtin_ia32_loadupd128_mask:
6987   case X86::BI__builtin_ia32_loadupd256_mask:
6988   case X86::BI__builtin_ia32_loadupd512_mask:
6989   case X86::BI__builtin_ia32_loaddquqi128_mask:
6990   case X86::BI__builtin_ia32_loaddquqi256_mask:
6991   case X86::BI__builtin_ia32_loaddquqi512_mask:
6992   case X86::BI__builtin_ia32_loaddquhi128_mask:
6993   case X86::BI__builtin_ia32_loaddquhi256_mask:
6994   case X86::BI__builtin_ia32_loaddquhi512_mask:
6995   case X86::BI__builtin_ia32_loaddqusi128_mask:
6996   case X86::BI__builtin_ia32_loaddqusi256_mask:
6997   case X86::BI__builtin_ia32_loaddqusi512_mask:
6998   case X86::BI__builtin_ia32_loaddqudi128_mask:
6999   case X86::BI__builtin_ia32_loaddqudi256_mask:
7000   case X86::BI__builtin_ia32_loaddqudi512_mask:
7001     return EmitX86MaskedLoad(*this, Ops, 1);
7002 
7003   case X86::BI__builtin_ia32_loadaps128_mask:
7004   case X86::BI__builtin_ia32_loadaps256_mask:
7005   case X86::BI__builtin_ia32_loadaps512_mask:
7006   case X86::BI__builtin_ia32_loadapd128_mask:
7007   case X86::BI__builtin_ia32_loadapd256_mask:
7008   case X86::BI__builtin_ia32_loadapd512_mask:
7009   case X86::BI__builtin_ia32_movdqa32load128_mask:
7010   case X86::BI__builtin_ia32_movdqa32load256_mask:
7011   case X86::BI__builtin_ia32_movdqa32load512_mask:
7012   case X86::BI__builtin_ia32_movdqa64load128_mask:
7013   case X86::BI__builtin_ia32_movdqa64load256_mask:
7014   case X86::BI__builtin_ia32_movdqa64load512_mask: {
7015     unsigned Align =
7016       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
7017     return EmitX86MaskedLoad(*this, Ops, Align);
7018   }
7019 
7020   case X86::BI__builtin_ia32_vbroadcastf128_pd256:
7021   case X86::BI__builtin_ia32_vbroadcastf128_ps256: {
7022     llvm::Type *DstTy = ConvertType(E->getType());
7023     return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1);
7024   }
7025 
7026   case X86::BI__builtin_ia32_storehps:
7027   case X86::BI__builtin_ia32_storelps: {
7028     llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
7029     llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
7030 
7031     // cast val v2i64
7032     Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
7033 
7034     // extract (0, 1)
7035     unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
7036     llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
7037     Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
7038 
7039     // cast pointer to i64 & store
7040     Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
7041     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7042   }
7043   case X86::BI__builtin_ia32_palignr128:
7044   case X86::BI__builtin_ia32_palignr256:
7045   case X86::BI__builtin_ia32_palignr128_mask:
7046   case X86::BI__builtin_ia32_palignr256_mask:
7047   case X86::BI__builtin_ia32_palignr512_mask: {
7048     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7049 
7050     unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7051     assert(NumElts % 16 == 0);
7052 
7053     // If palignr is shifting the pair of vectors more than the size of two
7054     // lanes, emit zero.
7055     if (ShiftVal >= 32)
7056       return llvm::Constant::getNullValue(ConvertType(E->getType()));
7057 
7058     // If palignr is shifting the pair of input vectors more than one lane,
7059     // but less than two lanes, convert to shifting in zeroes.
7060     if (ShiftVal > 16) {
7061       ShiftVal -= 16;
7062       Ops[1] = Ops[0];
7063       Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
7064     }
7065 
7066     uint32_t Indices[64];
7067     // 256-bit palignr operates on 128-bit lanes so we need to handle that
7068     for (unsigned l = 0; l != NumElts; l += 16) {
7069       for (unsigned i = 0; i != 16; ++i) {
7070         unsigned Idx = ShiftVal + i;
7071         if (Idx >= 16)
7072           Idx += NumElts - 16; // End of lane, switch operand.
7073         Indices[l + i] = Idx + l;
7074       }
7075     }
7076 
7077     Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
7078                                                makeArrayRef(Indices, NumElts),
7079                                                "palignr");
7080 
7081     // If this isn't a masked builtin, just return the align operation.
7082     if (Ops.size() == 3)
7083       return Align;
7084 
7085     return EmitX86Select(*this, Ops[4], Align, Ops[3]);
7086   }
7087 
7088   case X86::BI__builtin_ia32_movnti:
7089   case X86::BI__builtin_ia32_movnti64: {
7090     llvm::MDNode *Node = llvm::MDNode::get(
7091         getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
7092 
7093     // Convert the type of the pointer to a pointer to the stored type.
7094     Value *BC = Builder.CreateBitCast(Ops[0],
7095                                 llvm::PointerType::getUnqual(Ops[1]->getType()),
7096                                       "cast");
7097     StoreInst *SI = Builder.CreateDefaultAlignedStore(Ops[1], BC);
7098     SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
7099 
7100     // No alignment for scalar intrinsic store.
7101     SI->setAlignment(1);
7102     return SI;
7103   }
7104   case X86::BI__builtin_ia32_movntsd:
7105   case X86::BI__builtin_ia32_movntss: {
7106     llvm::MDNode *Node = llvm::MDNode::get(
7107         getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
7108 
7109     // Extract the 0'th element of the source vector.
7110     Value *Scl = Builder.CreateExtractElement(Ops[1], (uint64_t)0, "extract");
7111 
7112     // Convert the type of the pointer to a pointer to the stored type.
7113     Value *BC = Builder.CreateBitCast(Ops[0],
7114                                 llvm::PointerType::getUnqual(Scl->getType()),
7115                                       "cast");
7116 
7117     // Unaligned nontemporal store of the scalar value.
7118     StoreInst *SI = Builder.CreateDefaultAlignedStore(Scl, BC);
7119     SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
7120     SI->setAlignment(1);
7121     return SI;
7122   }
7123 
7124   case X86::BI__builtin_ia32_selectb_128:
7125   case X86::BI__builtin_ia32_selectb_256:
7126   case X86::BI__builtin_ia32_selectb_512:
7127   case X86::BI__builtin_ia32_selectw_128:
7128   case X86::BI__builtin_ia32_selectw_256:
7129   case X86::BI__builtin_ia32_selectw_512:
7130   case X86::BI__builtin_ia32_selectd_128:
7131   case X86::BI__builtin_ia32_selectd_256:
7132   case X86::BI__builtin_ia32_selectd_512:
7133   case X86::BI__builtin_ia32_selectq_128:
7134   case X86::BI__builtin_ia32_selectq_256:
7135   case X86::BI__builtin_ia32_selectq_512:
7136   case X86::BI__builtin_ia32_selectps_128:
7137   case X86::BI__builtin_ia32_selectps_256:
7138   case X86::BI__builtin_ia32_selectps_512:
7139   case X86::BI__builtin_ia32_selectpd_128:
7140   case X86::BI__builtin_ia32_selectpd_256:
7141   case X86::BI__builtin_ia32_selectpd_512:
7142     return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
7143   case X86::BI__builtin_ia32_pcmpeqb128_mask:
7144   case X86::BI__builtin_ia32_pcmpeqb256_mask:
7145   case X86::BI__builtin_ia32_pcmpeqb512_mask:
7146   case X86::BI__builtin_ia32_pcmpeqw128_mask:
7147   case X86::BI__builtin_ia32_pcmpeqw256_mask:
7148   case X86::BI__builtin_ia32_pcmpeqw512_mask:
7149   case X86::BI__builtin_ia32_pcmpeqd128_mask:
7150   case X86::BI__builtin_ia32_pcmpeqd256_mask:
7151   case X86::BI__builtin_ia32_pcmpeqd512_mask:
7152   case X86::BI__builtin_ia32_pcmpeqq128_mask:
7153   case X86::BI__builtin_ia32_pcmpeqq256_mask:
7154   case X86::BI__builtin_ia32_pcmpeqq512_mask:
7155     return EmitX86MaskedCompare(*this, 0, false, Ops);
7156   case X86::BI__builtin_ia32_pcmpgtb128_mask:
7157   case X86::BI__builtin_ia32_pcmpgtb256_mask:
7158   case X86::BI__builtin_ia32_pcmpgtb512_mask:
7159   case X86::BI__builtin_ia32_pcmpgtw128_mask:
7160   case X86::BI__builtin_ia32_pcmpgtw256_mask:
7161   case X86::BI__builtin_ia32_pcmpgtw512_mask:
7162   case X86::BI__builtin_ia32_pcmpgtd128_mask:
7163   case X86::BI__builtin_ia32_pcmpgtd256_mask:
7164   case X86::BI__builtin_ia32_pcmpgtd512_mask:
7165   case X86::BI__builtin_ia32_pcmpgtq128_mask:
7166   case X86::BI__builtin_ia32_pcmpgtq256_mask:
7167   case X86::BI__builtin_ia32_pcmpgtq512_mask:
7168     return EmitX86MaskedCompare(*this, 6, true, Ops);
7169   case X86::BI__builtin_ia32_cmpb128_mask:
7170   case X86::BI__builtin_ia32_cmpb256_mask:
7171   case X86::BI__builtin_ia32_cmpb512_mask:
7172   case X86::BI__builtin_ia32_cmpw128_mask:
7173   case X86::BI__builtin_ia32_cmpw256_mask:
7174   case X86::BI__builtin_ia32_cmpw512_mask:
7175   case X86::BI__builtin_ia32_cmpd128_mask:
7176   case X86::BI__builtin_ia32_cmpd256_mask:
7177   case X86::BI__builtin_ia32_cmpd512_mask:
7178   case X86::BI__builtin_ia32_cmpq128_mask:
7179   case X86::BI__builtin_ia32_cmpq256_mask:
7180   case X86::BI__builtin_ia32_cmpq512_mask: {
7181     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7182     return EmitX86MaskedCompare(*this, CC, true, Ops);
7183   }
7184   case X86::BI__builtin_ia32_ucmpb128_mask:
7185   case X86::BI__builtin_ia32_ucmpb256_mask:
7186   case X86::BI__builtin_ia32_ucmpb512_mask:
7187   case X86::BI__builtin_ia32_ucmpw128_mask:
7188   case X86::BI__builtin_ia32_ucmpw256_mask:
7189   case X86::BI__builtin_ia32_ucmpw512_mask:
7190   case X86::BI__builtin_ia32_ucmpd128_mask:
7191   case X86::BI__builtin_ia32_ucmpd256_mask:
7192   case X86::BI__builtin_ia32_ucmpd512_mask:
7193   case X86::BI__builtin_ia32_ucmpq128_mask:
7194   case X86::BI__builtin_ia32_ucmpq256_mask:
7195   case X86::BI__builtin_ia32_ucmpq512_mask: {
7196     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7197     return EmitX86MaskedCompare(*this, CC, false, Ops);
7198   }
7199 
7200   case X86::BI__builtin_ia32_vplzcntd_128_mask:
7201   case X86::BI__builtin_ia32_vplzcntd_256_mask:
7202   case X86::BI__builtin_ia32_vplzcntd_512_mask:
7203   case X86::BI__builtin_ia32_vplzcntq_128_mask:
7204   case X86::BI__builtin_ia32_vplzcntq_256_mask:
7205   case X86::BI__builtin_ia32_vplzcntq_512_mask: {
7206     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
7207     return EmitX86Select(*this, Ops[2],
7208                          Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}),
7209                          Ops[1]);
7210   }
7211 
7212   // TODO: Handle 64/512-bit vector widths of min/max.
7213   case X86::BI__builtin_ia32_pmaxsb128:
7214   case X86::BI__builtin_ia32_pmaxsw128:
7215   case X86::BI__builtin_ia32_pmaxsd128:
7216   case X86::BI__builtin_ia32_pmaxsb256:
7217   case X86::BI__builtin_ia32_pmaxsw256:
7218   case X86::BI__builtin_ia32_pmaxsd256: {
7219     Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Ops[1]);
7220     return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7221   }
7222   case X86::BI__builtin_ia32_pmaxub128:
7223   case X86::BI__builtin_ia32_pmaxuw128:
7224   case X86::BI__builtin_ia32_pmaxud128:
7225   case X86::BI__builtin_ia32_pmaxub256:
7226   case X86::BI__builtin_ia32_pmaxuw256:
7227   case X86::BI__builtin_ia32_pmaxud256: {
7228     Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, Ops[0], Ops[1]);
7229     return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7230   }
7231   case X86::BI__builtin_ia32_pminsb128:
7232   case X86::BI__builtin_ia32_pminsw128:
7233   case X86::BI__builtin_ia32_pminsd128:
7234   case X86::BI__builtin_ia32_pminsb256:
7235   case X86::BI__builtin_ia32_pminsw256:
7236   case X86::BI__builtin_ia32_pminsd256: {
7237     Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SLT, Ops[0], Ops[1]);
7238     return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7239   }
7240   case X86::BI__builtin_ia32_pminub128:
7241   case X86::BI__builtin_ia32_pminuw128:
7242   case X86::BI__builtin_ia32_pminud128:
7243   case X86::BI__builtin_ia32_pminub256:
7244   case X86::BI__builtin_ia32_pminuw256:
7245   case X86::BI__builtin_ia32_pminud256: {
7246     Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, Ops[0], Ops[1]);
7247     return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7248   }
7249 
7250   // 3DNow!
7251   case X86::BI__builtin_ia32_pswapdsf:
7252   case X86::BI__builtin_ia32_pswapdsi: {
7253     llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
7254     Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
7255     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
7256     return Builder.CreateCall(F, Ops, "pswapd");
7257   }
7258   case X86::BI__builtin_ia32_rdrand16_step:
7259   case X86::BI__builtin_ia32_rdrand32_step:
7260   case X86::BI__builtin_ia32_rdrand64_step:
7261   case X86::BI__builtin_ia32_rdseed16_step:
7262   case X86::BI__builtin_ia32_rdseed32_step:
7263   case X86::BI__builtin_ia32_rdseed64_step: {
7264     Intrinsic::ID ID;
7265     switch (BuiltinID) {
7266     default: llvm_unreachable("Unsupported intrinsic!");
7267     case X86::BI__builtin_ia32_rdrand16_step:
7268       ID = Intrinsic::x86_rdrand_16;
7269       break;
7270     case X86::BI__builtin_ia32_rdrand32_step:
7271       ID = Intrinsic::x86_rdrand_32;
7272       break;
7273     case X86::BI__builtin_ia32_rdrand64_step:
7274       ID = Intrinsic::x86_rdrand_64;
7275       break;
7276     case X86::BI__builtin_ia32_rdseed16_step:
7277       ID = Intrinsic::x86_rdseed_16;
7278       break;
7279     case X86::BI__builtin_ia32_rdseed32_step:
7280       ID = Intrinsic::x86_rdseed_32;
7281       break;
7282     case X86::BI__builtin_ia32_rdseed64_step:
7283       ID = Intrinsic::x86_rdseed_64;
7284       break;
7285     }
7286 
7287     Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
7288     Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
7289                                       Ops[0]);
7290     return Builder.CreateExtractValue(Call, 1);
7291   }
7292 
7293   // SSE packed comparison intrinsics
7294   case X86::BI__builtin_ia32_cmpeqps:
7295   case X86::BI__builtin_ia32_cmpeqpd:
7296     return getVectorFCmpIR(CmpInst::FCMP_OEQ);
7297   case X86::BI__builtin_ia32_cmpltps:
7298   case X86::BI__builtin_ia32_cmpltpd:
7299     return getVectorFCmpIR(CmpInst::FCMP_OLT);
7300   case X86::BI__builtin_ia32_cmpleps:
7301   case X86::BI__builtin_ia32_cmplepd:
7302     return getVectorFCmpIR(CmpInst::FCMP_OLE);
7303   case X86::BI__builtin_ia32_cmpunordps:
7304   case X86::BI__builtin_ia32_cmpunordpd:
7305     return getVectorFCmpIR(CmpInst::FCMP_UNO);
7306   case X86::BI__builtin_ia32_cmpneqps:
7307   case X86::BI__builtin_ia32_cmpneqpd:
7308     return getVectorFCmpIR(CmpInst::FCMP_UNE);
7309   case X86::BI__builtin_ia32_cmpnltps:
7310   case X86::BI__builtin_ia32_cmpnltpd:
7311     return getVectorFCmpIR(CmpInst::FCMP_UGE);
7312   case X86::BI__builtin_ia32_cmpnleps:
7313   case X86::BI__builtin_ia32_cmpnlepd:
7314     return getVectorFCmpIR(CmpInst::FCMP_UGT);
7315   case X86::BI__builtin_ia32_cmpordps:
7316   case X86::BI__builtin_ia32_cmpordpd:
7317     return getVectorFCmpIR(CmpInst::FCMP_ORD);
7318   case X86::BI__builtin_ia32_cmpps:
7319   case X86::BI__builtin_ia32_cmpps256:
7320   case X86::BI__builtin_ia32_cmppd:
7321   case X86::BI__builtin_ia32_cmppd256: {
7322     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7323     // If this one of the SSE immediates, we can use native IR.
7324     if (CC < 8) {
7325       FCmpInst::Predicate Pred;
7326       switch (CC) {
7327       case 0: Pred = FCmpInst::FCMP_OEQ; break;
7328       case 1: Pred = FCmpInst::FCMP_OLT; break;
7329       case 2: Pred = FCmpInst::FCMP_OLE; break;
7330       case 3: Pred = FCmpInst::FCMP_UNO; break;
7331       case 4: Pred = FCmpInst::FCMP_UNE; break;
7332       case 5: Pred = FCmpInst::FCMP_UGE; break;
7333       case 6: Pred = FCmpInst::FCMP_UGT; break;
7334       case 7: Pred = FCmpInst::FCMP_ORD; break;
7335       }
7336       return getVectorFCmpIR(Pred);
7337     }
7338 
7339     // We can't handle 8-31 immediates with native IR, use the intrinsic.
7340     Intrinsic::ID ID;
7341     switch (BuiltinID) {
7342     default: llvm_unreachable("Unsupported intrinsic!");
7343     case X86::BI__builtin_ia32_cmpps:
7344       ID = Intrinsic::x86_sse_cmp_ps;
7345       break;
7346     case X86::BI__builtin_ia32_cmpps256:
7347       ID = Intrinsic::x86_avx_cmp_ps_256;
7348       break;
7349     case X86::BI__builtin_ia32_cmppd:
7350       ID = Intrinsic::x86_sse2_cmp_pd;
7351       break;
7352     case X86::BI__builtin_ia32_cmppd256:
7353       ID = Intrinsic::x86_avx_cmp_pd_256;
7354       break;
7355     }
7356 
7357     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
7358   }
7359 
7360   // SSE scalar comparison intrinsics
7361   case X86::BI__builtin_ia32_cmpeqss:
7362     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
7363   case X86::BI__builtin_ia32_cmpltss:
7364     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
7365   case X86::BI__builtin_ia32_cmpless:
7366     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
7367   case X86::BI__builtin_ia32_cmpunordss:
7368     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
7369   case X86::BI__builtin_ia32_cmpneqss:
7370     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
7371   case X86::BI__builtin_ia32_cmpnltss:
7372     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
7373   case X86::BI__builtin_ia32_cmpnless:
7374     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
7375   case X86::BI__builtin_ia32_cmpordss:
7376     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
7377   case X86::BI__builtin_ia32_cmpeqsd:
7378     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
7379   case X86::BI__builtin_ia32_cmpltsd:
7380     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
7381   case X86::BI__builtin_ia32_cmplesd:
7382     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
7383   case X86::BI__builtin_ia32_cmpunordsd:
7384     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
7385   case X86::BI__builtin_ia32_cmpneqsd:
7386     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
7387   case X86::BI__builtin_ia32_cmpnltsd:
7388     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
7389   case X86::BI__builtin_ia32_cmpnlesd:
7390     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
7391   case X86::BI__builtin_ia32_cmpordsd:
7392     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
7393   }
7394 }
7395 
7396 
7397 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
7398                                            const CallExpr *E) {
7399   SmallVector<Value*, 4> Ops;
7400 
7401   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
7402     Ops.push_back(EmitScalarExpr(E->getArg(i)));
7403 
7404   Intrinsic::ID ID = Intrinsic::not_intrinsic;
7405 
7406   switch (BuiltinID) {
7407   default: return nullptr;
7408 
7409   // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
7410   // call __builtin_readcyclecounter.
7411   case PPC::BI__builtin_ppc_get_timebase:
7412     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
7413 
7414   // vec_ld, vec_lvsl, vec_lvsr
7415   case PPC::BI__builtin_altivec_lvx:
7416   case PPC::BI__builtin_altivec_lvxl:
7417   case PPC::BI__builtin_altivec_lvebx:
7418   case PPC::BI__builtin_altivec_lvehx:
7419   case PPC::BI__builtin_altivec_lvewx:
7420   case PPC::BI__builtin_altivec_lvsl:
7421   case PPC::BI__builtin_altivec_lvsr:
7422   case PPC::BI__builtin_vsx_lxvd2x:
7423   case PPC::BI__builtin_vsx_lxvw4x:
7424   {
7425     Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
7426 
7427     Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
7428     Ops.pop_back();
7429 
7430     switch (BuiltinID) {
7431     default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
7432     case PPC::BI__builtin_altivec_lvx:
7433       ID = Intrinsic::ppc_altivec_lvx;
7434       break;
7435     case PPC::BI__builtin_altivec_lvxl:
7436       ID = Intrinsic::ppc_altivec_lvxl;
7437       break;
7438     case PPC::BI__builtin_altivec_lvebx:
7439       ID = Intrinsic::ppc_altivec_lvebx;
7440       break;
7441     case PPC::BI__builtin_altivec_lvehx:
7442       ID = Intrinsic::ppc_altivec_lvehx;
7443       break;
7444     case PPC::BI__builtin_altivec_lvewx:
7445       ID = Intrinsic::ppc_altivec_lvewx;
7446       break;
7447     case PPC::BI__builtin_altivec_lvsl:
7448       ID = Intrinsic::ppc_altivec_lvsl;
7449       break;
7450     case PPC::BI__builtin_altivec_lvsr:
7451       ID = Intrinsic::ppc_altivec_lvsr;
7452       break;
7453     case PPC::BI__builtin_vsx_lxvd2x:
7454       ID = Intrinsic::ppc_vsx_lxvd2x;
7455       break;
7456     case PPC::BI__builtin_vsx_lxvw4x:
7457       ID = Intrinsic::ppc_vsx_lxvw4x;
7458       break;
7459     }
7460     llvm::Function *F = CGM.getIntrinsic(ID);
7461     return Builder.CreateCall(F, Ops, "");
7462   }
7463 
7464   // vec_st
7465   case PPC::BI__builtin_altivec_stvx:
7466   case PPC::BI__builtin_altivec_stvxl:
7467   case PPC::BI__builtin_altivec_stvebx:
7468   case PPC::BI__builtin_altivec_stvehx:
7469   case PPC::BI__builtin_altivec_stvewx:
7470   case PPC::BI__builtin_vsx_stxvd2x:
7471   case PPC::BI__builtin_vsx_stxvw4x:
7472   {
7473     Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
7474     Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
7475     Ops.pop_back();
7476 
7477     switch (BuiltinID) {
7478     default: llvm_unreachable("Unsupported st intrinsic!");
7479     case PPC::BI__builtin_altivec_stvx:
7480       ID = Intrinsic::ppc_altivec_stvx;
7481       break;
7482     case PPC::BI__builtin_altivec_stvxl:
7483       ID = Intrinsic::ppc_altivec_stvxl;
7484       break;
7485     case PPC::BI__builtin_altivec_stvebx:
7486       ID = Intrinsic::ppc_altivec_stvebx;
7487       break;
7488     case PPC::BI__builtin_altivec_stvehx:
7489       ID = Intrinsic::ppc_altivec_stvehx;
7490       break;
7491     case PPC::BI__builtin_altivec_stvewx:
7492       ID = Intrinsic::ppc_altivec_stvewx;
7493       break;
7494     case PPC::BI__builtin_vsx_stxvd2x:
7495       ID = Intrinsic::ppc_vsx_stxvd2x;
7496       break;
7497     case PPC::BI__builtin_vsx_stxvw4x:
7498       ID = Intrinsic::ppc_vsx_stxvw4x;
7499       break;
7500     }
7501     llvm::Function *F = CGM.getIntrinsic(ID);
7502     return Builder.CreateCall(F, Ops, "");
7503   }
7504   // Square root
7505   case PPC::BI__builtin_vsx_xvsqrtsp:
7506   case PPC::BI__builtin_vsx_xvsqrtdp: {
7507     llvm::Type *ResultType = ConvertType(E->getType());
7508     Value *X = EmitScalarExpr(E->getArg(0));
7509     ID = Intrinsic::sqrt;
7510     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
7511     return Builder.CreateCall(F, X);
7512   }
7513   // Count leading zeros
7514   case PPC::BI__builtin_altivec_vclzb:
7515   case PPC::BI__builtin_altivec_vclzh:
7516   case PPC::BI__builtin_altivec_vclzw:
7517   case PPC::BI__builtin_altivec_vclzd: {
7518     llvm::Type *ResultType = ConvertType(E->getType());
7519     Value *X = EmitScalarExpr(E->getArg(0));
7520     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
7521     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
7522     return Builder.CreateCall(F, {X, Undef});
7523   }
7524   // Copy sign
7525   case PPC::BI__builtin_vsx_xvcpsgnsp:
7526   case PPC::BI__builtin_vsx_xvcpsgndp: {
7527     llvm::Type *ResultType = ConvertType(E->getType());
7528     Value *X = EmitScalarExpr(E->getArg(0));
7529     Value *Y = EmitScalarExpr(E->getArg(1));
7530     ID = Intrinsic::copysign;
7531     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
7532     return Builder.CreateCall(F, {X, Y});
7533   }
7534   // Rounding/truncation
7535   case PPC::BI__builtin_vsx_xvrspip:
7536   case PPC::BI__builtin_vsx_xvrdpip:
7537   case PPC::BI__builtin_vsx_xvrdpim:
7538   case PPC::BI__builtin_vsx_xvrspim:
7539   case PPC::BI__builtin_vsx_xvrdpi:
7540   case PPC::BI__builtin_vsx_xvrspi:
7541   case PPC::BI__builtin_vsx_xvrdpic:
7542   case PPC::BI__builtin_vsx_xvrspic:
7543   case PPC::BI__builtin_vsx_xvrdpiz:
7544   case PPC::BI__builtin_vsx_xvrspiz: {
7545     llvm::Type *ResultType = ConvertType(E->getType());
7546     Value *X = EmitScalarExpr(E->getArg(0));
7547     if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
7548         BuiltinID == PPC::BI__builtin_vsx_xvrspim)
7549       ID = Intrinsic::floor;
7550     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
7551              BuiltinID == PPC::BI__builtin_vsx_xvrspi)
7552       ID = Intrinsic::round;
7553     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
7554              BuiltinID == PPC::BI__builtin_vsx_xvrspic)
7555       ID = Intrinsic::nearbyint;
7556     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
7557              BuiltinID == PPC::BI__builtin_vsx_xvrspip)
7558       ID = Intrinsic::ceil;
7559     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
7560              BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
7561       ID = Intrinsic::trunc;
7562     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
7563     return Builder.CreateCall(F, X);
7564   }
7565 
7566   // Absolute value
7567   case PPC::BI__builtin_vsx_xvabsdp:
7568   case PPC::BI__builtin_vsx_xvabssp: {
7569     llvm::Type *ResultType = ConvertType(E->getType());
7570     Value *X = EmitScalarExpr(E->getArg(0));
7571     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
7572     return Builder.CreateCall(F, X);
7573   }
7574 
7575   // FMA variations
7576   case PPC::BI__builtin_vsx_xvmaddadp:
7577   case PPC::BI__builtin_vsx_xvmaddasp:
7578   case PPC::BI__builtin_vsx_xvnmaddadp:
7579   case PPC::BI__builtin_vsx_xvnmaddasp:
7580   case PPC::BI__builtin_vsx_xvmsubadp:
7581   case PPC::BI__builtin_vsx_xvmsubasp:
7582   case PPC::BI__builtin_vsx_xvnmsubadp:
7583   case PPC::BI__builtin_vsx_xvnmsubasp: {
7584     llvm::Type *ResultType = ConvertType(E->getType());
7585     Value *X = EmitScalarExpr(E->getArg(0));
7586     Value *Y = EmitScalarExpr(E->getArg(1));
7587     Value *Z = EmitScalarExpr(E->getArg(2));
7588     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
7589     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
7590     switch (BuiltinID) {
7591       case PPC::BI__builtin_vsx_xvmaddadp:
7592       case PPC::BI__builtin_vsx_xvmaddasp:
7593         return Builder.CreateCall(F, {X, Y, Z});
7594       case PPC::BI__builtin_vsx_xvnmaddadp:
7595       case PPC::BI__builtin_vsx_xvnmaddasp:
7596         return Builder.CreateFSub(Zero,
7597                                   Builder.CreateCall(F, {X, Y, Z}), "sub");
7598       case PPC::BI__builtin_vsx_xvmsubadp:
7599       case PPC::BI__builtin_vsx_xvmsubasp:
7600         return Builder.CreateCall(F,
7601                                   {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
7602       case PPC::BI__builtin_vsx_xvnmsubadp:
7603       case PPC::BI__builtin_vsx_xvnmsubasp:
7604         Value *FsubRes =
7605           Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
7606         return Builder.CreateFSub(Zero, FsubRes, "sub");
7607     }
7608     llvm_unreachable("Unknown FMA operation");
7609     return nullptr; // Suppress no-return warning
7610   }
7611   }
7612 }
7613 
7614 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
7615                                               const CallExpr *E) {
7616   switch (BuiltinID) {
7617   case AMDGPU::BI__builtin_amdgcn_div_scale:
7618   case AMDGPU::BI__builtin_amdgcn_div_scalef: {
7619     // Translate from the intrinsics's struct return to the builtin's out
7620     // argument.
7621 
7622     Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
7623 
7624     llvm::Value *X = EmitScalarExpr(E->getArg(0));
7625     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
7626     llvm::Value *Z = EmitScalarExpr(E->getArg(2));
7627 
7628     llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
7629                                            X->getType());
7630 
7631     llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
7632 
7633     llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
7634     llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
7635 
7636     llvm::Type *RealFlagType
7637       = FlagOutPtr.getPointer()->getType()->getPointerElementType();
7638 
7639     llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
7640     Builder.CreateStore(FlagExt, FlagOutPtr);
7641     return Result;
7642   }
7643   case AMDGPU::BI__builtin_amdgcn_div_fmas:
7644   case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
7645     llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
7646     llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
7647     llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
7648     llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
7649 
7650     llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
7651                                       Src0->getType());
7652     llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
7653     return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
7654   }
7655   case AMDGPU::BI__builtin_amdgcn_div_fixup:
7656   case AMDGPU::BI__builtin_amdgcn_div_fixupf:
7657     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
7658   case AMDGPU::BI__builtin_amdgcn_trig_preop:
7659   case AMDGPU::BI__builtin_amdgcn_trig_preopf:
7660     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
7661   case AMDGPU::BI__builtin_amdgcn_rcp:
7662   case AMDGPU::BI__builtin_amdgcn_rcpf:
7663     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
7664   case AMDGPU::BI__builtin_amdgcn_rsq:
7665   case AMDGPU::BI__builtin_amdgcn_rsqf:
7666     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
7667   case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
7668   case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
7669     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
7670   case AMDGPU::BI__builtin_amdgcn_sinf:
7671     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
7672   case AMDGPU::BI__builtin_amdgcn_cosf:
7673     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
7674   case AMDGPU::BI__builtin_amdgcn_log_clampf:
7675     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
7676   case AMDGPU::BI__builtin_amdgcn_ldexp:
7677   case AMDGPU::BI__builtin_amdgcn_ldexpf:
7678     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
7679   case AMDGPU::BI__builtin_amdgcn_frexp_mant:
7680   case AMDGPU::BI__builtin_amdgcn_frexp_mantf: {
7681     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
7682   }
7683   case AMDGPU::BI__builtin_amdgcn_frexp_exp:
7684   case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
7685     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_exp);
7686   }
7687   case AMDGPU::BI__builtin_amdgcn_fract:
7688   case AMDGPU::BI__builtin_amdgcn_fractf:
7689     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
7690   case AMDGPU::BI__builtin_amdgcn_lerp:
7691     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
7692   case AMDGPU::BI__builtin_amdgcn_uicmp:
7693   case AMDGPU::BI__builtin_amdgcn_uicmpl:
7694   case AMDGPU::BI__builtin_amdgcn_sicmp:
7695   case AMDGPU::BI__builtin_amdgcn_sicmpl:
7696     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp);
7697   case AMDGPU::BI__builtin_amdgcn_fcmp:
7698   case AMDGPU::BI__builtin_amdgcn_fcmpf:
7699     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp);
7700   case AMDGPU::BI__builtin_amdgcn_class:
7701   case AMDGPU::BI__builtin_amdgcn_classf:
7702     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
7703 
7704   case AMDGPU::BI__builtin_amdgcn_read_exec: {
7705     CallInst *CI = cast<CallInst>(
7706       EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
7707     CI->setConvergent();
7708     return CI;
7709   }
7710 
7711   // amdgcn workitem
7712   case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
7713     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
7714   case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
7715     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
7716   case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
7717     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
7718 
7719   // r600 intrinsics
7720   case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
7721   case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
7722     return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
7723   case AMDGPU::BI__builtin_r600_read_tidig_x:
7724     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
7725   case AMDGPU::BI__builtin_r600_read_tidig_y:
7726     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
7727   case AMDGPU::BI__builtin_r600_read_tidig_z:
7728     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
7729   default:
7730     return nullptr;
7731   }
7732 }
7733 
7734 /// Handle a SystemZ function in which the final argument is a pointer
7735 /// to an int that receives the post-instruction CC value.  At the LLVM level
7736 /// this is represented as a function that returns a {result, cc} pair.
7737 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
7738                                          unsigned IntrinsicID,
7739                                          const CallExpr *E) {
7740   unsigned NumArgs = E->getNumArgs() - 1;
7741   SmallVector<Value *, 8> Args(NumArgs);
7742   for (unsigned I = 0; I < NumArgs; ++I)
7743     Args[I] = CGF.EmitScalarExpr(E->getArg(I));
7744   Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
7745   Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
7746   Value *Call = CGF.Builder.CreateCall(F, Args);
7747   Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
7748   CGF.Builder.CreateStore(CC, CCPtr);
7749   return CGF.Builder.CreateExtractValue(Call, 0);
7750 }
7751 
7752 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
7753                                                const CallExpr *E) {
7754   switch (BuiltinID) {
7755   case SystemZ::BI__builtin_tbegin: {
7756     Value *TDB = EmitScalarExpr(E->getArg(0));
7757     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
7758     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
7759     return Builder.CreateCall(F, {TDB, Control});
7760   }
7761   case SystemZ::BI__builtin_tbegin_nofloat: {
7762     Value *TDB = EmitScalarExpr(E->getArg(0));
7763     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
7764     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
7765     return Builder.CreateCall(F, {TDB, Control});
7766   }
7767   case SystemZ::BI__builtin_tbeginc: {
7768     Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
7769     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
7770     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
7771     return Builder.CreateCall(F, {TDB, Control});
7772   }
7773   case SystemZ::BI__builtin_tabort: {
7774     Value *Data = EmitScalarExpr(E->getArg(0));
7775     Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
7776     return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
7777   }
7778   case SystemZ::BI__builtin_non_tx_store: {
7779     Value *Address = EmitScalarExpr(E->getArg(0));
7780     Value *Data = EmitScalarExpr(E->getArg(1));
7781     Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
7782     return Builder.CreateCall(F, {Data, Address});
7783   }
7784 
7785   // Vector builtins.  Note that most vector builtins are mapped automatically
7786   // to target-specific LLVM intrinsics.  The ones handled specially here can
7787   // be represented via standard LLVM IR, which is preferable to enable common
7788   // LLVM optimizations.
7789 
7790   case SystemZ::BI__builtin_s390_vpopctb:
7791   case SystemZ::BI__builtin_s390_vpopcth:
7792   case SystemZ::BI__builtin_s390_vpopctf:
7793   case SystemZ::BI__builtin_s390_vpopctg: {
7794     llvm::Type *ResultType = ConvertType(E->getType());
7795     Value *X = EmitScalarExpr(E->getArg(0));
7796     Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
7797     return Builder.CreateCall(F, X);
7798   }
7799 
7800   case SystemZ::BI__builtin_s390_vclzb:
7801   case SystemZ::BI__builtin_s390_vclzh:
7802   case SystemZ::BI__builtin_s390_vclzf:
7803   case SystemZ::BI__builtin_s390_vclzg: {
7804     llvm::Type *ResultType = ConvertType(E->getType());
7805     Value *X = EmitScalarExpr(E->getArg(0));
7806     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
7807     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
7808     return Builder.CreateCall(F, {X, Undef});
7809   }
7810 
7811   case SystemZ::BI__builtin_s390_vctzb:
7812   case SystemZ::BI__builtin_s390_vctzh:
7813   case SystemZ::BI__builtin_s390_vctzf:
7814   case SystemZ::BI__builtin_s390_vctzg: {
7815     llvm::Type *ResultType = ConvertType(E->getType());
7816     Value *X = EmitScalarExpr(E->getArg(0));
7817     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
7818     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
7819     return Builder.CreateCall(F, {X, Undef});
7820   }
7821 
7822   case SystemZ::BI__builtin_s390_vfsqdb: {
7823     llvm::Type *ResultType = ConvertType(E->getType());
7824     Value *X = EmitScalarExpr(E->getArg(0));
7825     Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
7826     return Builder.CreateCall(F, X);
7827   }
7828   case SystemZ::BI__builtin_s390_vfmadb: {
7829     llvm::Type *ResultType = ConvertType(E->getType());
7830     Value *X = EmitScalarExpr(E->getArg(0));
7831     Value *Y = EmitScalarExpr(E->getArg(1));
7832     Value *Z = EmitScalarExpr(E->getArg(2));
7833     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
7834     return Builder.CreateCall(F, {X, Y, Z});
7835   }
7836   case SystemZ::BI__builtin_s390_vfmsdb: {
7837     llvm::Type *ResultType = ConvertType(E->getType());
7838     Value *X = EmitScalarExpr(E->getArg(0));
7839     Value *Y = EmitScalarExpr(E->getArg(1));
7840     Value *Z = EmitScalarExpr(E->getArg(2));
7841     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
7842     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
7843     return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
7844   }
7845   case SystemZ::BI__builtin_s390_vflpdb: {
7846     llvm::Type *ResultType = ConvertType(E->getType());
7847     Value *X = EmitScalarExpr(E->getArg(0));
7848     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
7849     return Builder.CreateCall(F, X);
7850   }
7851   case SystemZ::BI__builtin_s390_vflndb: {
7852     llvm::Type *ResultType = ConvertType(E->getType());
7853     Value *X = EmitScalarExpr(E->getArg(0));
7854     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
7855     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
7856     return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
7857   }
7858   case SystemZ::BI__builtin_s390_vfidb: {
7859     llvm::Type *ResultType = ConvertType(E->getType());
7860     Value *X = EmitScalarExpr(E->getArg(0));
7861     // Constant-fold the M4 and M5 mask arguments.
7862     llvm::APSInt M4, M5;
7863     bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
7864     bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
7865     assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
7866     (void)IsConstM4; (void)IsConstM5;
7867     // Check whether this instance of vfidb can be represented via a LLVM
7868     // standard intrinsic.  We only support some combinations of M4 and M5.
7869     Intrinsic::ID ID = Intrinsic::not_intrinsic;
7870     switch (M4.getZExtValue()) {
7871     default: break;
7872     case 0:  // IEEE-inexact exception allowed
7873       switch (M5.getZExtValue()) {
7874       default: break;
7875       case 0: ID = Intrinsic::rint; break;
7876       }
7877       break;
7878     case 4:  // IEEE-inexact exception suppressed
7879       switch (M5.getZExtValue()) {
7880       default: break;
7881       case 0: ID = Intrinsic::nearbyint; break;
7882       case 1: ID = Intrinsic::round; break;
7883       case 5: ID = Intrinsic::trunc; break;
7884       case 6: ID = Intrinsic::ceil; break;
7885       case 7: ID = Intrinsic::floor; break;
7886       }
7887       break;
7888     }
7889     if (ID != Intrinsic::not_intrinsic) {
7890       Function *F = CGM.getIntrinsic(ID, ResultType);
7891       return Builder.CreateCall(F, X);
7892     }
7893     Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb);
7894     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
7895     Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
7896     return Builder.CreateCall(F, {X, M4Value, M5Value});
7897   }
7898 
7899   // Vector intrisincs that output the post-instruction CC value.
7900 
7901 #define INTRINSIC_WITH_CC(NAME) \
7902     case SystemZ::BI__builtin_##NAME: \
7903       return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
7904 
7905   INTRINSIC_WITH_CC(s390_vpkshs);
7906   INTRINSIC_WITH_CC(s390_vpksfs);
7907   INTRINSIC_WITH_CC(s390_vpksgs);
7908 
7909   INTRINSIC_WITH_CC(s390_vpklshs);
7910   INTRINSIC_WITH_CC(s390_vpklsfs);
7911   INTRINSIC_WITH_CC(s390_vpklsgs);
7912 
7913   INTRINSIC_WITH_CC(s390_vceqbs);
7914   INTRINSIC_WITH_CC(s390_vceqhs);
7915   INTRINSIC_WITH_CC(s390_vceqfs);
7916   INTRINSIC_WITH_CC(s390_vceqgs);
7917 
7918   INTRINSIC_WITH_CC(s390_vchbs);
7919   INTRINSIC_WITH_CC(s390_vchhs);
7920   INTRINSIC_WITH_CC(s390_vchfs);
7921   INTRINSIC_WITH_CC(s390_vchgs);
7922 
7923   INTRINSIC_WITH_CC(s390_vchlbs);
7924   INTRINSIC_WITH_CC(s390_vchlhs);
7925   INTRINSIC_WITH_CC(s390_vchlfs);
7926   INTRINSIC_WITH_CC(s390_vchlgs);
7927 
7928   INTRINSIC_WITH_CC(s390_vfaebs);
7929   INTRINSIC_WITH_CC(s390_vfaehs);
7930   INTRINSIC_WITH_CC(s390_vfaefs);
7931 
7932   INTRINSIC_WITH_CC(s390_vfaezbs);
7933   INTRINSIC_WITH_CC(s390_vfaezhs);
7934   INTRINSIC_WITH_CC(s390_vfaezfs);
7935 
7936   INTRINSIC_WITH_CC(s390_vfeebs);
7937   INTRINSIC_WITH_CC(s390_vfeehs);
7938   INTRINSIC_WITH_CC(s390_vfeefs);
7939 
7940   INTRINSIC_WITH_CC(s390_vfeezbs);
7941   INTRINSIC_WITH_CC(s390_vfeezhs);
7942   INTRINSIC_WITH_CC(s390_vfeezfs);
7943 
7944   INTRINSIC_WITH_CC(s390_vfenebs);
7945   INTRINSIC_WITH_CC(s390_vfenehs);
7946   INTRINSIC_WITH_CC(s390_vfenefs);
7947 
7948   INTRINSIC_WITH_CC(s390_vfenezbs);
7949   INTRINSIC_WITH_CC(s390_vfenezhs);
7950   INTRINSIC_WITH_CC(s390_vfenezfs);
7951 
7952   INTRINSIC_WITH_CC(s390_vistrbs);
7953   INTRINSIC_WITH_CC(s390_vistrhs);
7954   INTRINSIC_WITH_CC(s390_vistrfs);
7955 
7956   INTRINSIC_WITH_CC(s390_vstrcbs);
7957   INTRINSIC_WITH_CC(s390_vstrchs);
7958   INTRINSIC_WITH_CC(s390_vstrcfs);
7959 
7960   INTRINSIC_WITH_CC(s390_vstrczbs);
7961   INTRINSIC_WITH_CC(s390_vstrczhs);
7962   INTRINSIC_WITH_CC(s390_vstrczfs);
7963 
7964   INTRINSIC_WITH_CC(s390_vfcedbs);
7965   INTRINSIC_WITH_CC(s390_vfchdbs);
7966   INTRINSIC_WITH_CC(s390_vfchedbs);
7967 
7968   INTRINSIC_WITH_CC(s390_vftcidb);
7969 
7970 #undef INTRINSIC_WITH_CC
7971 
7972   default:
7973     return nullptr;
7974   }
7975 }
7976 
7977 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
7978                                              const CallExpr *E) {
7979   auto MakeLdg = [&](unsigned IntrinsicID) {
7980     Value *Ptr = EmitScalarExpr(E->getArg(0));
7981     AlignmentSource AlignSource;
7982     clang::CharUnits Align =
7983         getNaturalPointeeTypeAlignment(E->getArg(0)->getType(), &AlignSource);
7984     return Builder.CreateCall(
7985         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
7986                                        Ptr->getType()}),
7987         {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
7988   };
7989 
7990   switch (BuiltinID) {
7991   case NVPTX::BI__nvvm_atom_add_gen_i:
7992   case NVPTX::BI__nvvm_atom_add_gen_l:
7993   case NVPTX::BI__nvvm_atom_add_gen_ll:
7994     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
7995 
7996   case NVPTX::BI__nvvm_atom_sub_gen_i:
7997   case NVPTX::BI__nvvm_atom_sub_gen_l:
7998   case NVPTX::BI__nvvm_atom_sub_gen_ll:
7999     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
8000 
8001   case NVPTX::BI__nvvm_atom_and_gen_i:
8002   case NVPTX::BI__nvvm_atom_and_gen_l:
8003   case NVPTX::BI__nvvm_atom_and_gen_ll:
8004     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
8005 
8006   case NVPTX::BI__nvvm_atom_or_gen_i:
8007   case NVPTX::BI__nvvm_atom_or_gen_l:
8008   case NVPTX::BI__nvvm_atom_or_gen_ll:
8009     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
8010 
8011   case NVPTX::BI__nvvm_atom_xor_gen_i:
8012   case NVPTX::BI__nvvm_atom_xor_gen_l:
8013   case NVPTX::BI__nvvm_atom_xor_gen_ll:
8014     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
8015 
8016   case NVPTX::BI__nvvm_atom_xchg_gen_i:
8017   case NVPTX::BI__nvvm_atom_xchg_gen_l:
8018   case NVPTX::BI__nvvm_atom_xchg_gen_ll:
8019     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
8020 
8021   case NVPTX::BI__nvvm_atom_max_gen_i:
8022   case NVPTX::BI__nvvm_atom_max_gen_l:
8023   case NVPTX::BI__nvvm_atom_max_gen_ll:
8024     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
8025 
8026   case NVPTX::BI__nvvm_atom_max_gen_ui:
8027   case NVPTX::BI__nvvm_atom_max_gen_ul:
8028   case NVPTX::BI__nvvm_atom_max_gen_ull:
8029     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
8030 
8031   case NVPTX::BI__nvvm_atom_min_gen_i:
8032   case NVPTX::BI__nvvm_atom_min_gen_l:
8033   case NVPTX::BI__nvvm_atom_min_gen_ll:
8034     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
8035 
8036   case NVPTX::BI__nvvm_atom_min_gen_ui:
8037   case NVPTX::BI__nvvm_atom_min_gen_ul:
8038   case NVPTX::BI__nvvm_atom_min_gen_ull:
8039     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
8040 
8041   case NVPTX::BI__nvvm_atom_cas_gen_i:
8042   case NVPTX::BI__nvvm_atom_cas_gen_l:
8043   case NVPTX::BI__nvvm_atom_cas_gen_ll:
8044     // __nvvm_atom_cas_gen_* should return the old value rather than the
8045     // success flag.
8046     return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
8047 
8048   case NVPTX::BI__nvvm_atom_add_gen_f: {
8049     Value *Ptr = EmitScalarExpr(E->getArg(0));
8050     Value *Val = EmitScalarExpr(E->getArg(1));
8051     // atomicrmw only deals with integer arguments so we need to use
8052     // LLVM's nvvm_atomic_load_add_f32 intrinsic for that.
8053     Value *FnALAF32 =
8054         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
8055     return Builder.CreateCall(FnALAF32, {Ptr, Val});
8056   }
8057 
8058   case NVPTX::BI__nvvm_atom_inc_gen_ui: {
8059     Value *Ptr = EmitScalarExpr(E->getArg(0));
8060     Value *Val = EmitScalarExpr(E->getArg(1));
8061     Value *FnALI32 =
8062         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
8063     return Builder.CreateCall(FnALI32, {Ptr, Val});
8064   }
8065 
8066   case NVPTX::BI__nvvm_atom_dec_gen_ui: {
8067     Value *Ptr = EmitScalarExpr(E->getArg(0));
8068     Value *Val = EmitScalarExpr(E->getArg(1));
8069     Value *FnALD32 =
8070         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
8071     return Builder.CreateCall(FnALD32, {Ptr, Val});
8072   }
8073 
8074   case NVPTX::BI__nvvm_ldg_c:
8075   case NVPTX::BI__nvvm_ldg_c2:
8076   case NVPTX::BI__nvvm_ldg_c4:
8077   case NVPTX::BI__nvvm_ldg_s:
8078   case NVPTX::BI__nvvm_ldg_s2:
8079   case NVPTX::BI__nvvm_ldg_s4:
8080   case NVPTX::BI__nvvm_ldg_i:
8081   case NVPTX::BI__nvvm_ldg_i2:
8082   case NVPTX::BI__nvvm_ldg_i4:
8083   case NVPTX::BI__nvvm_ldg_l:
8084   case NVPTX::BI__nvvm_ldg_ll:
8085   case NVPTX::BI__nvvm_ldg_ll2:
8086   case NVPTX::BI__nvvm_ldg_uc:
8087   case NVPTX::BI__nvvm_ldg_uc2:
8088   case NVPTX::BI__nvvm_ldg_uc4:
8089   case NVPTX::BI__nvvm_ldg_us:
8090   case NVPTX::BI__nvvm_ldg_us2:
8091   case NVPTX::BI__nvvm_ldg_us4:
8092   case NVPTX::BI__nvvm_ldg_ui:
8093   case NVPTX::BI__nvvm_ldg_ui2:
8094   case NVPTX::BI__nvvm_ldg_ui4:
8095   case NVPTX::BI__nvvm_ldg_ul:
8096   case NVPTX::BI__nvvm_ldg_ull:
8097   case NVPTX::BI__nvvm_ldg_ull2:
8098     // PTX Interoperability section 2.2: "For a vector with an even number of
8099     // elements, its alignment is set to number of elements times the alignment
8100     // of its member: n*alignof(t)."
8101     return MakeLdg(Intrinsic::nvvm_ldg_global_i);
8102   case NVPTX::BI__nvvm_ldg_f:
8103   case NVPTX::BI__nvvm_ldg_f2:
8104   case NVPTX::BI__nvvm_ldg_f4:
8105   case NVPTX::BI__nvvm_ldg_d:
8106   case NVPTX::BI__nvvm_ldg_d2:
8107     return MakeLdg(Intrinsic::nvvm_ldg_global_f);
8108   default:
8109     return nullptr;
8110   }
8111 }
8112 
8113 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
8114                                                    const CallExpr *E) {
8115   switch (BuiltinID) {
8116   case WebAssembly::BI__builtin_wasm_current_memory: {
8117     llvm::Type *ResultType = ConvertType(E->getType());
8118     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
8119     return Builder.CreateCall(Callee);
8120   }
8121   case WebAssembly::BI__builtin_wasm_grow_memory: {
8122     Value *X = EmitScalarExpr(E->getArg(0));
8123     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
8124     return Builder.CreateCall(Callee, X);
8125   }
8126 
8127   default:
8128     return nullptr;
8129   }
8130 }
8131