1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CodeGenFunction.h"
15 #include "CGCXXABI.h"
16 #include "CGObjCRuntime.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/ASTContext.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/Basic/TargetBuiltins.h"
22 #include "clang/Basic/TargetInfo.h"
23 #include "clang/CodeGen/CGFunctionInfo.h"
24 #include "llvm/ADT/StringExtras.h"
25 #include "llvm/IR/CallSite.h"
26 #include "llvm/IR/DataLayout.h"
27 #include "llvm/IR/InlineAsm.h"
28 #include "llvm/IR/Intrinsics.h"
29 #include <sstream>
30 
31 using namespace clang;
32 using namespace CodeGen;
33 using namespace llvm;
34 
35 /// getBuiltinLibFunction - Given a builtin id for a function like
36 /// "__builtin_fabsf", return a Function* for "fabsf".
37 llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
38                                                   unsigned BuiltinID) {
39   assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
40 
41   // Get the name, skip over the __builtin_ prefix (if necessary).
42   StringRef Name;
43   GlobalDecl D(FD);
44 
45   // If the builtin has been declared explicitly with an assembler label,
46   // use the mangled name. This differs from the plain label on platforms
47   // that prefix labels.
48   if (FD->hasAttr<AsmLabelAttr>())
49     Name = getMangledName(D);
50   else
51     Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
52 
53   llvm::FunctionType *Ty =
54     cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
55 
56   return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
57 }
58 
59 /// Emit the conversions required to turn the given value into an
60 /// integer of the given size.
61 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
62                         QualType T, llvm::IntegerType *IntType) {
63   V = CGF.EmitToMemory(V, T);
64 
65   if (V->getType()->isPointerTy())
66     return CGF.Builder.CreatePtrToInt(V, IntType);
67 
68   assert(V->getType() == IntType);
69   return V;
70 }
71 
72 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
73                           QualType T, llvm::Type *ResultType) {
74   V = CGF.EmitFromMemory(V, T);
75 
76   if (ResultType->isPointerTy())
77     return CGF.Builder.CreateIntToPtr(V, ResultType);
78 
79   assert(V->getType() == ResultType);
80   return V;
81 }
82 
83 /// Utility to insert an atomic instruction based on Instrinsic::ID
84 /// and the expression node.
85 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
86                                     llvm::AtomicRMWInst::BinOp Kind,
87                                     const CallExpr *E) {
88   QualType T = E->getType();
89   assert(E->getArg(0)->getType()->isPointerType());
90   assert(CGF.getContext().hasSameUnqualifiedType(T,
91                                   E->getArg(0)->getType()->getPointeeType()));
92   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
93 
94   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
95   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
96 
97   llvm::IntegerType *IntType =
98     llvm::IntegerType::get(CGF.getLLVMContext(),
99                            CGF.getContext().getTypeSize(T));
100   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
101 
102   llvm::Value *Args[2];
103   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
104   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
105   llvm::Type *ValueType = Args[1]->getType();
106   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
107 
108   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
109       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
110   return EmitFromInt(CGF, Result, T, ValueType);
111 }
112 
113 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
114   Value *Val = CGF.EmitScalarExpr(E->getArg(0));
115   Value *Address = CGF.EmitScalarExpr(E->getArg(1));
116 
117   // Convert the type of the pointer to a pointer to the stored type.
118   Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
119   Value *BC = CGF.Builder.CreateBitCast(
120       Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
121   LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
122   LV.setNontemporal(true);
123   CGF.EmitStoreOfScalar(Val, LV, false);
124   return nullptr;
125 }
126 
127 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
128   Value *Address = CGF.EmitScalarExpr(E->getArg(0));
129 
130   LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
131   LV.setNontemporal(true);
132   return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
133 }
134 
135 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
136                                llvm::AtomicRMWInst::BinOp Kind,
137                                const CallExpr *E) {
138   return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
139 }
140 
141 /// Utility to insert an atomic instruction based Instrinsic::ID and
142 /// the expression node, where the return value is the result of the
143 /// operation.
144 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
145                                    llvm::AtomicRMWInst::BinOp Kind,
146                                    const CallExpr *E,
147                                    Instruction::BinaryOps Op,
148                                    bool Invert = false) {
149   QualType T = E->getType();
150   assert(E->getArg(0)->getType()->isPointerType());
151   assert(CGF.getContext().hasSameUnqualifiedType(T,
152                                   E->getArg(0)->getType()->getPointeeType()));
153   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
154 
155   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
156   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
157 
158   llvm::IntegerType *IntType =
159     llvm::IntegerType::get(CGF.getLLVMContext(),
160                            CGF.getContext().getTypeSize(T));
161   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
162 
163   llvm::Value *Args[2];
164   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
165   llvm::Type *ValueType = Args[1]->getType();
166   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
167   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
168 
169   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
170       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
171   Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
172   if (Invert)
173     Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
174                                      llvm::ConstantInt::get(IntType, -1));
175   Result = EmitFromInt(CGF, Result, T, ValueType);
176   return RValue::get(Result);
177 }
178 
179 /// @brief Utility to insert an atomic cmpxchg instruction.
180 ///
181 /// @param CGF The current codegen function.
182 /// @param E   Builtin call expression to convert to cmpxchg.
183 ///            arg0 - address to operate on
184 ///            arg1 - value to compare with
185 ///            arg2 - new value
186 /// @param ReturnBool Specifies whether to return success flag of
187 ///                   cmpxchg result or the old value.
188 ///
189 /// @returns result of cmpxchg, according to ReturnBool
190 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
191                                      bool ReturnBool) {
192   QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
193   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
194   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
195 
196   llvm::IntegerType *IntType = llvm::IntegerType::get(
197       CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
198   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
199 
200   Value *Args[3];
201   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
202   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
203   llvm::Type *ValueType = Args[1]->getType();
204   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
205   Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
206 
207   Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
208       Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
209       llvm::AtomicOrdering::SequentiallyConsistent);
210   if (ReturnBool)
211     // Extract boolean success flag and zext it to int.
212     return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
213                                   CGF.ConvertType(E->getType()));
214   else
215     // Extract old value and emit it using the same type as compare value.
216     return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
217                        ValueType);
218 }
219 
220 /// EmitFAbs - Emit a call to @llvm.fabs().
221 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
222   Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
223   llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
224   Call->setDoesNotAccessMemory();
225   return Call;
226 }
227 
228 /// Emit the computation of the sign bit for a floating point value. Returns
229 /// the i1 sign bit value.
230 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
231   LLVMContext &C = CGF.CGM.getLLVMContext();
232 
233   llvm::Type *Ty = V->getType();
234   int Width = Ty->getPrimitiveSizeInBits();
235   llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
236   V = CGF.Builder.CreateBitCast(V, IntTy);
237   if (Ty->isPPC_FP128Ty()) {
238     // We want the sign bit of the higher-order double. The bitcast we just
239     // did works as if the double-double was stored to memory and then
240     // read as an i128. The "store" will put the higher-order double in the
241     // lower address in both little- and big-Endian modes, but the "load"
242     // will treat those bits as a different part of the i128: the low bits in
243     // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
244     // we need to shift the high bits down to the low before truncating.
245     Width >>= 1;
246     if (CGF.getTarget().isBigEndian()) {
247       Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
248       V = CGF.Builder.CreateLShr(V, ShiftCst);
249     }
250     // We are truncating value in order to extract the higher-order
251     // double, which we will be using to extract the sign from.
252     IntTy = llvm::IntegerType::get(C, Width);
253     V = CGF.Builder.CreateTrunc(V, IntTy);
254   }
255   Value *Zero = llvm::Constant::getNullValue(IntTy);
256   return CGF.Builder.CreateICmpSLT(V, Zero);
257 }
258 
259 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn,
260                               const CallExpr *E, llvm::Value *calleeValue) {
261   return CGF.EmitCall(E->getCallee()->getType(), calleeValue, E,
262                       ReturnValueSlot(), Fn);
263 }
264 
265 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
266 /// depending on IntrinsicID.
267 ///
268 /// \arg CGF The current codegen function.
269 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
270 /// \arg X The first argument to the llvm.*.with.overflow.*.
271 /// \arg Y The second argument to the llvm.*.with.overflow.*.
272 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
273 /// \returns The result (i.e. sum/product) returned by the intrinsic.
274 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
275                                           const llvm::Intrinsic::ID IntrinsicID,
276                                           llvm::Value *X, llvm::Value *Y,
277                                           llvm::Value *&Carry) {
278   // Make sure we have integers of the same width.
279   assert(X->getType() == Y->getType() &&
280          "Arguments must be the same type. (Did you forget to make sure both "
281          "arguments have the same integer width?)");
282 
283   llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
284   llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
285   Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
286   return CGF.Builder.CreateExtractValue(Tmp, 0);
287 }
288 
289 // Emit a simple mangled intrinsic that has 1 argument and a return type
290 // matching the argument type.
291 static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
292                                const CallExpr *E,
293                                unsigned IntrinsicID) {
294   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
295 
296   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
297   return CGF.Builder.CreateCall(F, Src0);
298 }
299 
300 // Emit an intrinsic that has 3 float or double operands.
301 static Value *emitTernaryFPBuiltin(CodeGenFunction &CGF,
302                                    const CallExpr *E,
303                                    unsigned IntrinsicID) {
304   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
305   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
306   llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
307 
308   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
309   return CGF.Builder.CreateCall(F, {Src0, Src1, Src2});
310 }
311 
312 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
313 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
314                                const CallExpr *E,
315                                unsigned IntrinsicID) {
316   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
317   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
318 
319   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
320   return CGF.Builder.CreateCall(F, {Src0, Src1});
321 }
322 
namespace {
  /// Describes an integer type by its bit width and signedness. Kept as a
  /// plain aggregate so it can be brace-initialized as {Width, Signed}.
  struct WidthAndSignedness {
    /// Width of the integer type, in bits.
    unsigned Width;
    /// True if the integer type is signed.
    bool Signed;
  };
} // end anonymous namespace
329 
330 static WidthAndSignedness
331 getIntegerWidthAndSignedness(const clang::ASTContext &context,
332                              const clang::QualType Type) {
333   assert(Type->isIntegerType() && "Given type is not an integer.");
334   unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
335   bool Signed = Type->isSignedIntegerType();
336   return {Width, Signed};
337 }
338 
339 // Given one or more integer types, this function produces an integer type that
340 // encompasses them: any value in one of the given types could be expressed in
341 // the encompassing type.
342 static struct WidthAndSignedness
343 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
344   assert(Types.size() > 0 && "Empty list of types.");
345 
346   // If any of the given types is signed, we must return a signed type.
347   bool Signed = false;
348   for (const auto &Type : Types) {
349     Signed |= Type.Signed;
350   }
351 
352   // The encompassing type must have a width greater than or equal to the width
353   // of the specified types.  Aditionally, if the encompassing type is signed,
354   // its width must be strictly greater than the width of any unsigned types
355   // given.
356   unsigned Width = 0;
357   for (const auto &Type : Types) {
358     unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
359     if (Width < MinWidth) {
360       Width = MinWidth;
361     }
362   }
363 
364   return {Width, Signed};
365 }
366 
367 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
368   llvm::Type *DestType = Int8PtrTy;
369   if (ArgValue->getType() != DestType)
370     ArgValue =
371         Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
372 
373   Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
374   return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
375 }
376 
/// Checks if using the result of __builtin_object_size(p, @p From) in place of
/// __builtin_object_size(p, @p To) is correct.
///
/// Besides identical types, the accepted substitutions are From=0 for To=1
/// and From=3 for To=2.
static bool areBOSTypesCompatible(int From, int To) {
  // Note: Our __builtin_object_size implementation currently treats Type=0 and
  // Type=2 identically. Encoding this implementation detail here may make
  // improving __builtin_object_size difficult in the future, so it's omitted.
  if (From == To)
    return true;
  if (From == 0)
    return To == 1;
  if (From == 3)
    return To == 2;
  return false;
}
385 
386 static llvm::Value *
387 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
388   return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
389 }
390 
391 llvm::Value *
392 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
393                                                  llvm::IntegerType *ResType) {
394   uint64_t ObjectSize;
395   if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
396     return emitBuiltinObjectSize(E, Type, ResType);
397   return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
398 }
399 
400 /// Returns a Value corresponding to the size of the given expression.
401 /// This Value may be either of the following:
402 ///   - A llvm::Argument (if E is a param with the pass_object_size attribute on
403 ///     it)
404 ///   - A call to the @llvm.objectsize intrinsic
405 llvm::Value *
406 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
407                                        llvm::IntegerType *ResType) {
408   // We need to reference an argument if the pointer is a parameter with the
409   // pass_object_size attribute.
410   if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
411     auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
412     auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
413     if (Param != nullptr && PS != nullptr &&
414         areBOSTypesCompatible(PS->getType(), Type)) {
415       auto Iter = SizeArguments.find(Param);
416       assert(Iter != SizeArguments.end());
417 
418       const ImplicitParamDecl *D = Iter->second;
419       auto DIter = LocalDeclMap.find(D);
420       assert(DIter != LocalDeclMap.end());
421 
422       return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
423                               getContext().getSizeType(), E->getLocStart());
424     }
425   }
426 
427   // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
428   // evaluate E for side-effects. In either case, we shouldn't lower to
429   // @llvm.objectsize.
430   if (Type == 3 || E->HasSideEffects(getContext()))
431     return getDefaultBuiltinObjectSizeResult(Type, ResType);
432 
433   // LLVM only supports 0 and 2, make sure that we pass along that
434   // as a boolean.
435   auto *CI = ConstantInt::get(Builder.getInt1Ty(), (Type & 2) >> 1);
436   // FIXME: Get right address space.
437   llvm::Type *Tys[] = {ResType, Builder.getInt8PtrTy(0)};
438   Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys);
439   return Builder.CreateCall(F, {EmitScalarExpr(E), CI});
440 }
441 
442 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
443                                         unsigned BuiltinID, const CallExpr *E,
444                                         ReturnValueSlot ReturnValue) {
445   // See if we can constant fold this builtin.  If so, don't emit it at all.
446   Expr::EvalResult Result;
447   if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
448       !Result.hasSideEffects()) {
449     if (Result.Val.isInt())
450       return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
451                                                 Result.Val.getInt()));
452     if (Result.Val.isFloat())
453       return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
454                                                Result.Val.getFloat()));
455   }
456 
457   switch (BuiltinID) {
458   default: break;  // Handle intrinsics and libm functions below.
459   case Builtin::BI__builtin___CFStringMakeConstantString:
460   case Builtin::BI__builtin___NSStringMakeConstantString:
461     return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr));
462   case Builtin::BI__builtin_stdarg_start:
463   case Builtin::BI__builtin_va_start:
464   case Builtin::BI__va_start:
465   case Builtin::BI__builtin_va_end:
466     return RValue::get(
467         EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
468                            ? EmitScalarExpr(E->getArg(0))
469                            : EmitVAListRef(E->getArg(0)).getPointer(),
470                        BuiltinID != Builtin::BI__builtin_va_end));
471   case Builtin::BI__builtin_va_copy: {
472     Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
473     Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
474 
475     llvm::Type *Type = Int8PtrTy;
476 
477     DstPtr = Builder.CreateBitCast(DstPtr, Type);
478     SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
479     return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
480                                           {DstPtr, SrcPtr}));
481   }
482   case Builtin::BI__builtin_abs:
483   case Builtin::BI__builtin_labs:
484   case Builtin::BI__builtin_llabs: {
485     Value *ArgValue = EmitScalarExpr(E->getArg(0));
486 
487     Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
488     Value *CmpResult =
489     Builder.CreateICmpSGE(ArgValue,
490                           llvm::Constant::getNullValue(ArgValue->getType()),
491                                                             "abscond");
492     Value *Result =
493       Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
494 
495     return RValue::get(Result);
496   }
497   case Builtin::BI__builtin_fabs:
498   case Builtin::BI__builtin_fabsf:
499   case Builtin::BI__builtin_fabsl: {
500     Value *Arg1 = EmitScalarExpr(E->getArg(0));
501     Value *Result = EmitFAbs(*this, Arg1);
502     return RValue::get(Result);
503   }
504   case Builtin::BI__builtin_fmod:
505   case Builtin::BI__builtin_fmodf:
506   case Builtin::BI__builtin_fmodl: {
507     Value *Arg1 = EmitScalarExpr(E->getArg(0));
508     Value *Arg2 = EmitScalarExpr(E->getArg(1));
509     Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
510     return RValue::get(Result);
511   }
512 
513   case Builtin::BI__builtin_conj:
514   case Builtin::BI__builtin_conjf:
515   case Builtin::BI__builtin_conjl: {
516     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
517     Value *Real = ComplexVal.first;
518     Value *Imag = ComplexVal.second;
519     Value *Zero =
520       Imag->getType()->isFPOrFPVectorTy()
521         ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
522         : llvm::Constant::getNullValue(Imag->getType());
523 
524     Imag = Builder.CreateFSub(Zero, Imag, "sub");
525     return RValue::getComplex(std::make_pair(Real, Imag));
526   }
527   case Builtin::BI__builtin_creal:
528   case Builtin::BI__builtin_crealf:
529   case Builtin::BI__builtin_creall:
530   case Builtin::BIcreal:
531   case Builtin::BIcrealf:
532   case Builtin::BIcreall: {
533     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
534     return RValue::get(ComplexVal.first);
535   }
536 
537   case Builtin::BI__builtin_cimag:
538   case Builtin::BI__builtin_cimagf:
539   case Builtin::BI__builtin_cimagl:
540   case Builtin::BIcimag:
541   case Builtin::BIcimagf:
542   case Builtin::BIcimagl: {
543     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
544     return RValue::get(ComplexVal.second);
545   }
546 
547   case Builtin::BI__builtin_ctzs:
548   case Builtin::BI__builtin_ctz:
549   case Builtin::BI__builtin_ctzl:
550   case Builtin::BI__builtin_ctzll: {
551     Value *ArgValue = EmitScalarExpr(E->getArg(0));
552 
553     llvm::Type *ArgType = ArgValue->getType();
554     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
555 
556     llvm::Type *ResultType = ConvertType(E->getType());
557     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
558     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
559     if (Result->getType() != ResultType)
560       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
561                                      "cast");
562     return RValue::get(Result);
563   }
564   case Builtin::BI__builtin_clzs:
565   case Builtin::BI__builtin_clz:
566   case Builtin::BI__builtin_clzl:
567   case Builtin::BI__builtin_clzll: {
568     Value *ArgValue = EmitScalarExpr(E->getArg(0));
569 
570     llvm::Type *ArgType = ArgValue->getType();
571     Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
572 
573     llvm::Type *ResultType = ConvertType(E->getType());
574     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
575     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
576     if (Result->getType() != ResultType)
577       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
578                                      "cast");
579     return RValue::get(Result);
580   }
581   case Builtin::BI__builtin_ffs:
582   case Builtin::BI__builtin_ffsl:
583   case Builtin::BI__builtin_ffsll: {
584     // ffs(x) -> x ? cttz(x) + 1 : 0
585     Value *ArgValue = EmitScalarExpr(E->getArg(0));
586 
587     llvm::Type *ArgType = ArgValue->getType();
588     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
589 
590     llvm::Type *ResultType = ConvertType(E->getType());
591     Value *Tmp =
592         Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
593                           llvm::ConstantInt::get(ArgType, 1));
594     Value *Zero = llvm::Constant::getNullValue(ArgType);
595     Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
596     Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
597     if (Result->getType() != ResultType)
598       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
599                                      "cast");
600     return RValue::get(Result);
601   }
602   case Builtin::BI__builtin_parity:
603   case Builtin::BI__builtin_parityl:
604   case Builtin::BI__builtin_parityll: {
605     // parity(x) -> ctpop(x) & 1
606     Value *ArgValue = EmitScalarExpr(E->getArg(0));
607 
608     llvm::Type *ArgType = ArgValue->getType();
609     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
610 
611     llvm::Type *ResultType = ConvertType(E->getType());
612     Value *Tmp = Builder.CreateCall(F, ArgValue);
613     Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
614     if (Result->getType() != ResultType)
615       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
616                                      "cast");
617     return RValue::get(Result);
618   }
619   case Builtin::BI__builtin_popcount:
620   case Builtin::BI__builtin_popcountl:
621   case Builtin::BI__builtin_popcountll: {
622     Value *ArgValue = EmitScalarExpr(E->getArg(0));
623 
624     llvm::Type *ArgType = ArgValue->getType();
625     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
626 
627     llvm::Type *ResultType = ConvertType(E->getType());
628     Value *Result = Builder.CreateCall(F, ArgValue);
629     if (Result->getType() != ResultType)
630       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
631                                      "cast");
632     return RValue::get(Result);
633   }
634   case Builtin::BI__builtin_unpredictable: {
635     // Always return the argument of __builtin_unpredictable. LLVM does not
636     // handle this builtin. Metadata for this builtin should be added directly
637     // to instructions such as branches or switches that use it.
638     return RValue::get(EmitScalarExpr(E->getArg(0)));
639   }
640   case Builtin::BI__builtin_expect: {
641     Value *ArgValue = EmitScalarExpr(E->getArg(0));
642     llvm::Type *ArgType = ArgValue->getType();
643 
644     Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
645     // Don't generate llvm.expect on -O0 as the backend won't use it for
646     // anything.
647     // Note, we still IRGen ExpectedValue because it could have side-effects.
648     if (CGM.getCodeGenOpts().OptimizationLevel == 0)
649       return RValue::get(ArgValue);
650 
651     Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
652     Value *Result =
653         Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
654     return RValue::get(Result);
655   }
656   case Builtin::BI__builtin_assume_aligned: {
657     Value *PtrValue = EmitScalarExpr(E->getArg(0));
658     Value *OffsetValue =
659       (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
660 
661     Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
662     ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
663     unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
664 
665     EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
666     return RValue::get(PtrValue);
667   }
668   case Builtin::BI__assume:
669   case Builtin::BI__builtin_assume: {
670     if (E->getArg(0)->HasSideEffects(getContext()))
671       return RValue::get(nullptr);
672 
673     Value *ArgValue = EmitScalarExpr(E->getArg(0));
674     Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
675     return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
676   }
677   case Builtin::BI__builtin_bswap16:
678   case Builtin::BI__builtin_bswap32:
679   case Builtin::BI__builtin_bswap64: {
680     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
681   }
682   case Builtin::BI__builtin_bitreverse8:
683   case Builtin::BI__builtin_bitreverse16:
684   case Builtin::BI__builtin_bitreverse32:
685   case Builtin::BI__builtin_bitreverse64: {
686     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
687   }
688   case Builtin::BI__builtin_object_size: {
689     unsigned Type =
690         E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
691     auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
692 
693     // We pass this builtin onto the optimizer so that it can figure out the
694     // object size in more complex cases.
695     return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType));
696   }
697   case Builtin::BI__builtin_prefetch: {
698     Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
699     // FIXME: Technically these constants should of type 'int', yes?
700     RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
701       llvm::ConstantInt::get(Int32Ty, 0);
702     Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
703       llvm::ConstantInt::get(Int32Ty, 3);
704     Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
705     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
706     return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
707   }
708   case Builtin::BI__builtin_readcyclecounter: {
709     Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
710     return RValue::get(Builder.CreateCall(F));
711   }
712   case Builtin::BI__builtin___clear_cache: {
713     Value *Begin = EmitScalarExpr(E->getArg(0));
714     Value *End = EmitScalarExpr(E->getArg(1));
715     Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
716     return RValue::get(Builder.CreateCall(F, {Begin, End}));
717   }
718   case Builtin::BI__builtin_trap:
719     return RValue::get(EmitTrapCall(Intrinsic::trap));
720   case Builtin::BI__debugbreak:
721     return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
722   case Builtin::BI__builtin_unreachable: {
723     if (SanOpts.has(SanitizerKind::Unreachable)) {
724       SanitizerScope SanScope(this);
725       EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
726                                SanitizerKind::Unreachable),
727                 "builtin_unreachable", EmitCheckSourceLocation(E->getExprLoc()),
728                 None);
729     } else
730       Builder.CreateUnreachable();
731 
732     // We do need to preserve an insertion point.
733     EmitBlock(createBasicBlock("unreachable.cont"));
734 
735     return RValue::get(nullptr);
736   }
737 
738   case Builtin::BI__builtin_powi:
739   case Builtin::BI__builtin_powif:
740   case Builtin::BI__builtin_powil: {
741     Value *Base = EmitScalarExpr(E->getArg(0));
742     Value *Exponent = EmitScalarExpr(E->getArg(1));
743     llvm::Type *ArgType = Base->getType();
744     Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
745     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
746   }
747 
748   case Builtin::BI__builtin_isgreater:
749   case Builtin::BI__builtin_isgreaterequal:
750   case Builtin::BI__builtin_isless:
751   case Builtin::BI__builtin_islessequal:
752   case Builtin::BI__builtin_islessgreater:
753   case Builtin::BI__builtin_isunordered: {
754     // Ordered comparisons: we know the arguments to these are matching scalar
755     // floating point values.
756     Value *LHS = EmitScalarExpr(E->getArg(0));
757     Value *RHS = EmitScalarExpr(E->getArg(1));
758 
759     switch (BuiltinID) {
760     default: llvm_unreachable("Unknown ordered comparison");
761     case Builtin::BI__builtin_isgreater:
762       LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
763       break;
764     case Builtin::BI__builtin_isgreaterequal:
765       LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
766       break;
767     case Builtin::BI__builtin_isless:
768       LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
769       break;
770     case Builtin::BI__builtin_islessequal:
771       LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
772       break;
773     case Builtin::BI__builtin_islessgreater:
774       LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
775       break;
776     case Builtin::BI__builtin_isunordered:
777       LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
778       break;
779     }
780     // ZExt bool to int type.
781     return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
782   }
783   case Builtin::BI__builtin_isnan: {
784     Value *V = EmitScalarExpr(E->getArg(0));
785     V = Builder.CreateFCmpUNO(V, V, "cmp");
786     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
787   }
788 
789   case Builtin::BI__builtin_isinf:
790   case Builtin::BI__builtin_isfinite: {
791     // isinf(x)    --> fabs(x) == infinity
792     // isfinite(x) --> fabs(x) != infinity
793     // x != NaN via the ordered compare in either case.
794     Value *V = EmitScalarExpr(E->getArg(0));
795     Value *Fabs = EmitFAbs(*this, V);
796     Constant *Infinity = ConstantFP::getInfinity(V->getType());
797     CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
798                                   ? CmpInst::FCMP_OEQ
799                                   : CmpInst::FCMP_ONE;
800     Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
801     return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
802   }
803 
804   case Builtin::BI__builtin_isinf_sign: {
805     // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
806     Value *Arg = EmitScalarExpr(E->getArg(0));
807     Value *AbsArg = EmitFAbs(*this, Arg);
808     Value *IsInf = Builder.CreateFCmpOEQ(
809         AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
810     Value *IsNeg = EmitSignBit(*this, Arg);
811 
812     llvm::Type *IntTy = ConvertType(E->getType());
813     Value *Zero = Constant::getNullValue(IntTy);
814     Value *One = ConstantInt::get(IntTy, 1);
815     Value *NegativeOne = ConstantInt::get(IntTy, -1);
816     Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
817     Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
818     return RValue::get(Result);
819   }
820 
821   case Builtin::BI__builtin_isnormal: {
822     // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
823     Value *V = EmitScalarExpr(E->getArg(0));
824     Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
825 
826     Value *Abs = EmitFAbs(*this, V);
827     Value *IsLessThanInf =
828       Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
829     APFloat Smallest = APFloat::getSmallestNormalized(
830                    getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
831     Value *IsNormal =
832       Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
833                             "isnormal");
834     V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
835     V = Builder.CreateAnd(V, IsNormal, "and");
836     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
837   }
838 
839   case Builtin::BI__builtin_fpclassify: {
840     Value *V = EmitScalarExpr(E->getArg(5));
841     llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
842 
843     // Create Result
844     BasicBlock *Begin = Builder.GetInsertBlock();
845     BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
846     Builder.SetInsertPoint(End);
847     PHINode *Result =
848       Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
849                         "fpclassify_result");
850 
851     // if (V==0) return FP_ZERO
852     Builder.SetInsertPoint(Begin);
853     Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
854                                           "iszero");
855     Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
856     BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
857     Builder.CreateCondBr(IsZero, End, NotZero);
858     Result->addIncoming(ZeroLiteral, Begin);
859 
860     // if (V != V) return FP_NAN
861     Builder.SetInsertPoint(NotZero);
862     Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
863     Value *NanLiteral = EmitScalarExpr(E->getArg(0));
864     BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
865     Builder.CreateCondBr(IsNan, End, NotNan);
866     Result->addIncoming(NanLiteral, NotZero);
867 
    // if (fabs(V) == infinity) return FP_INFINITE
869     Builder.SetInsertPoint(NotNan);
870     Value *VAbs = EmitFAbs(*this, V);
871     Value *IsInf =
872       Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
873                             "isinf");
874     Value *InfLiteral = EmitScalarExpr(E->getArg(1));
875     BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
876     Builder.CreateCondBr(IsInf, End, NotInf);
877     Result->addIncoming(InfLiteral, NotNan);
878 
879     // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
880     Builder.SetInsertPoint(NotInf);
881     APFloat Smallest = APFloat::getSmallestNormalized(
882         getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
883     Value *IsNormal =
884       Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
885                             "isnormal");
886     Value *NormalResult =
887       Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
888                            EmitScalarExpr(E->getArg(3)));
889     Builder.CreateBr(End);
890     Result->addIncoming(NormalResult, NotInf);
891 
892     // return Result
893     Builder.SetInsertPoint(End);
894     return RValue::get(Result);
895   }
896 
897   case Builtin::BIalloca:
898   case Builtin::BI_alloca:
899   case Builtin::BI__builtin_alloca: {
900     Value *Size = EmitScalarExpr(E->getArg(0));
901     return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size));
902   }
903   case Builtin::BIbzero:
904   case Builtin::BI__builtin_bzero: {
905     Address Dest = EmitPointerWithAlignment(E->getArg(0));
906     Value *SizeVal = EmitScalarExpr(E->getArg(1));
907     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
908                         E->getArg(0)->getExprLoc(), FD, 0);
909     Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
910     return RValue::get(Dest.getPointer());
911   }
912   case Builtin::BImemcpy:
913   case Builtin::BI__builtin_memcpy: {
914     Address Dest = EmitPointerWithAlignment(E->getArg(0));
915     Address Src = EmitPointerWithAlignment(E->getArg(1));
916     Value *SizeVal = EmitScalarExpr(E->getArg(2));
917     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
918                         E->getArg(0)->getExprLoc(), FD, 0);
919     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
920                         E->getArg(1)->getExprLoc(), FD, 1);
921     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
922     return RValue::get(Dest.getPointer());
923   }
924 
925   case Builtin::BI__builtin___memcpy_chk: {
    // fold __builtin___memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
927     llvm::APSInt Size, DstSize;
928     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
929         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
930       break;
931     if (Size.ugt(DstSize))
932       break;
933     Address Dest = EmitPointerWithAlignment(E->getArg(0));
934     Address Src = EmitPointerWithAlignment(E->getArg(1));
935     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
936     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
937     return RValue::get(Dest.getPointer());
938   }
939 
940   case Builtin::BI__builtin_objc_memmove_collectable: {
941     Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
942     Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
943     Value *SizeVal = EmitScalarExpr(E->getArg(2));
944     CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
945                                                   DestAddr, SrcAddr, SizeVal);
946     return RValue::get(DestAddr.getPointer());
947   }
948 
949   case Builtin::BI__builtin___memmove_chk: {
    // fold __builtin___memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
951     llvm::APSInt Size, DstSize;
952     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
953         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
954       break;
955     if (Size.ugt(DstSize))
956       break;
957     Address Dest = EmitPointerWithAlignment(E->getArg(0));
958     Address Src = EmitPointerWithAlignment(E->getArg(1));
959     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
960     Builder.CreateMemMove(Dest, Src, SizeVal, false);
961     return RValue::get(Dest.getPointer());
962   }
963 
964   case Builtin::BImemmove:
965   case Builtin::BI__builtin_memmove: {
966     Address Dest = EmitPointerWithAlignment(E->getArg(0));
967     Address Src = EmitPointerWithAlignment(E->getArg(1));
968     Value *SizeVal = EmitScalarExpr(E->getArg(2));
969     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
970                         E->getArg(0)->getExprLoc(), FD, 0);
971     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
972                         E->getArg(1)->getExprLoc(), FD, 1);
973     Builder.CreateMemMove(Dest, Src, SizeVal, false);
974     return RValue::get(Dest.getPointer());
975   }
976   case Builtin::BImemset:
977   case Builtin::BI__builtin_memset: {
978     Address Dest = EmitPointerWithAlignment(E->getArg(0));
979     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
980                                          Builder.getInt8Ty());
981     Value *SizeVal = EmitScalarExpr(E->getArg(2));
982     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
983                         E->getArg(0)->getExprLoc(), FD, 0);
984     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
985     return RValue::get(Dest.getPointer());
986   }
987   case Builtin::BI__builtin___memset_chk: {
    // fold __builtin___memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
989     llvm::APSInt Size, DstSize;
990     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
991         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
992       break;
993     if (Size.ugt(DstSize))
994       break;
995     Address Dest = EmitPointerWithAlignment(E->getArg(0));
996     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
997                                          Builder.getInt8Ty());
998     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
999     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1000     return RValue::get(Dest.getPointer());
1001   }
1002   case Builtin::BI__builtin_dwarf_cfa: {
1003     // The offset in bytes from the first argument to the CFA.
1004     //
1005     // Why on earth is this in the frontend?  Is there any reason at
1006     // all that the backend can't reasonably determine this while
1007     // lowering llvm.eh.dwarf.cfa()?
1008     //
1009     // TODO: If there's a satisfactory reason, add a target hook for
1010     // this instead of hard-coding 0, which is correct for most targets.
1011     int32_t Offset = 0;
1012 
1013     Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
1014     return RValue::get(Builder.CreateCall(F,
1015                                       llvm::ConstantInt::get(Int32Ty, Offset)));
1016   }
1017   case Builtin::BI__builtin_return_address: {
1018     Value *Depth =
1019         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1020     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1021     return RValue::get(Builder.CreateCall(F, Depth));
1022   }
1023   case Builtin::BI__builtin_frame_address: {
1024     Value *Depth =
1025         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1026     Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
1027     return RValue::get(Builder.CreateCall(F, Depth));
1028   }
1029   case Builtin::BI__builtin_extract_return_addr: {
1030     Value *Address = EmitScalarExpr(E->getArg(0));
1031     Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
1032     return RValue::get(Result);
1033   }
1034   case Builtin::BI__builtin_frob_return_addr: {
1035     Value *Address = EmitScalarExpr(E->getArg(0));
1036     Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
1037     return RValue::get(Result);
1038   }
1039   case Builtin::BI__builtin_dwarf_sp_column: {
1040     llvm::IntegerType *Ty
1041       = cast<llvm::IntegerType>(ConvertType(E->getType()));
1042     int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
1043     if (Column == -1) {
1044       CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
1045       return RValue::get(llvm::UndefValue::get(Ty));
1046     }
1047     return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
1048   }
1049   case Builtin::BI__builtin_init_dwarf_reg_size_table: {
1050     Value *Address = EmitScalarExpr(E->getArg(0));
1051     if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
1052       CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
1053     return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1054   }
1055   case Builtin::BI__builtin_eh_return: {
1056     Value *Int = EmitScalarExpr(E->getArg(0));
1057     Value *Ptr = EmitScalarExpr(E->getArg(1));
1058 
1059     llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
1060     assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
1061            "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
1062     Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
1063                                   ? Intrinsic::eh_return_i32
1064                                   : Intrinsic::eh_return_i64);
1065     Builder.CreateCall(F, {Int, Ptr});
1066     Builder.CreateUnreachable();
1067 
1068     // We do need to preserve an insertion point.
1069     EmitBlock(createBasicBlock("builtin_eh_return.cont"));
1070 
1071     return RValue::get(nullptr);
1072   }
1073   case Builtin::BI__builtin_unwind_init: {
1074     Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
1075     return RValue::get(Builder.CreateCall(F));
1076   }
1077   case Builtin::BI__builtin_extend_pointer: {
1078     // Extends a pointer to the size of an _Unwind_Word, which is
1079     // uint64_t on all platforms.  Generally this gets poked into a
1080     // register and eventually used as an address, so if the
1081     // addressing registers are wider than pointers and the platform
1082     // doesn't implicitly ignore high-order bits when doing
1083     // addressing, we need to make sure we zext / sext based on
1084     // the platform's expectations.
1085     //
1086     // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
1087 
1088     // Cast the pointer to intptr_t.
1089     Value *Ptr = EmitScalarExpr(E->getArg(0));
1090     Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
1091 
1092     // If that's 64 bits, we're done.
1093     if (IntPtrTy->getBitWidth() == 64)
1094       return RValue::get(Result);
1095 
1096     // Otherwise, ask the codegen data what to do.
1097     if (getTargetHooks().extendPointerWithSExt())
1098       return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
1099     else
1100       return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
1101   }
1102   case Builtin::BI__builtin_setjmp: {
1103     // Buffer is a void**.
1104     Address Buf = EmitPointerWithAlignment(E->getArg(0));
1105 
1106     // Store the frame pointer to the setjmp buffer.
1107     Value *FrameAddr =
1108       Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1109                          ConstantInt::get(Int32Ty, 0));
1110     Builder.CreateStore(FrameAddr, Buf);
1111 
1112     // Store the stack pointer to the setjmp buffer.
1113     Value *StackAddr =
1114         Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
1115     Address StackSaveSlot =
1116       Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
1117     Builder.CreateStore(StackAddr, StackSaveSlot);
1118 
1119     // Call LLVM's EH setjmp, which is lightweight.
1120     Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
1121     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1122     return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
1123   }
1124   case Builtin::BI__builtin_longjmp: {
1125     Value *Buf = EmitScalarExpr(E->getArg(0));
1126     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1127 
1128     // Call LLVM's EH longjmp, which is lightweight.
1129     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
1130 
1131     // longjmp doesn't return; mark this as unreachable.
1132     Builder.CreateUnreachable();
1133 
1134     // We do need to preserve an insertion point.
1135     EmitBlock(createBasicBlock("longjmp.cont"));
1136 
1137     return RValue::get(nullptr);
1138   }
1139   case Builtin::BI__sync_fetch_and_add:
1140   case Builtin::BI__sync_fetch_and_sub:
1141   case Builtin::BI__sync_fetch_and_or:
1142   case Builtin::BI__sync_fetch_and_and:
1143   case Builtin::BI__sync_fetch_and_xor:
1144   case Builtin::BI__sync_fetch_and_nand:
1145   case Builtin::BI__sync_add_and_fetch:
1146   case Builtin::BI__sync_sub_and_fetch:
1147   case Builtin::BI__sync_and_and_fetch:
1148   case Builtin::BI__sync_or_and_fetch:
1149   case Builtin::BI__sync_xor_and_fetch:
1150   case Builtin::BI__sync_nand_and_fetch:
1151   case Builtin::BI__sync_val_compare_and_swap:
1152   case Builtin::BI__sync_bool_compare_and_swap:
1153   case Builtin::BI__sync_lock_test_and_set:
1154   case Builtin::BI__sync_lock_release:
1155   case Builtin::BI__sync_swap:
1156     llvm_unreachable("Shouldn't make it through sema");
1157   case Builtin::BI__sync_fetch_and_add_1:
1158   case Builtin::BI__sync_fetch_and_add_2:
1159   case Builtin::BI__sync_fetch_and_add_4:
1160   case Builtin::BI__sync_fetch_and_add_8:
1161   case Builtin::BI__sync_fetch_and_add_16:
1162     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
1163   case Builtin::BI__sync_fetch_and_sub_1:
1164   case Builtin::BI__sync_fetch_and_sub_2:
1165   case Builtin::BI__sync_fetch_and_sub_4:
1166   case Builtin::BI__sync_fetch_and_sub_8:
1167   case Builtin::BI__sync_fetch_and_sub_16:
1168     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
1169   case Builtin::BI__sync_fetch_and_or_1:
1170   case Builtin::BI__sync_fetch_and_or_2:
1171   case Builtin::BI__sync_fetch_and_or_4:
1172   case Builtin::BI__sync_fetch_and_or_8:
1173   case Builtin::BI__sync_fetch_and_or_16:
1174     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
1175   case Builtin::BI__sync_fetch_and_and_1:
1176   case Builtin::BI__sync_fetch_and_and_2:
1177   case Builtin::BI__sync_fetch_and_and_4:
1178   case Builtin::BI__sync_fetch_and_and_8:
1179   case Builtin::BI__sync_fetch_and_and_16:
1180     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
1181   case Builtin::BI__sync_fetch_and_xor_1:
1182   case Builtin::BI__sync_fetch_and_xor_2:
1183   case Builtin::BI__sync_fetch_and_xor_4:
1184   case Builtin::BI__sync_fetch_and_xor_8:
1185   case Builtin::BI__sync_fetch_and_xor_16:
1186     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
1187   case Builtin::BI__sync_fetch_and_nand_1:
1188   case Builtin::BI__sync_fetch_and_nand_2:
1189   case Builtin::BI__sync_fetch_and_nand_4:
1190   case Builtin::BI__sync_fetch_and_nand_8:
1191   case Builtin::BI__sync_fetch_and_nand_16:
1192     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
1193 
1194   // Clang extensions: not overloaded yet.
1195   case Builtin::BI__sync_fetch_and_min:
1196     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
1197   case Builtin::BI__sync_fetch_and_max:
1198     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
1199   case Builtin::BI__sync_fetch_and_umin:
1200     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
1201   case Builtin::BI__sync_fetch_and_umax:
1202     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
1203 
1204   case Builtin::BI__sync_add_and_fetch_1:
1205   case Builtin::BI__sync_add_and_fetch_2:
1206   case Builtin::BI__sync_add_and_fetch_4:
1207   case Builtin::BI__sync_add_and_fetch_8:
1208   case Builtin::BI__sync_add_and_fetch_16:
1209     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
1210                                 llvm::Instruction::Add);
1211   case Builtin::BI__sync_sub_and_fetch_1:
1212   case Builtin::BI__sync_sub_and_fetch_2:
1213   case Builtin::BI__sync_sub_and_fetch_4:
1214   case Builtin::BI__sync_sub_and_fetch_8:
1215   case Builtin::BI__sync_sub_and_fetch_16:
1216     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
1217                                 llvm::Instruction::Sub);
1218   case Builtin::BI__sync_and_and_fetch_1:
1219   case Builtin::BI__sync_and_and_fetch_2:
1220   case Builtin::BI__sync_and_and_fetch_4:
1221   case Builtin::BI__sync_and_and_fetch_8:
1222   case Builtin::BI__sync_and_and_fetch_16:
1223     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
1224                                 llvm::Instruction::And);
1225   case Builtin::BI__sync_or_and_fetch_1:
1226   case Builtin::BI__sync_or_and_fetch_2:
1227   case Builtin::BI__sync_or_and_fetch_4:
1228   case Builtin::BI__sync_or_and_fetch_8:
1229   case Builtin::BI__sync_or_and_fetch_16:
1230     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
1231                                 llvm::Instruction::Or);
1232   case Builtin::BI__sync_xor_and_fetch_1:
1233   case Builtin::BI__sync_xor_and_fetch_2:
1234   case Builtin::BI__sync_xor_and_fetch_4:
1235   case Builtin::BI__sync_xor_and_fetch_8:
1236   case Builtin::BI__sync_xor_and_fetch_16:
1237     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
1238                                 llvm::Instruction::Xor);
1239   case Builtin::BI__sync_nand_and_fetch_1:
1240   case Builtin::BI__sync_nand_and_fetch_2:
1241   case Builtin::BI__sync_nand_and_fetch_4:
1242   case Builtin::BI__sync_nand_and_fetch_8:
1243   case Builtin::BI__sync_nand_and_fetch_16:
1244     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
1245                                 llvm::Instruction::And, true);
1246 
1247   case Builtin::BI__sync_val_compare_and_swap_1:
1248   case Builtin::BI__sync_val_compare_and_swap_2:
1249   case Builtin::BI__sync_val_compare_and_swap_4:
1250   case Builtin::BI__sync_val_compare_and_swap_8:
1251   case Builtin::BI__sync_val_compare_and_swap_16:
1252     return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
1253 
1254   case Builtin::BI__sync_bool_compare_and_swap_1:
1255   case Builtin::BI__sync_bool_compare_and_swap_2:
1256   case Builtin::BI__sync_bool_compare_and_swap_4:
1257   case Builtin::BI__sync_bool_compare_and_swap_8:
1258   case Builtin::BI__sync_bool_compare_and_swap_16:
1259     return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
1260 
1261   case Builtin::BI__sync_swap_1:
1262   case Builtin::BI__sync_swap_2:
1263   case Builtin::BI__sync_swap_4:
1264   case Builtin::BI__sync_swap_8:
1265   case Builtin::BI__sync_swap_16:
1266     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1267 
1268   case Builtin::BI__sync_lock_test_and_set_1:
1269   case Builtin::BI__sync_lock_test_and_set_2:
1270   case Builtin::BI__sync_lock_test_and_set_4:
1271   case Builtin::BI__sync_lock_test_and_set_8:
1272   case Builtin::BI__sync_lock_test_and_set_16:
1273     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1274 
1275   case Builtin::BI__sync_lock_release_1:
1276   case Builtin::BI__sync_lock_release_2:
1277   case Builtin::BI__sync_lock_release_4:
1278   case Builtin::BI__sync_lock_release_8:
1279   case Builtin::BI__sync_lock_release_16: {
1280     Value *Ptr = EmitScalarExpr(E->getArg(0));
1281     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
1282     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
1283     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
1284                                              StoreSize.getQuantity() * 8);
1285     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
1286     llvm::StoreInst *Store =
1287       Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
1288                                  StoreSize);
1289     Store->setAtomic(llvm::AtomicOrdering::Release);
1290     return RValue::get(nullptr);
1291   }
1292 
1293   case Builtin::BI__sync_synchronize: {
1294     // We assume this is supposed to correspond to a C++0x-style
1295     // sequentially-consistent fence (i.e. this is only usable for
    // synchronization, not device I/O or anything like that). This intrinsic
1297     // is really badly designed in the sense that in theory, there isn't
1298     // any way to safely use it... but in practice, it mostly works
1299     // to use it with non-atomic loads and stores to get acquire/release
1300     // semantics.
1301     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
1302     return RValue::get(nullptr);
1303   }
1304 
1305   case Builtin::BI__builtin_nontemporal_load:
1306     return RValue::get(EmitNontemporalLoad(*this, E));
1307   case Builtin::BI__builtin_nontemporal_store:
1308     return RValue::get(EmitNontemporalStore(*this, E));
1309   case Builtin::BI__c11_atomic_is_lock_free:
1310   case Builtin::BI__atomic_is_lock_free: {
1311     // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
1312     // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
1313     // _Atomic(T) is always properly-aligned.
1314     const char *LibCallName = "__atomic_is_lock_free";
1315     CallArgList Args;
1316     Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
1317              getContext().getSizeType());
1318     if (BuiltinID == Builtin::BI__atomic_is_lock_free)
1319       Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
1320                getContext().VoidPtrTy);
1321     else
1322       Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
1323                getContext().VoidPtrTy);
1324     const CGFunctionInfo &FuncInfo =
1325         CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
1326     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
1327     llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
1328     return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
1329   }
1330 
1331   case Builtin::BI__atomic_test_and_set: {
1332     // Look at the argument type to determine whether this is a volatile
1333     // operation. The parameter type is always volatile.
1334     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1335     bool Volatile =
1336         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1337 
1338     Value *Ptr = EmitScalarExpr(E->getArg(0));
1339     unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1340     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1341     Value *NewVal = Builder.getInt8(1);
1342     Value *Order = EmitScalarExpr(E->getArg(1));
1343     if (isa<llvm::ConstantInt>(Order)) {
1344       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1345       AtomicRMWInst *Result = nullptr;
1346       switch (ord) {
1347       case 0:  // memory_order_relaxed
1348       default: // invalid order
1349         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1350                                          llvm::AtomicOrdering::Monotonic);
1351         break;
1352       case 1: // memory_order_consume
1353       case 2: // memory_order_acquire
1354         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1355                                          llvm::AtomicOrdering::Acquire);
1356         break;
1357       case 3: // memory_order_release
1358         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1359                                          llvm::AtomicOrdering::Release);
1360         break;
1361       case 4: // memory_order_acq_rel
1362 
1363         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1364                                          llvm::AtomicOrdering::AcquireRelease);
1365         break;
1366       case 5: // memory_order_seq_cst
1367         Result = Builder.CreateAtomicRMW(
1368             llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1369             llvm::AtomicOrdering::SequentiallyConsistent);
1370         break;
1371       }
1372       Result->setVolatile(Volatile);
1373       return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1374     }
1375 
1376     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1377 
1378     llvm::BasicBlock *BBs[5] = {
1379       createBasicBlock("monotonic", CurFn),
1380       createBasicBlock("acquire", CurFn),
1381       createBasicBlock("release", CurFn),
1382       createBasicBlock("acqrel", CurFn),
1383       createBasicBlock("seqcst", CurFn)
1384     };
1385     llvm::AtomicOrdering Orders[5] = {
1386         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
1387         llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
1388         llvm::AtomicOrdering::SequentiallyConsistent};
1389 
1390     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1391     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1392 
1393     Builder.SetInsertPoint(ContBB);
1394     PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
1395 
1396     for (unsigned i = 0; i < 5; ++i) {
1397       Builder.SetInsertPoint(BBs[i]);
1398       AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1399                                                    Ptr, NewVal, Orders[i]);
1400       RMW->setVolatile(Volatile);
1401       Result->addIncoming(RMW, BBs[i]);
1402       Builder.CreateBr(ContBB);
1403     }
1404 
1405     SI->addCase(Builder.getInt32(0), BBs[0]);
1406     SI->addCase(Builder.getInt32(1), BBs[1]);
1407     SI->addCase(Builder.getInt32(2), BBs[1]);
1408     SI->addCase(Builder.getInt32(3), BBs[2]);
1409     SI->addCase(Builder.getInt32(4), BBs[3]);
1410     SI->addCase(Builder.getInt32(5), BBs[4]);
1411 
1412     Builder.SetInsertPoint(ContBB);
1413     return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1414   }
1415 
1416   case Builtin::BI__atomic_clear: {
1417     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1418     bool Volatile =
1419         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1420 
1421     Address Ptr = EmitPointerWithAlignment(E->getArg(0));
1422     unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
1423     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1424     Value *NewVal = Builder.getInt8(0);
1425     Value *Order = EmitScalarExpr(E->getArg(1));
1426     if (isa<llvm::ConstantInt>(Order)) {
1427       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1428       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1429       switch (ord) {
1430       case 0:  // memory_order_relaxed
1431       default: // invalid order
1432         Store->setOrdering(llvm::AtomicOrdering::Monotonic);
1433         break;
1434       case 3:  // memory_order_release
1435         Store->setOrdering(llvm::AtomicOrdering::Release);
1436         break;
1437       case 5:  // memory_order_seq_cst
1438         Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
1439         break;
1440       }
1441       return RValue::get(nullptr);
1442     }
1443 
1444     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1445 
1446     llvm::BasicBlock *BBs[3] = {
1447       createBasicBlock("monotonic", CurFn),
1448       createBasicBlock("release", CurFn),
1449       createBasicBlock("seqcst", CurFn)
1450     };
1451     llvm::AtomicOrdering Orders[3] = {
1452         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
1453         llvm::AtomicOrdering::SequentiallyConsistent};
1454 
1455     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1456     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1457 
1458     for (unsigned i = 0; i < 3; ++i) {
1459       Builder.SetInsertPoint(BBs[i]);
1460       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1461       Store->setOrdering(Orders[i]);
1462       Builder.CreateBr(ContBB);
1463     }
1464 
1465     SI->addCase(Builder.getInt32(0), BBs[0]);
1466     SI->addCase(Builder.getInt32(3), BBs[1]);
1467     SI->addCase(Builder.getInt32(5), BBs[2]);
1468 
1469     Builder.SetInsertPoint(ContBB);
1470     return RValue::get(nullptr);
1471   }
1472 
1473   case Builtin::BI__atomic_thread_fence:
1474   case Builtin::BI__atomic_signal_fence:
1475   case Builtin::BI__c11_atomic_thread_fence:
1476   case Builtin::BI__c11_atomic_signal_fence: {
1477     llvm::SynchronizationScope Scope;
1478     if (BuiltinID == Builtin::BI__atomic_signal_fence ||
1479         BuiltinID == Builtin::BI__c11_atomic_signal_fence)
1480       Scope = llvm::SingleThread;
1481     else
1482       Scope = llvm::CrossThread;
1483     Value *Order = EmitScalarExpr(E->getArg(0));
1484     if (isa<llvm::ConstantInt>(Order)) {
1485       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1486       switch (ord) {
1487       case 0:  // memory_order_relaxed
1488       default: // invalid order
1489         break;
1490       case 1:  // memory_order_consume
1491       case 2:  // memory_order_acquire
1492         Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
1493         break;
1494       case 3:  // memory_order_release
1495         Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
1496         break;
1497       case 4:  // memory_order_acq_rel
1498         Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
1499         break;
1500       case 5:  // memory_order_seq_cst
1501         Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
1502                             Scope);
1503         break;
1504       }
1505       return RValue::get(nullptr);
1506     }
1507 
1508     llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
1509     AcquireBB = createBasicBlock("acquire", CurFn);
1510     ReleaseBB = createBasicBlock("release", CurFn);
1511     AcqRelBB = createBasicBlock("acqrel", CurFn);
1512     SeqCstBB = createBasicBlock("seqcst", CurFn);
1513     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1514 
1515     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1516     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
1517 
1518     Builder.SetInsertPoint(AcquireBB);
1519     Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
1520     Builder.CreateBr(ContBB);
1521     SI->addCase(Builder.getInt32(1), AcquireBB);
1522     SI->addCase(Builder.getInt32(2), AcquireBB);
1523 
1524     Builder.SetInsertPoint(ReleaseBB);
1525     Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
1526     Builder.CreateBr(ContBB);
1527     SI->addCase(Builder.getInt32(3), ReleaseBB);
1528 
1529     Builder.SetInsertPoint(AcqRelBB);
1530     Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
1531     Builder.CreateBr(ContBB);
1532     SI->addCase(Builder.getInt32(4), AcqRelBB);
1533 
1534     Builder.SetInsertPoint(SeqCstBB);
1535     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Scope);
1536     Builder.CreateBr(ContBB);
1537     SI->addCase(Builder.getInt32(5), SeqCstBB);
1538 
1539     Builder.SetInsertPoint(ContBB);
1540     return RValue::get(nullptr);
1541   }
1542 
1543     // Library functions with special handling.
1544   case Builtin::BIsqrt:
1545   case Builtin::BIsqrtf:
1546   case Builtin::BIsqrtl: {
1547     // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
1548     // in finite- or unsafe-math mode (the intrinsic has different semantics
1549     // for handling negative numbers compared to the library function, so
1550     // -fmath-errno=0 is not enough).
1551     if (!FD->hasAttr<ConstAttr>())
1552       break;
1553     if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
1554           CGM.getCodeGenOpts().NoNaNsFPMath))
1555       break;
1556     Value *Arg0 = EmitScalarExpr(E->getArg(0));
1557     llvm::Type *ArgType = Arg0->getType();
1558     Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
1559     return RValue::get(Builder.CreateCall(F, Arg0));
1560   }
1561 
1562   case Builtin::BI__builtin_pow:
1563   case Builtin::BI__builtin_powf:
1564   case Builtin::BI__builtin_powl:
1565   case Builtin::BIpow:
1566   case Builtin::BIpowf:
1567   case Builtin::BIpowl: {
1568     // Transform a call to pow* into a @llvm.pow.* intrinsic call.
1569     if (!FD->hasAttr<ConstAttr>())
1570       break;
1571     Value *Base = EmitScalarExpr(E->getArg(0));
1572     Value *Exponent = EmitScalarExpr(E->getArg(1));
1573     llvm::Type *ArgType = Base->getType();
1574     Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
1575     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1576   }
1577 
1578   case Builtin::BIfma:
1579   case Builtin::BIfmaf:
1580   case Builtin::BIfmal:
1581   case Builtin::BI__builtin_fma:
1582   case Builtin::BI__builtin_fmaf:
1583   case Builtin::BI__builtin_fmal: {
1584     // Rewrite fma to intrinsic.
1585     Value *FirstArg = EmitScalarExpr(E->getArg(0));
1586     llvm::Type *ArgType = FirstArg->getType();
1587     Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
1588     return RValue::get(
1589         Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
1590                                EmitScalarExpr(E->getArg(2))}));
1591   }
1592 
1593   case Builtin::BI__builtin_signbit:
1594   case Builtin::BI__builtin_signbitf:
1595   case Builtin::BI__builtin_signbitl: {
1596     return RValue::get(
1597         Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
1598                            ConvertType(E->getType())));
1599   }
1600   case Builtin::BI__builtin_annotation: {
1601     llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
1602     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
1603                                       AnnVal->getType());
1604 
1605     // Get the annotation string, go through casts. Sema requires this to be a
1606     // non-wide string literal, potentially casted, so the cast<> is safe.
1607     const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
1608     StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
1609     return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
1610   }
1611   case Builtin::BI__builtin_addcb:
1612   case Builtin::BI__builtin_addcs:
1613   case Builtin::BI__builtin_addc:
1614   case Builtin::BI__builtin_addcl:
1615   case Builtin::BI__builtin_addcll:
1616   case Builtin::BI__builtin_subcb:
1617   case Builtin::BI__builtin_subcs:
1618   case Builtin::BI__builtin_subc:
1619   case Builtin::BI__builtin_subcl:
1620   case Builtin::BI__builtin_subcll: {
1621 
1622     // We translate all of these builtins from expressions of the form:
1623     //   int x = ..., y = ..., carryin = ..., carryout, result;
1624     //   result = __builtin_addc(x, y, carryin, &carryout);
1625     //
1626     // to LLVM IR of the form:
1627     //
1628     //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
1629     //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
1630     //   %carry1 = extractvalue {i32, i1} %tmp1, 1
1631     //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
1632     //                                                       i32 %carryin)
1633     //   %result = extractvalue {i32, i1} %tmp2, 0
1634     //   %carry2 = extractvalue {i32, i1} %tmp2, 1
1635     //   %tmp3 = or i1 %carry1, %carry2
1636     //   %tmp4 = zext i1 %tmp3 to i32
1637     //   store i32 %tmp4, i32* %carryout
1638 
1639     // Scalarize our inputs.
1640     llvm::Value *X = EmitScalarExpr(E->getArg(0));
1641     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1642     llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
1643     Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
1644 
1645     // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
1646     llvm::Intrinsic::ID IntrinsicId;
1647     switch (BuiltinID) {
1648     default: llvm_unreachable("Unknown multiprecision builtin id.");
1649     case Builtin::BI__builtin_addcb:
1650     case Builtin::BI__builtin_addcs:
1651     case Builtin::BI__builtin_addc:
1652     case Builtin::BI__builtin_addcl:
1653     case Builtin::BI__builtin_addcll:
1654       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1655       break;
1656     case Builtin::BI__builtin_subcb:
1657     case Builtin::BI__builtin_subcs:
1658     case Builtin::BI__builtin_subc:
1659     case Builtin::BI__builtin_subcl:
1660     case Builtin::BI__builtin_subcll:
1661       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1662       break;
1663     }
1664 
1665     // Construct our resulting LLVM IR expression.
1666     llvm::Value *Carry1;
1667     llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
1668                                               X, Y, Carry1);
1669     llvm::Value *Carry2;
1670     llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
1671                                               Sum1, Carryin, Carry2);
1672     llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
1673                                                X->getType());
1674     Builder.CreateStore(CarryOut, CarryOutPtr);
1675     return RValue::get(Sum2);
1676   }
1677 
  // Type-generic checked arithmetic: __builtin_{add,sub,mul}_overflow(a, b,
  // &res). The operation is performed in an integer type computed by
  // EncompassingIntegerType from both operand types and the result type, the
  // result is stored through the third argument, and the overflow flag is
  // returned.
  case Builtin::BI__builtin_add_overflow:
  case Builtin::BI__builtin_sub_overflow:
  case Builtin::BI__builtin_mul_overflow: {
    const clang::Expr *LeftArg = E->getArg(0);
    const clang::Expr *RightArg = E->getArg(1);
    const clang::Expr *ResultArg = E->getArg(2);

    // The result type is the pointee type of the third (pointer) argument.
    clang::QualType ResultQTy =
        ResultArg->getType()->castAs<PointerType>()->getPointeeType();

    // Gather width/signedness of both operands and the result, and derive
    // the common type in which the arithmetic is carried out.
    WidthAndSignedness LeftInfo =
        getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
    WidthAndSignedness RightInfo =
        getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
    WidthAndSignedness ResultInfo =
        getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
    WidthAndSignedness EncompassingInfo =
        EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});

    llvm::Type *EncompassingLLVMTy =
        llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);

    llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);

    // Choose the signed or unsigned flavour of the overflow intrinsic based
    // on the signedness of the encompassing type.
    llvm::Intrinsic::ID IntrinsicId;
    switch (BuiltinID) {
    default:
      llvm_unreachable("Unknown overflow builtin id.");
    case Builtin::BI__builtin_add_overflow:
      IntrinsicId = EncompassingInfo.Signed
                        ? llvm::Intrinsic::sadd_with_overflow
                        : llvm::Intrinsic::uadd_with_overflow;
      break;
    case Builtin::BI__builtin_sub_overflow:
      IntrinsicId = EncompassingInfo.Signed
                        ? llvm::Intrinsic::ssub_with_overflow
                        : llvm::Intrinsic::usub_with_overflow;
      break;
    case Builtin::BI__builtin_mul_overflow:
      IntrinsicId = EncompassingInfo.Signed
                        ? llvm::Intrinsic::smul_with_overflow
                        : llvm::Intrinsic::umul_with_overflow;
      break;
    }

    llvm::Value *Left = EmitScalarExpr(LeftArg);
    llvm::Value *Right = EmitScalarExpr(RightArg);
    Address ResultPtr = EmitPointerWithAlignment(ResultArg);

    // Extend each operand to the encompassing type.
    // Each operand is extended according to its own signedness.
    Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
    Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);

    // Perform the operation on the extended values.
    llvm::Value *Overflow, *Result;
    Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);

    if (EncompassingInfo.Width > ResultInfo.Width) {
      // The encompassing type is wider than the result type, so we need to
      // truncate it.
      llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);

      // To see if the truncation caused an overflow, we will extend
      // the result and then compare it to the original result.
      llvm::Value *ResultTruncExt = Builder.CreateIntCast(
          ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
      llvm::Value *TruncationOverflow =
          Builder.CreateICmpNE(Result, ResultTruncExt);

      // Overflow is reported if either the operation itself or the
      // truncation to the result type lost information.
      Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
      Result = ResultTrunc;
    }

    // Finally, store the result using the pointer.
    // The store is volatile when the pointee type is volatile-qualified.
    bool isVolatile =
      ResultArg->getType()->getPointeeType().isVolatileQualified();
    Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);

    return RValue::get(Overflow);
  }
1758 
1759   case Builtin::BI__builtin_uadd_overflow:
1760   case Builtin::BI__builtin_uaddl_overflow:
1761   case Builtin::BI__builtin_uaddll_overflow:
1762   case Builtin::BI__builtin_usub_overflow:
1763   case Builtin::BI__builtin_usubl_overflow:
1764   case Builtin::BI__builtin_usubll_overflow:
1765   case Builtin::BI__builtin_umul_overflow:
1766   case Builtin::BI__builtin_umull_overflow:
1767   case Builtin::BI__builtin_umulll_overflow:
1768   case Builtin::BI__builtin_sadd_overflow:
1769   case Builtin::BI__builtin_saddl_overflow:
1770   case Builtin::BI__builtin_saddll_overflow:
1771   case Builtin::BI__builtin_ssub_overflow:
1772   case Builtin::BI__builtin_ssubl_overflow:
1773   case Builtin::BI__builtin_ssubll_overflow:
1774   case Builtin::BI__builtin_smul_overflow:
1775   case Builtin::BI__builtin_smull_overflow:
1776   case Builtin::BI__builtin_smulll_overflow: {
1777 
1778     // We translate all of these builtins directly to the relevant llvm IR node.
1779 
1780     // Scalarize our inputs.
1781     llvm::Value *X = EmitScalarExpr(E->getArg(0));
1782     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1783     Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
1784 
1785     // Decide which of the overflow intrinsics we are lowering to:
1786     llvm::Intrinsic::ID IntrinsicId;
1787     switch (BuiltinID) {
1788     default: llvm_unreachable("Unknown overflow builtin id.");
1789     case Builtin::BI__builtin_uadd_overflow:
1790     case Builtin::BI__builtin_uaddl_overflow:
1791     case Builtin::BI__builtin_uaddll_overflow:
1792       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1793       break;
1794     case Builtin::BI__builtin_usub_overflow:
1795     case Builtin::BI__builtin_usubl_overflow:
1796     case Builtin::BI__builtin_usubll_overflow:
1797       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1798       break;
1799     case Builtin::BI__builtin_umul_overflow:
1800     case Builtin::BI__builtin_umull_overflow:
1801     case Builtin::BI__builtin_umulll_overflow:
1802       IntrinsicId = llvm::Intrinsic::umul_with_overflow;
1803       break;
1804     case Builtin::BI__builtin_sadd_overflow:
1805     case Builtin::BI__builtin_saddl_overflow:
1806     case Builtin::BI__builtin_saddll_overflow:
1807       IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
1808       break;
1809     case Builtin::BI__builtin_ssub_overflow:
1810     case Builtin::BI__builtin_ssubl_overflow:
1811     case Builtin::BI__builtin_ssubll_overflow:
1812       IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
1813       break;
1814     case Builtin::BI__builtin_smul_overflow:
1815     case Builtin::BI__builtin_smull_overflow:
1816     case Builtin::BI__builtin_smulll_overflow:
1817       IntrinsicId = llvm::Intrinsic::smul_with_overflow;
1818       break;
1819     }
1820 
1821 
1822     llvm::Value *Carry;
1823     llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
1824     Builder.CreateStore(Sum, SumOutPtr);
1825 
1826     return RValue::get(Carry);
1827   }
1828   case Builtin::BI__builtin_addressof:
1829     return RValue::get(EmitLValue(E->getArg(0)).getPointer());
1830   case Builtin::BI__builtin_operator_new:
1831     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
1832                                     E->getArg(0), false);
1833   case Builtin::BI__builtin_operator_delete:
1834     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
1835                                     E->getArg(0), true);
1836   case Builtin::BI__noop:
1837     // __noop always evaluates to an integer literal zero.
1838     return RValue::get(ConstantInt::get(IntTy, 0));
1839   case Builtin::BI__builtin_call_with_static_chain: {
1840     const CallExpr *Call = cast<CallExpr>(E->getArg(0));
1841     const Expr *Chain = E->getArg(1);
1842     return EmitCall(Call->getCallee()->getType(),
1843                     EmitScalarExpr(Call->getCallee()), Call, ReturnValue,
1844                     Call->getCalleeDecl(), EmitScalarExpr(Chain));
1845   }
  case Builtin::BI_InterlockedExchange:
  case Builtin::BI_InterlockedExchangePointer:
    // Both exchange builtins are emitted through the shared binary-atomic
    // helper, using the Xchg read-modify-write operation.
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1849   case Builtin::BI_InterlockedCompareExchangePointer: {
1850     llvm::Type *RTy;
1851     llvm::IntegerType *IntType =
1852       IntegerType::get(getLLVMContext(),
1853                        getContext().getTypeSize(E->getType()));
1854     llvm::Type *IntPtrType = IntType->getPointerTo();
1855 
1856     llvm::Value *Destination =
1857       Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
1858 
1859     llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
1860     RTy = Exchange->getType();
1861     Exchange = Builder.CreatePtrToInt(Exchange, IntType);
1862 
1863     llvm::Value *Comparand =
1864       Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
1865 
1866     auto Result =
1867         Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
1868                                     AtomicOrdering::SequentiallyConsistent,
1869                                     AtomicOrdering::SequentiallyConsistent);
1870     Result->setVolatile(true);
1871 
1872     return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
1873                                                                          0),
1874                                               RTy));
1875   }
1876   case Builtin::BI_InterlockedCompareExchange: {
1877     AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
1878         EmitScalarExpr(E->getArg(0)),
1879         EmitScalarExpr(E->getArg(2)),
1880         EmitScalarExpr(E->getArg(1)),
1881         AtomicOrdering::SequentiallyConsistent,
1882         AtomicOrdering::SequentiallyConsistent);
1883       CXI->setVolatile(true);
1884       return RValue::get(Builder.CreateExtractValue(CXI, 0));
1885   }
1886   case Builtin::BI_InterlockedIncrement: {
1887     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1888       AtomicRMWInst::Add,
1889       EmitScalarExpr(E->getArg(0)),
1890       ConstantInt::get(Int32Ty, 1),
1891       llvm::AtomicOrdering::SequentiallyConsistent);
1892     RMWI->setVolatile(true);
1893     return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(Int32Ty, 1)));
1894   }
1895   case Builtin::BI_InterlockedDecrement: {
1896     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1897       AtomicRMWInst::Sub,
1898       EmitScalarExpr(E->getArg(0)),
1899       ConstantInt::get(Int32Ty, 1),
1900       llvm::AtomicOrdering::SequentiallyConsistent);
1901     RMWI->setVolatile(true);
1902     return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(Int32Ty, 1)));
1903   }
1904   case Builtin::BI_InterlockedExchangeAdd: {
1905     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1906       AtomicRMWInst::Add,
1907       EmitScalarExpr(E->getArg(0)),
1908       EmitScalarExpr(E->getArg(1)),
1909       llvm::AtomicOrdering::SequentiallyConsistent);
1910     RMWI->setVolatile(true);
1911     return RValue::get(RMWI);
1912   }
1913   case Builtin::BI__readfsdword: {
1914     Value *IntToPtr =
1915       Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
1916                              llvm::PointerType::get(CGM.Int32Ty, 257));
1917     LoadInst *Load =
1918         Builder.CreateAlignedLoad(IntToPtr, /*Align=*/4, /*isVolatile=*/true);
1919     return RValue::get(Load);
1920   }
1921 
  // SEH intrinsics. Each one exists in two spellings (one or two leading
  // underscores); both spellings are lowered by the same helper.
  case Builtin::BI__exception_code:
  case Builtin::BI_exception_code:
    return RValue::get(EmitSEHExceptionCode());
  case Builtin::BI__exception_info:
  case Builtin::BI_exception_info:
    return RValue::get(EmitSEHExceptionInfo());
  case Builtin::BI__abnormal_termination:
  case Builtin::BI_abnormal_termination:
    return RValue::get(EmitSEHAbnormalTermination());
1931   case Builtin::BI_setjmpex: {
1932     if (getTarget().getTriple().isOSMSVCRT()) {
1933       llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
1934       llvm::AttributeSet ReturnsTwiceAttr =
1935           AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
1936                             llvm::Attribute::ReturnsTwice);
1937       llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
1938           llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
1939           "_setjmpex", ReturnsTwiceAttr);
1940       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
1941           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
1942       llvm::Value *FrameAddr =
1943           Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1944                              ConstantInt::get(Int32Ty, 0));
1945       llvm::Value *Args[] = {Buf, FrameAddr};
1946       llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
1947       CS.setAttributes(ReturnsTwiceAttr);
1948       return RValue::get(CS.getInstruction());
1949     }
1950     break;
1951   }
1952   case Builtin::BI_setjmp: {
1953     if (getTarget().getTriple().isOSMSVCRT()) {
1954       llvm::AttributeSet ReturnsTwiceAttr =
1955           AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
1956                             llvm::Attribute::ReturnsTwice);
1957       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
1958           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
1959       llvm::CallSite CS;
1960       if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
1961         llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
1962         llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
1963             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
1964             "_setjmp3", ReturnsTwiceAttr);
1965         llvm::Value *Count = ConstantInt::get(IntTy, 0);
1966         llvm::Value *Args[] = {Buf, Count};
1967         CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
1968       } else {
1969         llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
1970         llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
1971             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
1972             "_setjmp", ReturnsTwiceAttr);
1973         llvm::Value *FrameAddr =
1974             Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1975                                ConstantInt::get(Int32Ty, 0));
1976         llvm::Value *Args[] = {Buf, FrameAddr};
1977         CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
1978       }
1979       CS.setAttributes(ReturnsTwiceAttr);
1980       return RValue::get(CS.getInstruction());
1981     }
1982     break;
1983   }
1984 
1985   case Builtin::BI__GetExceptionInfo: {
1986     if (llvm::GlobalVariable *GV =
1987             CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
1988       return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
1989     break;
1990   }
1991 
1992   // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
1993   case Builtin::BIread_pipe:
1994   case Builtin::BIwrite_pipe: {
1995     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
1996           *Arg1 = EmitScalarExpr(E->getArg(1));
1997 
1998     // Type of the generic packet parameter.
1999     unsigned GenericAS =
2000         getContext().getTargetAddressSpace(LangAS::opencl_generic);
2001     llvm::Type *I8PTy = llvm::PointerType::get(
2002         llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
2003 
2004     // Testing which overloaded version we should generate the call for.
2005     if (2U == E->getNumArgs()) {
2006       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
2007                                                              : "__write_pipe_2";
2008       // Creating a generic function type to be able to call with any builtin or
2009       // user defined type.
2010       llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy};
2011       llvm::FunctionType *FTy = llvm::FunctionType::get(
2012           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2013       Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
2014       return RValue::get(Builder.CreateCall(
2015           CGM.CreateRuntimeFunction(FTy, Name), {Arg0, BCast}));
2016     } else {
2017       assert(4 == E->getNumArgs() &&
2018              "Illegal number of parameters to pipe function");
2019       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
2020                                                              : "__write_pipe_4";
2021 
2022       llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy};
2023       Value *Arg2 = EmitScalarExpr(E->getArg(2)),
2024             *Arg3 = EmitScalarExpr(E->getArg(3));
2025       llvm::FunctionType *FTy = llvm::FunctionType::get(
2026           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2027       Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
2028       // We know the third argument is an integer type, but we may need to cast
2029       // it to i32.
2030       if (Arg2->getType() != Int32Ty)
2031         Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
2032       return RValue::get(Builder.CreateCall(
2033           CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1, Arg2, BCast}));
2034     }
2035   }
  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
  // functions
2038   case Builtin::BIreserve_read_pipe:
2039   case Builtin::BIreserve_write_pipe:
2040   case Builtin::BIwork_group_reserve_read_pipe:
2041   case Builtin::BIwork_group_reserve_write_pipe:
2042   case Builtin::BIsub_group_reserve_read_pipe:
2043   case Builtin::BIsub_group_reserve_write_pipe: {
2044     // Composing the mangled name for the function.
2045     const char *Name;
2046     if (BuiltinID == Builtin::BIreserve_read_pipe)
2047       Name = "__reserve_read_pipe";
2048     else if (BuiltinID == Builtin::BIreserve_write_pipe)
2049       Name = "__reserve_write_pipe";
2050     else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
2051       Name = "__work_group_reserve_read_pipe";
2052     else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
2053       Name = "__work_group_reserve_write_pipe";
2054     else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
2055       Name = "__sub_group_reserve_read_pipe";
2056     else
2057       Name = "__sub_group_reserve_write_pipe";
2058 
2059     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2060           *Arg1 = EmitScalarExpr(E->getArg(1));
2061     llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
2062 
2063     // Building the generic function prototype.
2064     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty};
2065     llvm::FunctionType *FTy = llvm::FunctionType::get(
2066         ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2067     // We know the second argument is an integer type, but we may need to cast
2068     // it to i32.
2069     if (Arg1->getType() != Int32Ty)
2070       Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
2071     return RValue::get(
2072         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1}));
2073   }
  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
  // functions
2076   case Builtin::BIcommit_read_pipe:
2077   case Builtin::BIcommit_write_pipe:
2078   case Builtin::BIwork_group_commit_read_pipe:
2079   case Builtin::BIwork_group_commit_write_pipe:
2080   case Builtin::BIsub_group_commit_read_pipe:
2081   case Builtin::BIsub_group_commit_write_pipe: {
2082     const char *Name;
2083     if (BuiltinID == Builtin::BIcommit_read_pipe)
2084       Name = "__commit_read_pipe";
2085     else if (BuiltinID == Builtin::BIcommit_write_pipe)
2086       Name = "__commit_write_pipe";
2087     else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
2088       Name = "__work_group_commit_read_pipe";
2089     else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
2090       Name = "__work_group_commit_write_pipe";
2091     else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
2092       Name = "__sub_group_commit_read_pipe";
2093     else
2094       Name = "__sub_group_commit_write_pipe";
2095 
2096     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2097           *Arg1 = EmitScalarExpr(E->getArg(1));
2098 
2099     // Building the generic function prototype.
2100     llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType()};
2101     llvm::FunctionType *FTy =
2102         llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
2103                                 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2104 
2105     return RValue::get(
2106         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1}));
2107   }
2108   // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
2109   case Builtin::BIget_pipe_num_packets:
2110   case Builtin::BIget_pipe_max_packets: {
2111     const char *Name;
2112     if (BuiltinID == Builtin::BIget_pipe_num_packets)
2113       Name = "__get_pipe_num_packets";
2114     else
2115       Name = "__get_pipe_max_packets";
2116 
2117     // Building the generic function prototype.
2118     Value *Arg0 = EmitScalarExpr(E->getArg(0));
2119     llvm::Type *ArgTys[] = {Arg0->getType()};
2120     llvm::FunctionType *FTy = llvm::FunctionType::get(
2121         Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2122 
2123     return RValue::get(
2124         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0}));
2125   }
2126 
2127   case Builtin::BIprintf:
2128     if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice)
2129       return EmitCUDADevicePrintfCallExpr(E, ReturnValue);
2130     break;
2131   case Builtin::BI__builtin_canonicalize:
2132   case Builtin::BI__builtin_canonicalizef:
2133   case Builtin::BI__builtin_canonicalizel:
2134     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
2135   }
2136 
2137   // If this is an alias for a lib function (e.g. __builtin_sin), emit
2138   // the call using the normal call path, but using the unmangled
2139   // version of the function name.
2140   if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
2141     return emitLibraryCall(*this, FD, E,
2142                            CGM.getBuiltinLibFunction(FD, BuiltinID));
2143 
2144   // If this is a predefined lib function (e.g. malloc), emit the call
2145   // using exactly the normal call path.
2146   if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
2147     return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee()));
2148 
2149   // Check that a call to a target specific builtin has the correct target
2150   // features.
2151   // This is down here to avoid non-target specific builtins, however, if
2152   // generic builtins start to require generic target features then we
2153   // can move this up to the beginning of the function.
2154   checkTargetFeatures(E, FD);
2155 
2156   // See if we have a target specific intrinsic.
2157   const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
2158   Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
2159   if (const char *Prefix =
2160           llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch())) {
2161     IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);
    // NOTE: we don't need to perform a compatibility flag check here since the
    // intrinsics are declared in Builtins*.def via LANGBUILTIN, which filters
    // the MS builtins via ALL_MS_LANGUAGES, and are filtered earlier.
2165     if (IntrinsicID == Intrinsic::not_intrinsic)
2166       IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix, Name);
2167   }
2168 
2169   if (IntrinsicID != Intrinsic::not_intrinsic) {
2170     SmallVector<Value*, 16> Args;
2171 
2172     // Find out if any arguments are required to be integer constant
2173     // expressions.
2174     unsigned ICEArguments = 0;
2175     ASTContext::GetBuiltinTypeError Error;
2176     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
2177     assert(Error == ASTContext::GE_None && "Should not codegen an error");
2178 
2179     Function *F = CGM.getIntrinsic(IntrinsicID);
2180     llvm::FunctionType *FTy = F->getFunctionType();
2181 
2182     for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
2183       Value *ArgValue;
2184       // If this is a normal argument, just emit it as a scalar.
2185       if ((ICEArguments & (1 << i)) == 0) {
2186         ArgValue = EmitScalarExpr(E->getArg(i));
2187       } else {
2188         // If this is required to be a constant, constant fold it so that we
2189         // know that the generated intrinsic gets a ConstantInt.
2190         llvm::APSInt Result;
2191         bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
2192         assert(IsConst && "Constant arg isn't actually constant?");
2193         (void)IsConst;
2194         ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
2195       }
2196 
2197       // If the intrinsic arg type is different from the builtin arg type
2198       // we need to do a bit cast.
2199       llvm::Type *PTy = FTy->getParamType(i);
2200       if (PTy != ArgValue->getType()) {
2201         assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
2202                "Must be able to losslessly bit cast to param");
2203         ArgValue = Builder.CreateBitCast(ArgValue, PTy);
2204       }
2205 
2206       Args.push_back(ArgValue);
2207     }
2208 
2209     Value *V = Builder.CreateCall(F, Args);
2210     QualType BuiltinRetType = E->getType();
2211 
2212     llvm::Type *RetTy = VoidTy;
2213     if (!BuiltinRetType->isVoidType())
2214       RetTy = ConvertType(BuiltinRetType);
2215 
2216     if (RetTy != V->getType()) {
2217       assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
2218              "Must be able to losslessly bit cast result type");
2219       V = Builder.CreateBitCast(V, RetTy);
2220     }
2221 
2222     return RValue::get(V);
2223   }
2224 
2225   // See if we have a target specific builtin that needs to be lowered.
2226   if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
2227     return RValue::get(V);
2228 
2229   ErrorUnsupported(E, "builtin function");
2230 
2231   // Unknown builtin, for now just dump it out and return undef.
2232   return GetUndefRValue(E->getType());
2233 }
2234 
2235 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
2236                                         unsigned BuiltinID, const CallExpr *E,
2237                                         llvm::Triple::ArchType Arch) {
2238   switch (Arch) {
2239   case llvm::Triple::arm:
2240   case llvm::Triple::armeb:
2241   case llvm::Triple::thumb:
2242   case llvm::Triple::thumbeb:
2243     return CGF->EmitARMBuiltinExpr(BuiltinID, E);
2244   case llvm::Triple::aarch64:
2245   case llvm::Triple::aarch64_be:
2246     return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
2247   case llvm::Triple::x86:
2248   case llvm::Triple::x86_64:
2249     return CGF->EmitX86BuiltinExpr(BuiltinID, E);
2250   case llvm::Triple::ppc:
2251   case llvm::Triple::ppc64:
2252   case llvm::Triple::ppc64le:
2253     return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
2254   case llvm::Triple::r600:
2255   case llvm::Triple::amdgcn:
2256     return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
2257   case llvm::Triple::systemz:
2258     return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
2259   case llvm::Triple::nvptx:
2260   case llvm::Triple::nvptx64:
2261     return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
2262   case llvm::Triple::wasm32:
2263   case llvm::Triple::wasm64:
2264     return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
2265   default:
2266     return nullptr;
2267   }
2268 }
2269 
2270 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
2271                                               const CallExpr *E) {
2272   if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
2273     assert(getContext().getAuxTargetInfo() && "Missing aux target info");
2274     return EmitTargetArchBuiltinExpr(
2275         this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
2276         getContext().getAuxTargetInfo()->getTriple().getArch());
2277   }
2278 
2279   return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
2280                                    getTarget().getTriple().getArch());
2281 }
2282 
2283 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
2284                                      NeonTypeFlags TypeFlags,
2285                                      bool V1Ty=false) {
2286   int IsQuad = TypeFlags.isQuad();
2287   switch (TypeFlags.getEltType()) {
2288   case NeonTypeFlags::Int8:
2289   case NeonTypeFlags::Poly8:
2290     return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
2291   case NeonTypeFlags::Int16:
2292   case NeonTypeFlags::Poly16:
2293   case NeonTypeFlags::Float16:
2294     return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
2295   case NeonTypeFlags::Int32:
2296     return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
2297   case NeonTypeFlags::Int64:
2298   case NeonTypeFlags::Poly64:
2299     return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
2300   case NeonTypeFlags::Poly128:
2301     // FIXME: i128 and f128 doesn't get fully support in Clang and llvm.
2302     // There is a lot of i128 and f128 API missing.
2303     // so we use v16i8 to represent poly128 and get pattern matched.
2304     return llvm::VectorType::get(CGF->Int8Ty, 16);
2305   case NeonTypeFlags::Float32:
2306     return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
2307   case NeonTypeFlags::Float64:
2308     return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
2309   }
2310   llvm_unreachable("Unknown vector element type!");
2311 }
2312 
2313 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
2314                                           NeonTypeFlags IntTypeFlags) {
2315   int IsQuad = IntTypeFlags.isQuad();
2316   switch (IntTypeFlags.getEltType()) {
2317   case NeonTypeFlags::Int32:
2318     return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
2319   case NeonTypeFlags::Int64:
2320     return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
2321   default:
2322     llvm_unreachable("Type can't be converted to floating-point!");
2323   }
2324 }
2325 
2326 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
2327   unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
2328   Value* SV = llvm::ConstantVector::getSplat(nElts, C);
2329   return Builder.CreateShuffleVector(V, V, SV, "lane");
2330 }
2331 
2332 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
2333                                      const char *name,
2334                                      unsigned shift, bool rightshift) {
2335   unsigned j = 0;
2336   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
2337        ai != ae; ++ai, ++j)
2338     if (shift > 0 && shift == j)
2339       Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
2340     else
2341       Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
2342 
2343   return Builder.CreateCall(F, Ops, name);
2344 }
2345 
2346 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
2347                                             bool neg) {
2348   int SV = cast<ConstantInt>(V)->getSExtValue();
2349   return ConstantInt::get(Ty, neg ? -SV : SV);
2350 }
2351 
2352 // \brief Right-shift a vector by a constant.
2353 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
2354                                           llvm::Type *Ty, bool usgn,
2355                                           const char *name) {
2356   llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
2357 
2358   int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
2359   int EltSize = VTy->getScalarSizeInBits();
2360 
2361   Vec = Builder.CreateBitCast(Vec, Ty);
2362 
2363   // lshr/ashr are undefined when the shift amount is equal to the vector
2364   // element size.
2365   if (ShiftAmt == EltSize) {
2366     if (usgn) {
2367       // Right-shifting an unsigned value by its size yields 0.
2368       return llvm::ConstantAggregateZero::get(VTy);
2369     } else {
2370       // Right-shifting a signed value by its size is equivalent
2371       // to a shift of size-1.
2372       --ShiftAmt;
2373       Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
2374     }
2375   }
2376 
2377   Shift = EmitNeonShiftVector(Shift, Ty, false);
2378   if (usgn)
2379     return Builder.CreateLShr(Vec, Shift, name);
2380   else
2381     return Builder.CreateAShr(Vec, Shift, name);
2382 }
2383 
/// Bit flags used as the TypeModifier argument of the NEONMAP1/NEONMAP2 table
/// entries, followed by a few named combinations of them.
/// NOTE(review): the code that interprets these bits is not in this part of
/// the file; confirm the exact meaning of each flag against its users.
enum {
  // Which types to add (presumably to the intrinsic's overload list).
  AddRetType = 0x001,
  Add1ArgType = 0x002,
  Add2ArgTypes = 0x004,

  // Whether the return / argument types are vectorized.
  VectorizeRetType = 0x008,
  VectorizeArgTypes = 0x010,

  InventFloatType = 0x020,
  UnsignedAlts = 0x040,

  // Explicit vector width selection.
  Use64BitVectors = 0x080,
  Use128BitVectors = 0x100,

  // Named combinations of the flags above.
  Vectorize1ArgType = VectorizeArgTypes | Add1ArgType,
  VectorRet = VectorizeRetType | AddRetType,
  VectorRetGetArgs01 =
      VectorizeArgTypes | VectorizeRetType | Add2ArgTypes | AddRetType,
  FpCmpzModifiers =
      InventFloatType | Add1ArgType | VectorizeRetType | AddRetType
};
2405 
namespace {
/// One row of a NEON builtin table, as produced by the NEONMAP0/1/2 macros.
///
/// This must stay a plain aggregate in this member order: the tables
/// brace-initialize it positionally.
struct NeonIntrinsicInfo {
  const char *NameHint;      // stringified builtin base name
  unsigned BuiltinID;        // NEON::BI__builtin_neon_* value; the sort key
  unsigned LLVMIntrinsic;    // first intrinsic ID, 0 when none is given
  unsigned AltLLVMIntrinsic; // second intrinsic ID, 0 when none is given
  unsigned TypeModifier;     // OR of the type-modifier flag bits

  /// Row-versus-row ordering by builtin ID.
  bool operator<(const NeonIntrinsicInfo &Other) const {
    return *this < Other.BuiltinID;
  }
  /// Row-versus-raw-ID ordering: true when this row's ID is the smaller one.
  bool operator<(unsigned ID) const {
    return ID > BuiltinID;
  }
};
} // end anonymous namespace
2422 
// Row builders for the NeonIntrinsicInfo tables. Each expands to a braced
// initializer: { "<Base>", NEON::BI__builtin_neon_<Base>, primary intrinsic,
// alternate intrinsic, type-modifier flags }.

// NEONMAP0: a builtin with no intrinsic and no flags (all three fields are 0).
#define NEONMAP0(Base) \
  { #Base, NEON::BI__builtin_neon_##Base, 0, 0, 0 }

// NEONMAP1: a builtin with a single intrinsic; the alternate field is 0.
#define NEONMAP1(Base, Primary, Flags) \
  { #Base, NEON::BI__builtin_neon_##Base, Intrinsic::Primary, 0, Flags }

// NEONMAP2: a builtin with a primary and an alternate intrinsic.
#define NEONMAP2(Base, Primary, Alternate, Flags) \
  { #Base, NEON::BI__builtin_neon_##Base, Intrinsic::Primary, \
    Intrinsic::Alternate, Flags }
2434 
// Table of NEON builtins for 32-bit ARM, one NeonIntrinsicInfo row per
// builtin. NEONMAP0 rows name no LLVM intrinsic, NEONMAP1 rows name one
// (arm_neon_* or a generic one such as ctlz/ctpop), and NEONMAP2 rows name a
// primary and an alternate intrinsic. The last argument of NEONMAP1/NEONMAP2
// is the row's TypeModifier flag set.
// NOTE(review): rows are listed in ascending builtin-name order, and
// NeonIntrinsicInfo orders rows by BuiltinID; the lookup code is outside this
// part of the file and presumably relies on the table being sorted -- confirm
// before inserting or reordering rows.
static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vabs_v, arm_neon_vabs, 0),
  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
  NEONMAP0(vaddhn_v),
  NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
  NEONMAP1(vaeseq_v, arm_neon_aese, 0),
  NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
  NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vcage_v, arm_neon_vacge, 0),
  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcale_v, arm_neon_vacge, 0),
  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvt_s32_v),
  NEONMAP0(vcvt_s64_v),
  NEONMAP0(vcvt_u32_v),
  NEONMAP0(vcvt_u64_v),
  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_s32_v),
  NEONMAP0(vcvtq_s64_v),
  NEONMAP0(vcvtq_u32_v),
  NEONMAP0(vcvtq_u64_v),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP0(vld1_dup_v),
  NEONMAP1(vld1_v, arm_neon_vld1, 0),
  NEONMAP0(vld1q_dup_v),
  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2_v, arm_neon_vld2, 0),
  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3_v, arm_neon_vld3, 0),
  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4_v, arm_neon_vld4, 0),
  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP0(vmull_v),
  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
  NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
  NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
  NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
  NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
  NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
  NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
  NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
  NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
  NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
  NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
  NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
  NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
  NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
  NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
  NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP1(vst1_v, arm_neon_vst1, 0),
  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2_v, arm_neon_vst2, 0),
  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3_v, arm_neon_vst3, 0),
  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4_v, arm_neon_vst4, 0),
  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtrn_v),
  NEONMAP0(vtrnq_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
  NEONMAP0(vuzp_v),
  NEONMAP0(vuzpq_v),
  NEONMAP0(vzip_v),
  NEONMAP0(vzipq_v)
};
2651 
// Table of NEON vector builtins for AArch64, one NeonIntrinsicInfo row per
// builtin. NEONMAP0 rows name no LLVM intrinsic, NEONMAP1 rows name one
// (aarch64_neon_*, aarch64_crypto_*, or a generic one such as ctlz/ctpop), and
// NEONMAP2 rows name a primary and an alternate intrinsic. The last argument
// of NEONMAP1/NEONMAP2 is the row's TypeModifier flag set.
// NOTE(review): rows are listed in ascending builtin-name order, and
// NeonIntrinsicInfo orders rows by BuiltinID; the lookup code is outside this
// part of the file and presumably relies on the table being sorted -- confirm
// before inserting or reordering rows.
static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
  NEONMAP0(vaddhn_v),
  NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
  NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
  NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
};
2766 
// Lowering table for the scalar ("SISD") AArch64 NEON builtins listed below.
// Each NEONMAP1 row names one builtin, the LLVM intrinsic it is emitted as,
// and a bitmask of type-modifier flags (AddRetType, Add1ArgType,
// Vectorize1ArgType, VectorRet, Use64BitVectors, Use128BitVectors, or 0 when
// no overload types are needed).
//
// findNeonIntrinsicInMap() asserts std::is_sorted over the map it is given and
// then binary-searches it by builtin ID, so a new row has to be inserted at
// its sorted position rather than appended.
// NOTE(review): the NEONMAP1 macro and NeonIntrinsicInfo's ordering are defined
// outside this excerpt; presumably rows sort by builtin ID -- confirm there.
static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  // The vcale*/vcalt* rows reuse the facge/facgt intrinsics;
  // EmitCommonNeonSISDBuiltinExpr swaps the two operands for these builtins.
  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
  // NOTE(review): the signed pairwise add maps to uaddv, the same intrinsic as
  // the unsigned row below; presumably intentional because a non-widening add
  // is sign-agnostic -- confirm before changing.
  NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
  NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
};
2961 
// The table-building helper macros are not needed past this point; drop them
// so they cannot leak into the rest of the translation unit.
#undef NEONMAP0
#undef NEONMAP1
#undef NEONMAP2

// One "already verified" flag per intrinsic table. findNeonIntrinsicInMap()
// takes such a flag by reference and, in builds with assertions enabled, sets
// it after the first successful std::is_sorted check so the check is not
// repeated on later lookups.
// NOTE(review): the call sites pairing each flag with its table are outside
// this excerpt -- presumably the names match the tables one-to-one; confirm.
static bool NEONSIMDIntrinsicsProvenSorted = false;

static bool AArch64SIMDIntrinsicsProvenSorted = false;
static bool AArch64SISDIntrinsicsProvenSorted = false;
2970 
2971 
2972 static const NeonIntrinsicInfo *
2973 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
2974                        unsigned BuiltinID, bool &MapProvenSorted) {
2975 
2976 #ifndef NDEBUG
2977   if (!MapProvenSorted) {
2978     assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
2979     MapProvenSorted = true;
2980   }
2981 #endif
2982 
2983   const NeonIntrinsicInfo *Builtin =
2984       std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
2985 
2986   if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
2987     return Builtin;
2988 
2989   return nullptr;
2990 }
2991 
2992 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
2993                                                    unsigned Modifier,
2994                                                    llvm::Type *ArgType,
2995                                                    const CallExpr *E) {
2996   int VectorSize = 0;
2997   if (Modifier & Use64BitVectors)
2998     VectorSize = 64;
2999   else if (Modifier & Use128BitVectors)
3000     VectorSize = 128;
3001 
3002   // Return type.
3003   SmallVector<llvm::Type *, 3> Tys;
3004   if (Modifier & AddRetType) {
3005     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
3006     if (Modifier & VectorizeRetType)
3007       Ty = llvm::VectorType::get(
3008           Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
3009 
3010     Tys.push_back(Ty);
3011   }
3012 
3013   // Arguments.
3014   if (Modifier & VectorizeArgTypes) {
3015     int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
3016     ArgType = llvm::VectorType::get(ArgType, Elts);
3017   }
3018 
3019   if (Modifier & (Add1ArgType | Add2ArgTypes))
3020     Tys.push_back(ArgType);
3021 
3022   if (Modifier & Add2ArgTypes)
3023     Tys.push_back(ArgType);
3024 
3025   if (Modifier & InventFloatType)
3026     Tys.push_back(FloatTy);
3027 
3028   return CGM.getIntrinsic(IntrinsicID, Tys);
3029 }
3030 
3031 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
3032                                             const NeonIntrinsicInfo &SISDInfo,
3033                                             SmallVectorImpl<Value *> &Ops,
3034                                             const CallExpr *E) {
3035   unsigned BuiltinID = SISDInfo.BuiltinID;
3036   unsigned int Int = SISDInfo.LLVMIntrinsic;
3037   unsigned Modifier = SISDInfo.TypeModifier;
3038   const char *s = SISDInfo.NameHint;
3039 
3040   switch (BuiltinID) {
3041   case NEON::BI__builtin_neon_vcled_s64:
3042   case NEON::BI__builtin_neon_vcled_u64:
3043   case NEON::BI__builtin_neon_vcles_f32:
3044   case NEON::BI__builtin_neon_vcled_f64:
3045   case NEON::BI__builtin_neon_vcltd_s64:
3046   case NEON::BI__builtin_neon_vcltd_u64:
3047   case NEON::BI__builtin_neon_vclts_f32:
3048   case NEON::BI__builtin_neon_vcltd_f64:
3049   case NEON::BI__builtin_neon_vcales_f32:
3050   case NEON::BI__builtin_neon_vcaled_f64:
3051   case NEON::BI__builtin_neon_vcalts_f32:
3052   case NEON::BI__builtin_neon_vcaltd_f64:
3053     // Only one direction of comparisons actually exist, cmle is actually a cmge
3054     // with swapped operands. The table gives us the right intrinsic but we
3055     // still need to do the swap.
3056     std::swap(Ops[0], Ops[1]);
3057     break;
3058   }
3059 
3060   assert(Int && "Generic code assumes a valid intrinsic");
3061 
3062   // Determine the type(s) of this overloaded AArch64 intrinsic.
3063   const Expr *Arg = E->getArg(0);
3064   llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
3065   Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
3066 
3067   int j = 0;
3068   ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
3069   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3070        ai != ae; ++ai, ++j) {
3071     llvm::Type *ArgTy = ai->getType();
3072     if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
3073              ArgTy->getPrimitiveSizeInBits())
3074       continue;
3075 
3076     assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
3077     // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
3078     // it before inserting.
3079     Ops[j] =
3080         CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
3081     Ops[j] =
3082         CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
3083   }
3084 
3085   Value *Result = CGF.EmitNeonCall(F, Ops, s);
3086   llvm::Type *ResultType = CGF.ConvertType(E->getType());
3087   if (ResultType->getPrimitiveSizeInBits() <
3088       Result->getType()->getPrimitiveSizeInBits())
3089     return CGF.Builder.CreateExtractElement(Result, C0);
3090 
3091   return CGF.Builder.CreateBitCast(Result, ResultType, s);
3092 }
3093 
3094 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
3095     unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
3096     const char *NameHint, unsigned Modifier, const CallExpr *E,
3097     SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
3098   // Get the last argument, which specifies the vector type.
3099   llvm::APSInt NeonTypeConst;
3100   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3101   if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
3102     return nullptr;
3103 
3104   // Determine the type of this overloaded NEON intrinsic.
3105   NeonTypeFlags Type(NeonTypeConst.getZExtValue());
3106   bool Usgn = Type.isUnsigned();
3107   bool Quad = Type.isQuad();
3108 
3109   llvm::VectorType *VTy = GetNeonType(this, Type);
3110   llvm::Type *Ty = VTy;
3111   if (!Ty)
3112     return nullptr;
3113 
3114   auto getAlignmentValue32 = [&](Address addr) -> Value* {
3115     return Builder.getInt32(addr.getAlignment().getQuantity());
3116   };
3117 
3118   unsigned Int = LLVMIntrinsic;
3119   if ((Modifier & UnsignedAlts) && !Usgn)
3120     Int = AltLLVMIntrinsic;
3121 
3122   switch (BuiltinID) {
3123   default: break;
3124   case NEON::BI__builtin_neon_vabs_v:
3125   case NEON::BI__builtin_neon_vabsq_v:
3126     if (VTy->getElementType()->isFloatingPointTy())
3127       return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
3128     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
3129   case NEON::BI__builtin_neon_vaddhn_v: {
3130     llvm::VectorType *SrcTy =
3131         llvm::VectorType::getExtendedElementVectorType(VTy);
3132 
3133     // %sum = add <4 x i32> %lhs, %rhs
3134     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3135     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
3136     Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
3137 
3138     // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
3139     Constant *ShiftAmt =
3140         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
3141     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
3142 
3143     // %res = trunc <4 x i32> %high to <4 x i16>
3144     return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
3145   }
3146   case NEON::BI__builtin_neon_vcale_v:
3147   case NEON::BI__builtin_neon_vcaleq_v:
3148   case NEON::BI__builtin_neon_vcalt_v:
3149   case NEON::BI__builtin_neon_vcaltq_v:
3150     std::swap(Ops[0], Ops[1]);
3151   case NEON::BI__builtin_neon_vcage_v:
3152   case NEON::BI__builtin_neon_vcageq_v:
3153   case NEON::BI__builtin_neon_vcagt_v:
3154   case NEON::BI__builtin_neon_vcagtq_v: {
3155     llvm::Type *VecFlt = llvm::VectorType::get(
3156         VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
3157         VTy->getNumElements());
3158     llvm::Type *Tys[] = { VTy, VecFlt };
3159     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3160     return EmitNeonCall(F, Ops, NameHint);
3161   }
3162   case NEON::BI__builtin_neon_vclz_v:
3163   case NEON::BI__builtin_neon_vclzq_v:
3164     // We generate target-independent intrinsic, which needs a second argument
3165     // for whether or not clz of zero is undefined; on ARM it isn't.
3166     Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
3167     break;
3168   case NEON::BI__builtin_neon_vcvt_f32_v:
3169   case NEON::BI__builtin_neon_vcvtq_f32_v:
3170     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3171     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
3172     return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
3173                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
3174   case NEON::BI__builtin_neon_vcvt_n_f32_v:
3175   case NEON::BI__builtin_neon_vcvt_n_f64_v:
3176   case NEON::BI__builtin_neon_vcvtq_n_f32_v:
3177   case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
3178     llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
3179     Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
3180     Function *F = CGM.getIntrinsic(Int, Tys);
3181     return EmitNeonCall(F, Ops, "vcvt_n");
3182   }
3183   case NEON::BI__builtin_neon_vcvt_n_s32_v:
3184   case NEON::BI__builtin_neon_vcvt_n_u32_v:
3185   case NEON::BI__builtin_neon_vcvt_n_s64_v:
3186   case NEON::BI__builtin_neon_vcvt_n_u64_v:
3187   case NEON::BI__builtin_neon_vcvtq_n_s32_v:
3188   case NEON::BI__builtin_neon_vcvtq_n_u32_v:
3189   case NEON::BI__builtin_neon_vcvtq_n_s64_v:
3190   case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
3191     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3192     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3193     return EmitNeonCall(F, Ops, "vcvt_n");
3194   }
3195   case NEON::BI__builtin_neon_vcvt_s32_v:
3196   case NEON::BI__builtin_neon_vcvt_u32_v:
3197   case NEON::BI__builtin_neon_vcvt_s64_v:
3198   case NEON::BI__builtin_neon_vcvt_u64_v:
3199   case NEON::BI__builtin_neon_vcvtq_s32_v:
3200   case NEON::BI__builtin_neon_vcvtq_u32_v:
3201   case NEON::BI__builtin_neon_vcvtq_s64_v:
3202   case NEON::BI__builtin_neon_vcvtq_u64_v: {
3203     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
3204     return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
3205                 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
3206   }
3207   case NEON::BI__builtin_neon_vcvta_s32_v:
3208   case NEON::BI__builtin_neon_vcvta_s64_v:
3209   case NEON::BI__builtin_neon_vcvta_u32_v:
3210   case NEON::BI__builtin_neon_vcvta_u64_v:
3211   case NEON::BI__builtin_neon_vcvtaq_s32_v:
3212   case NEON::BI__builtin_neon_vcvtaq_s64_v:
3213   case NEON::BI__builtin_neon_vcvtaq_u32_v:
3214   case NEON::BI__builtin_neon_vcvtaq_u64_v:
3215   case NEON::BI__builtin_neon_vcvtn_s32_v:
3216   case NEON::BI__builtin_neon_vcvtn_s64_v:
3217   case NEON::BI__builtin_neon_vcvtn_u32_v:
3218   case NEON::BI__builtin_neon_vcvtn_u64_v:
3219   case NEON::BI__builtin_neon_vcvtnq_s32_v:
3220   case NEON::BI__builtin_neon_vcvtnq_s64_v:
3221   case NEON::BI__builtin_neon_vcvtnq_u32_v:
3222   case NEON::BI__builtin_neon_vcvtnq_u64_v:
3223   case NEON::BI__builtin_neon_vcvtp_s32_v:
3224   case NEON::BI__builtin_neon_vcvtp_s64_v:
3225   case NEON::BI__builtin_neon_vcvtp_u32_v:
3226   case NEON::BI__builtin_neon_vcvtp_u64_v:
3227   case NEON::BI__builtin_neon_vcvtpq_s32_v:
3228   case NEON::BI__builtin_neon_vcvtpq_s64_v:
3229   case NEON::BI__builtin_neon_vcvtpq_u32_v:
3230   case NEON::BI__builtin_neon_vcvtpq_u64_v:
3231   case NEON::BI__builtin_neon_vcvtm_s32_v:
3232   case NEON::BI__builtin_neon_vcvtm_s64_v:
3233   case NEON::BI__builtin_neon_vcvtm_u32_v:
3234   case NEON::BI__builtin_neon_vcvtm_u64_v:
3235   case NEON::BI__builtin_neon_vcvtmq_s32_v:
3236   case NEON::BI__builtin_neon_vcvtmq_s64_v:
3237   case NEON::BI__builtin_neon_vcvtmq_u32_v:
3238   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
3239     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3240     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
3241   }
3242   case NEON::BI__builtin_neon_vext_v:
3243   case NEON::BI__builtin_neon_vextq_v: {
3244     int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
3245     SmallVector<Constant*, 16> Indices;
3246     for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
3247       Indices.push_back(ConstantInt::get(Int32Ty, i+CV));
3248 
3249     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3250     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3251     Value *SV = llvm::ConstantVector::get(Indices);
3252     return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
3253   }
3254   case NEON::BI__builtin_neon_vfma_v:
3255   case NEON::BI__builtin_neon_vfmaq_v: {
3256     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
3257     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3258     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3259     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3260 
3261     // NEON intrinsic puts accumulator first, unlike the LLVM fma.
3262     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
3263   }
3264   case NEON::BI__builtin_neon_vld1_v:
3265   case NEON::BI__builtin_neon_vld1q_v: {
3266     llvm::Type *Tys[] = {Ty, Int8PtrTy};
3267     Ops.push_back(getAlignmentValue32(PtrOp0));
3268     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
3269   }
3270   case NEON::BI__builtin_neon_vld2_v:
3271   case NEON::BI__builtin_neon_vld2q_v:
3272   case NEON::BI__builtin_neon_vld3_v:
3273   case NEON::BI__builtin_neon_vld3q_v:
3274   case NEON::BI__builtin_neon_vld4_v:
3275   case NEON::BI__builtin_neon_vld4q_v: {
3276     llvm::Type *Tys[] = {Ty, Int8PtrTy};
3277     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3278     Value *Align = getAlignmentValue32(PtrOp1);
3279     Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
3280     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3281     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3282     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3283   }
3284   case NEON::BI__builtin_neon_vld1_dup_v:
3285   case NEON::BI__builtin_neon_vld1q_dup_v: {
3286     Value *V = UndefValue::get(Ty);
3287     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
3288     PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
3289     LoadInst *Ld = Builder.CreateLoad(PtrOp0);
3290     llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
3291     Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
3292     return EmitNeonSplat(Ops[0], CI);
3293   }
3294   case NEON::BI__builtin_neon_vld2_lane_v:
3295   case NEON::BI__builtin_neon_vld2q_lane_v:
3296   case NEON::BI__builtin_neon_vld3_lane_v:
3297   case NEON::BI__builtin_neon_vld3q_lane_v:
3298   case NEON::BI__builtin_neon_vld4_lane_v:
3299   case NEON::BI__builtin_neon_vld4q_lane_v: {
3300     llvm::Type *Tys[] = {Ty, Int8PtrTy};
3301     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3302     for (unsigned I = 2; I < Ops.size() - 1; ++I)
3303       Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
3304     Ops.push_back(getAlignmentValue32(PtrOp1));
3305     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
3306     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3307     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3308     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3309   }
3310   case NEON::BI__builtin_neon_vmovl_v: {
3311     llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
3312     Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
3313     if (Usgn)
3314       return Builder.CreateZExt(Ops[0], Ty, "vmovl");
3315     return Builder.CreateSExt(Ops[0], Ty, "vmovl");
3316   }
3317   case NEON::BI__builtin_neon_vmovn_v: {
3318     llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
3319     Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
3320     return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
3321   }
3322   case NEON::BI__builtin_neon_vmull_v:
3323     // FIXME: the integer vmull operations could be emitted in terms of pure
3324     // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
3325     // hoisting the exts outside loops. Until global ISel comes along that can
3326     // see through such movement this leads to bad CodeGen. So we need an
3327     // intrinsic for now.
3328     Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
3329     Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
3330     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
3331   case NEON::BI__builtin_neon_vpadal_v:
3332   case NEON::BI__builtin_neon_vpadalq_v: {
3333     // The source operand type has twice as many elements of half the size.
3334     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
3335     llvm::Type *EltTy =
3336       llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
3337     llvm::Type *NarrowTy =
3338       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
3339     llvm::Type *Tys[2] = { Ty, NarrowTy };
3340     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
3341   }
3342   case NEON::BI__builtin_neon_vpaddl_v:
3343   case NEON::BI__builtin_neon_vpaddlq_v: {
3344     // The source operand type has twice as many elements of half the size.
3345     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
3346     llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
3347     llvm::Type *NarrowTy =
3348       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
3349     llvm::Type *Tys[2] = { Ty, NarrowTy };
3350     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
3351   }
3352   case NEON::BI__builtin_neon_vqdmlal_v:
3353   case NEON::BI__builtin_neon_vqdmlsl_v: {
3354     SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
3355     Ops[1] =
3356         EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
3357     Ops.resize(2);
3358     return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
3359   }
3360   case NEON::BI__builtin_neon_vqshl_n_v:
3361   case NEON::BI__builtin_neon_vqshlq_n_v:
3362     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
3363                         1, false);
3364   case NEON::BI__builtin_neon_vqshlu_n_v:
3365   case NEON::BI__builtin_neon_vqshluq_n_v:
3366     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
3367                         1, false);
3368   case NEON::BI__builtin_neon_vrecpe_v:
3369   case NEON::BI__builtin_neon_vrecpeq_v:
3370   case NEON::BI__builtin_neon_vrsqrte_v:
3371   case NEON::BI__builtin_neon_vrsqrteq_v:
3372     Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
3373     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
3374 
3375   case NEON::BI__builtin_neon_vrshr_n_v:
3376   case NEON::BI__builtin_neon_vrshrq_n_v:
3377     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
3378                         1, true);
3379   case NEON::BI__builtin_neon_vshl_n_v:
3380   case NEON::BI__builtin_neon_vshlq_n_v:
3381     Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
3382     return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
3383                              "vshl_n");
3384   case NEON::BI__builtin_neon_vshll_n_v: {
3385     llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
3386     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3387     if (Usgn)
3388       Ops[0] = Builder.CreateZExt(Ops[0], VTy);
3389     else
3390       Ops[0] = Builder.CreateSExt(Ops[0], VTy);
3391     Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
3392     return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
3393   }
3394   case NEON::BI__builtin_neon_vshrn_n_v: {
3395     llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
3396     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3397     Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
3398     if (Usgn)
3399       Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
3400     else
3401       Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
3402     return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
3403   }
3404   case NEON::BI__builtin_neon_vshr_n_v:
3405   case NEON::BI__builtin_neon_vshrq_n_v:
3406     return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
3407   case NEON::BI__builtin_neon_vst1_v:
3408   case NEON::BI__builtin_neon_vst1q_v:
3409   case NEON::BI__builtin_neon_vst2_v:
3410   case NEON::BI__builtin_neon_vst2q_v:
3411   case NEON::BI__builtin_neon_vst3_v:
3412   case NEON::BI__builtin_neon_vst3q_v:
3413   case NEON::BI__builtin_neon_vst4_v:
3414   case NEON::BI__builtin_neon_vst4q_v:
3415   case NEON::BI__builtin_neon_vst2_lane_v:
3416   case NEON::BI__builtin_neon_vst2q_lane_v:
3417   case NEON::BI__builtin_neon_vst3_lane_v:
3418   case NEON::BI__builtin_neon_vst3q_lane_v:
3419   case NEON::BI__builtin_neon_vst4_lane_v:
3420   case NEON::BI__builtin_neon_vst4q_lane_v: {
3421     llvm::Type *Tys[] = {Int8PtrTy, Ty};
3422     Ops.push_back(getAlignmentValue32(PtrOp0));
3423     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
3424   }
3425   case NEON::BI__builtin_neon_vsubhn_v: {
3426     llvm::VectorType *SrcTy =
3427         llvm::VectorType::getExtendedElementVectorType(VTy);
3428 
3429     // %sum = add <4 x i32> %lhs, %rhs
3430     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3431     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
3432     Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
3433 
3434     // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
3435     Constant *ShiftAmt =
3436         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
3437     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
3438 
3439     // %res = trunc <4 x i32> %high to <4 x i16>
3440     return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
3441   }
3442   case NEON::BI__builtin_neon_vtrn_v:
3443   case NEON::BI__builtin_neon_vtrnq_v: {
3444     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
3445     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3446     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3447     Value *SV = nullptr;
3448 
3449     for (unsigned vi = 0; vi != 2; ++vi) {
3450       SmallVector<Constant*, 16> Indices;
3451       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
3452         Indices.push_back(Builder.getInt32(i+vi));
3453         Indices.push_back(Builder.getInt32(i+e+vi));
3454       }
3455       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
3456       SV = llvm::ConstantVector::get(Indices);
3457       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
3458       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
3459     }
3460     return SV;
3461   }
3462   case NEON::BI__builtin_neon_vtst_v:
3463   case NEON::BI__builtin_neon_vtstq_v: {
3464     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3465     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3466     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
3467     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
3468                                 ConstantAggregateZero::get(Ty));
3469     return Builder.CreateSExt(Ops[0], Ty, "vtst");
3470   }
3471   case NEON::BI__builtin_neon_vuzp_v:
3472   case NEON::BI__builtin_neon_vuzpq_v: {
3473     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
3474     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3475     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3476     Value *SV = nullptr;
3477 
3478     for (unsigned vi = 0; vi != 2; ++vi) {
3479       SmallVector<Constant*, 16> Indices;
3480       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
3481         Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
3482 
3483       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
3484       SV = llvm::ConstantVector::get(Indices);
3485       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
3486       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
3487     }
3488     return SV;
3489   }
3490   case NEON::BI__builtin_neon_vzip_v:
3491   case NEON::BI__builtin_neon_vzipq_v: {
3492     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
3493     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3494     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3495     Value *SV = nullptr;
3496 
3497     for (unsigned vi = 0; vi != 2; ++vi) {
3498       SmallVector<Constant*, 16> Indices;
3499       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
3500         Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
3501         Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
3502       }
3503       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
3504       SV = llvm::ConstantVector::get(Indices);
3505       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
3506       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
3507     }
3508     return SV;
3509   }
3510   }
3511 
3512   assert(Int && "Expected valid intrinsic number");
3513 
3514   // Determine the type(s) of this overloaded AArch64 intrinsic.
3515   Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
3516 
3517   Value *Result = EmitNeonCall(F, Ops, NameHint);
3518   llvm::Type *ResultType = ConvertType(E->getType());
3519   // AArch64 intrinsic one-element vector type cast to
3520   // scalar type expected by the builtin
3521   return Builder.CreateBitCast(Result, ResultType, NameHint);
3522 }
3523 
3524 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
3525     Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
3526     const CmpInst::Predicate Ip, const Twine &Name) {
3527   llvm::Type *OTy = Op->getType();
3528 
3529   // FIXME: this is utterly horrific. We should not be looking at previous
3530   // codegen context to find out what needs doing. Unfortunately TableGen
3531   // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
3532   // (etc).
3533   if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
3534     OTy = BI->getOperand(0)->getType();
3535 
3536   Op = Builder.CreateBitCast(Op, OTy);
3537   if (OTy->getScalarType()->isFloatingPointTy()) {
3538     Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
3539   } else {
3540     Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
3541   }
3542   return Builder.CreateSExt(Op, Ty, Name);
3543 }
3544 
3545 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
3546                                  Value *ExtOp, Value *IndexOp,
3547                                  llvm::Type *ResTy, unsigned IntID,
3548                                  const char *Name) {
3549   SmallVector<Value *, 2> TblOps;
3550   if (ExtOp)
3551     TblOps.push_back(ExtOp);
3552 
3553   // Build a vector containing sequential number like (0, 1, 2, ..., 15)
3554   SmallVector<Constant*, 16> Indices;
3555   llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
3556   for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
3557     Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i));
3558     Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i+1));
3559   }
3560   Value *SV = llvm::ConstantVector::get(Indices);
3561 
3562   int PairPos = 0, End = Ops.size() - 1;
3563   while (PairPos < End) {
3564     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
3565                                                      Ops[PairPos+1], SV, Name));
3566     PairPos += 2;
3567   }
3568 
3569   // If there's an odd number of 64-bit lookup table, fill the high 64-bit
3570   // of the 128-bit lookup table with zero.
3571   if (PairPos == End) {
3572     Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
3573     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
3574                                                      ZeroTbl, SV, Name));
3575   }
3576 
3577   Function *TblF;
3578   TblOps.push_back(IndexOp);
3579   TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
3580 
3581   return CGF.EmitNeonCall(TblF, TblOps, Name);
3582 }
3583 
3584 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
3585   unsigned Value;
3586   switch (BuiltinID) {
3587   default:
3588     return nullptr;
3589   case ARM::BI__builtin_arm_nop:
3590     Value = 0;
3591     break;
3592   case ARM::BI__builtin_arm_yield:
3593   case ARM::BI__yield:
3594     Value = 1;
3595     break;
3596   case ARM::BI__builtin_arm_wfe:
3597   case ARM::BI__wfe:
3598     Value = 2;
3599     break;
3600   case ARM::BI__builtin_arm_wfi:
3601   case ARM::BI__wfi:
3602     Value = 3;
3603     break;
3604   case ARM::BI__builtin_arm_sev:
3605   case ARM::BI__sev:
3606     Value = 4;
3607     break;
3608   case ARM::BI__builtin_arm_sevl:
3609   case ARM::BI__sevl:
3610     Value = 5;
3611     break;
3612   }
3613 
3614   return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
3615                             llvm::ConstantInt::get(Int32Ty, Value));
3616 }
3617 
3618 // Generates the IR for the read/write special register builtin,
3619 // ValueType is the type of the value that is to be written or read,
3620 // RegisterType is the type of the register being written to or read from.
3621 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
3622                                          const CallExpr *E,
3623                                          llvm::Type *RegisterType,
3624                                          llvm::Type *ValueType, bool IsRead) {
3625   // write and register intrinsics only support 32 and 64 bit operations.
3626   assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
3627           && "Unsupported size for register.");
3628 
3629   CodeGen::CGBuilderTy &Builder = CGF.Builder;
3630   CodeGen::CodeGenModule &CGM = CGF.CGM;
3631   LLVMContext &Context = CGM.getLLVMContext();
3632 
3633   const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
3634   StringRef SysReg = cast<StringLiteral>(SysRegStrExpr)->getString();
3635 
3636   llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
3637   llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
3638   llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
3639 
3640   llvm::Type *Types[] = { RegisterType };
3641 
3642   bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
3643   assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
3644             && "Can't fit 64-bit value in 32-bit register");
3645 
3646   if (IsRead) {
3647     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
3648     llvm::Value *Call = Builder.CreateCall(F, Metadata);
3649 
3650     if (MixedTypes)
3651       // Read into 64 bit register and then truncate result to 32 bit.
3652       return Builder.CreateTrunc(Call, ValueType);
3653 
3654     if (ValueType->isPointerTy())
3655       // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
3656       return Builder.CreateIntToPtr(Call, ValueType);
3657 
3658     return Call;
3659   }
3660 
3661   llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
3662   llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
3663   if (MixedTypes) {
3664     // Extend 32 bit write value to 64 bit to pass to write.
3665     ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
3666     return Builder.CreateCall(F, { Metadata, ArgValue });
3667   }
3668 
3669   if (ValueType->isPointerTy()) {
3670     // Have VoidPtrTy ArgValue but want to return an i32/i64.
3671     ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
3672     return Builder.CreateCall(F, { Metadata, ArgValue });
3673   }
3674 
3675   return Builder.CreateCall(F, { Metadata, ArgValue });
3676 }
3677 
3678 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
3679 /// argument that specifies the vector type.
3680 static bool HasExtraNeonArgument(unsigned BuiltinID) {
3681   switch (BuiltinID) {
3682   default: break;
3683   case NEON::BI__builtin_neon_vget_lane_i8:
3684   case NEON::BI__builtin_neon_vget_lane_i16:
3685   case NEON::BI__builtin_neon_vget_lane_i32:
3686   case NEON::BI__builtin_neon_vget_lane_i64:
3687   case NEON::BI__builtin_neon_vget_lane_f32:
3688   case NEON::BI__builtin_neon_vgetq_lane_i8:
3689   case NEON::BI__builtin_neon_vgetq_lane_i16:
3690   case NEON::BI__builtin_neon_vgetq_lane_i32:
3691   case NEON::BI__builtin_neon_vgetq_lane_i64:
3692   case NEON::BI__builtin_neon_vgetq_lane_f32:
3693   case NEON::BI__builtin_neon_vset_lane_i8:
3694   case NEON::BI__builtin_neon_vset_lane_i16:
3695   case NEON::BI__builtin_neon_vset_lane_i32:
3696   case NEON::BI__builtin_neon_vset_lane_i64:
3697   case NEON::BI__builtin_neon_vset_lane_f32:
3698   case NEON::BI__builtin_neon_vsetq_lane_i8:
3699   case NEON::BI__builtin_neon_vsetq_lane_i16:
3700   case NEON::BI__builtin_neon_vsetq_lane_i32:
3701   case NEON::BI__builtin_neon_vsetq_lane_i64:
3702   case NEON::BI__builtin_neon_vsetq_lane_f32:
3703   case NEON::BI__builtin_neon_vsha1h_u32:
3704   case NEON::BI__builtin_neon_vsha1cq_u32:
3705   case NEON::BI__builtin_neon_vsha1pq_u32:
3706   case NEON::BI__builtin_neon_vsha1mq_u32:
3707   case ARM::BI_MoveToCoprocessor:
3708   case ARM::BI_MoveToCoprocessor2:
3709     return false;
3710   }
3711   return true;
3712 }
3713 
3714 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
3715                                            const CallExpr *E) {
3716   if (auto Hint = GetValueForARMHint(BuiltinID))
3717     return Hint;
3718 
3719   if (BuiltinID == ARM::BI__emit) {
3720     bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
3721     llvm::FunctionType *FTy =
3722         llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
3723 
3724     APSInt Value;
3725     if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
3726       llvm_unreachable("Sema will ensure that the parameter is constant");
3727 
3728     uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
3729 
3730     llvm::InlineAsm *Emit =
3731         IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
3732                                  /*SideEffects=*/true)
3733                 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
3734                                  /*SideEffects=*/true);
3735 
3736     return Builder.CreateCall(Emit);
3737   }
3738 
3739   if (BuiltinID == ARM::BI__builtin_arm_dbg) {
3740     Value *Option = EmitScalarExpr(E->getArg(0));
3741     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
3742   }
3743 
3744   if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
3745     Value *Address = EmitScalarExpr(E->getArg(0));
3746     Value *RW      = EmitScalarExpr(E->getArg(1));
3747     Value *IsData  = EmitScalarExpr(E->getArg(2));
3748 
3749     // Locality is not supported on ARM target
3750     Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
3751 
3752     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
3753     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
3754   }
3755 
3756   if (BuiltinID == ARM::BI__builtin_arm_rbit) {
3757     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_rbit),
3758                                                EmitScalarExpr(E->getArg(0)),
3759                               "rbit");
3760   }
3761 
3762   if (BuiltinID == ARM::BI__clear_cache) {
3763     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
3764     const FunctionDecl *FD = E->getDirectCallee();
3765     Value *Ops[2];
3766     for (unsigned i = 0; i < 2; i++)
3767       Ops[i] = EmitScalarExpr(E->getArg(i));
3768     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
3769     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
3770     StringRef Name = FD->getName();
3771     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
3772   }
3773 
3774   if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
3775       ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
3776         BuiltinID == ARM::BI__builtin_arm_ldaex) &&
3777        getContext().getTypeSize(E->getType()) == 64) ||
3778       BuiltinID == ARM::BI__ldrexd) {
3779     Function *F;
3780 
3781     switch (BuiltinID) {
3782     default: llvm_unreachable("unexpected builtin");
3783     case ARM::BI__builtin_arm_ldaex:
3784       F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
3785       break;
3786     case ARM::BI__builtin_arm_ldrexd:
3787     case ARM::BI__builtin_arm_ldrex:
3788     case ARM::BI__ldrexd:
3789       F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
3790       break;
3791     }
3792 
3793     Value *LdPtr = EmitScalarExpr(E->getArg(0));
3794     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
3795                                     "ldrexd");
3796 
3797     Value *Val0 = Builder.CreateExtractValue(Val, 1);
3798     Value *Val1 = Builder.CreateExtractValue(Val, 0);
3799     Val0 = Builder.CreateZExt(Val0, Int64Ty);
3800     Val1 = Builder.CreateZExt(Val1, Int64Ty);
3801 
3802     Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
3803     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
3804     Val = Builder.CreateOr(Val, Val1);
3805     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
3806   }
3807 
3808   if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
3809       BuiltinID == ARM::BI__builtin_arm_ldaex) {
3810     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
3811 
3812     QualType Ty = E->getType();
3813     llvm::Type *RealResTy = ConvertType(Ty);
3814     llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
3815                                                   getContext().getTypeSize(Ty));
3816     LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
3817 
3818     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
3819                                        ? Intrinsic::arm_ldaex
3820                                        : Intrinsic::arm_ldrex,
3821                                    LoadAddr->getType());
3822     Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
3823 
3824     if (RealResTy->isPointerTy())
3825       return Builder.CreateIntToPtr(Val, RealResTy);
3826     else {
3827       Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
3828       return Builder.CreateBitCast(Val, RealResTy);
3829     }
3830   }
3831 
3832   if (BuiltinID == ARM::BI__builtin_arm_strexd ||
3833       ((BuiltinID == ARM::BI__builtin_arm_stlex ||
3834         BuiltinID == ARM::BI__builtin_arm_strex) &&
3835        getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
3836     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
3837                                        ? Intrinsic::arm_stlexd
3838                                        : Intrinsic::arm_strexd);
3839     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, nullptr);
3840 
3841     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
3842     Value *Val = EmitScalarExpr(E->getArg(0));
3843     Builder.CreateStore(Val, Tmp);
3844 
3845     Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
3846     Val = Builder.CreateLoad(LdPtr);
3847 
3848     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
3849     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
3850     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
3851     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
3852   }
3853 
3854   if (BuiltinID == ARM::BI__builtin_arm_strex ||
3855       BuiltinID == ARM::BI__builtin_arm_stlex) {
3856     Value *StoreVal = EmitScalarExpr(E->getArg(0));
3857     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
3858 
3859     QualType Ty = E->getArg(0)->getType();
3860     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
3861                                                  getContext().getTypeSize(Ty));
3862     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
3863 
3864     if (StoreVal->getType()->isPointerTy())
3865       StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
3866     else {
3867       StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
3868       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
3869     }
3870 
3871     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
3872                                        ? Intrinsic::arm_stlex
3873                                        : Intrinsic::arm_strex,
3874                                    StoreAddr->getType());
3875     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
3876   }
3877 
3878   if (BuiltinID == ARM::BI__builtin_arm_clrex) {
3879     Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
3880     return Builder.CreateCall(F);
3881   }
3882 
3883   // CRC32
3884   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
3885   switch (BuiltinID) {
3886   case ARM::BI__builtin_arm_crc32b:
3887     CRCIntrinsicID = Intrinsic::arm_crc32b; break;
3888   case ARM::BI__builtin_arm_crc32cb:
3889     CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
3890   case ARM::BI__builtin_arm_crc32h:
3891     CRCIntrinsicID = Intrinsic::arm_crc32h; break;
3892   case ARM::BI__builtin_arm_crc32ch:
3893     CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
3894   case ARM::BI__builtin_arm_crc32w:
3895   case ARM::BI__builtin_arm_crc32d:
3896     CRCIntrinsicID = Intrinsic::arm_crc32w; break;
3897   case ARM::BI__builtin_arm_crc32cw:
3898   case ARM::BI__builtin_arm_crc32cd:
3899     CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
3900   }
3901 
3902   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
3903     Value *Arg0 = EmitScalarExpr(E->getArg(0));
3904     Value *Arg1 = EmitScalarExpr(E->getArg(1));
3905 
3906     // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
3907     // intrinsics, hence we need different codegen for these cases.
3908     if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
3909         BuiltinID == ARM::BI__builtin_arm_crc32cd) {
3910       Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
3911       Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
3912       Value *Arg1b = Builder.CreateLShr(Arg1, C1);
3913       Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
3914 
3915       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
3916       Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
3917       return Builder.CreateCall(F, {Res, Arg1b});
3918     } else {
3919       Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
3920 
3921       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
3922       return Builder.CreateCall(F, {Arg0, Arg1});
3923     }
3924   }
3925 
3926   if (BuiltinID == ARM::BI__builtin_arm_rsr ||
3927       BuiltinID == ARM::BI__builtin_arm_rsr64 ||
3928       BuiltinID == ARM::BI__builtin_arm_rsrp ||
3929       BuiltinID == ARM::BI__builtin_arm_wsr ||
3930       BuiltinID == ARM::BI__builtin_arm_wsr64 ||
3931       BuiltinID == ARM::BI__builtin_arm_wsrp) {
3932 
3933     bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
3934                   BuiltinID == ARM::BI__builtin_arm_rsr64 ||
3935                   BuiltinID == ARM::BI__builtin_arm_rsrp;
3936 
3937     bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
3938                             BuiltinID == ARM::BI__builtin_arm_wsrp;
3939 
3940     bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
3941                    BuiltinID == ARM::BI__builtin_arm_wsr64;
3942 
3943     llvm::Type *ValueType;
3944     llvm::Type *RegisterType;
3945     if (IsPointerBuiltin) {
3946       ValueType = VoidPtrTy;
3947       RegisterType = Int32Ty;
3948     } else if (Is64Bit) {
3949       ValueType = RegisterType = Int64Ty;
3950     } else {
3951       ValueType = RegisterType = Int32Ty;
3952     }
3953 
3954     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
3955   }
3956 
3957   // Find out if any arguments are required to be integer constant
3958   // expressions.
3959   unsigned ICEArguments = 0;
3960   ASTContext::GetBuiltinTypeError Error;
3961   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
3962   assert(Error == ASTContext::GE_None && "Should not codegen an error");
3963 
3964   auto getAlignmentValue32 = [&](Address addr) -> Value* {
3965     return Builder.getInt32(addr.getAlignment().getQuantity());
3966   };
3967 
3968   Address PtrOp0 = Address::invalid();
3969   Address PtrOp1 = Address::invalid();
3970   SmallVector<Value*, 4> Ops;
3971   bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
3972   unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
3973   for (unsigned i = 0, e = NumArgs; i != e; i++) {
3974     if (i == 0) {
3975       switch (BuiltinID) {
3976       case NEON::BI__builtin_neon_vld1_v:
3977       case NEON::BI__builtin_neon_vld1q_v:
3978       case NEON::BI__builtin_neon_vld1q_lane_v:
3979       case NEON::BI__builtin_neon_vld1_lane_v:
3980       case NEON::BI__builtin_neon_vld1_dup_v:
3981       case NEON::BI__builtin_neon_vld1q_dup_v:
3982       case NEON::BI__builtin_neon_vst1_v:
3983       case NEON::BI__builtin_neon_vst1q_v:
3984       case NEON::BI__builtin_neon_vst1q_lane_v:
3985       case NEON::BI__builtin_neon_vst1_lane_v:
3986       case NEON::BI__builtin_neon_vst2_v:
3987       case NEON::BI__builtin_neon_vst2q_v:
3988       case NEON::BI__builtin_neon_vst2_lane_v:
3989       case NEON::BI__builtin_neon_vst2q_lane_v:
3990       case NEON::BI__builtin_neon_vst3_v:
3991       case NEON::BI__builtin_neon_vst3q_v:
3992       case NEON::BI__builtin_neon_vst3_lane_v:
3993       case NEON::BI__builtin_neon_vst3q_lane_v:
3994       case NEON::BI__builtin_neon_vst4_v:
3995       case NEON::BI__builtin_neon_vst4q_v:
3996       case NEON::BI__builtin_neon_vst4_lane_v:
3997       case NEON::BI__builtin_neon_vst4q_lane_v:
3998         // Get the alignment for the argument in addition to the value;
3999         // we'll use it later.
4000         PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
4001         Ops.push_back(PtrOp0.getPointer());
4002         continue;
4003       }
4004     }
4005     if (i == 1) {
4006       switch (BuiltinID) {
4007       case NEON::BI__builtin_neon_vld2_v:
4008       case NEON::BI__builtin_neon_vld2q_v:
4009       case NEON::BI__builtin_neon_vld3_v:
4010       case NEON::BI__builtin_neon_vld3q_v:
4011       case NEON::BI__builtin_neon_vld4_v:
4012       case NEON::BI__builtin_neon_vld4q_v:
4013       case NEON::BI__builtin_neon_vld2_lane_v:
4014       case NEON::BI__builtin_neon_vld2q_lane_v:
4015       case NEON::BI__builtin_neon_vld3_lane_v:
4016       case NEON::BI__builtin_neon_vld3q_lane_v:
4017       case NEON::BI__builtin_neon_vld4_lane_v:
4018       case NEON::BI__builtin_neon_vld4q_lane_v:
4019       case NEON::BI__builtin_neon_vld2_dup_v:
4020       case NEON::BI__builtin_neon_vld3_dup_v:
4021       case NEON::BI__builtin_neon_vld4_dup_v:
4022         // Get the alignment for the argument in addition to the value;
4023         // we'll use it later.
4024         PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
4025         Ops.push_back(PtrOp1.getPointer());
4026         continue;
4027       }
4028     }
4029 
4030     if ((ICEArguments & (1 << i)) == 0) {
4031       Ops.push_back(EmitScalarExpr(E->getArg(i)));
4032     } else {
4033       // If this is required to be a constant, constant fold it so that we know
4034       // that the generated intrinsic gets a ConstantInt.
4035       llvm::APSInt Result;
4036       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
4037       assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
4038       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
4039     }
4040   }
4041 
4042   switch (BuiltinID) {
4043   default: break;
4044 
4045   case NEON::BI__builtin_neon_vget_lane_i8:
4046   case NEON::BI__builtin_neon_vget_lane_i16:
4047   case NEON::BI__builtin_neon_vget_lane_i32:
4048   case NEON::BI__builtin_neon_vget_lane_i64:
4049   case NEON::BI__builtin_neon_vget_lane_f32:
4050   case NEON::BI__builtin_neon_vgetq_lane_i8:
4051   case NEON::BI__builtin_neon_vgetq_lane_i16:
4052   case NEON::BI__builtin_neon_vgetq_lane_i32:
4053   case NEON::BI__builtin_neon_vgetq_lane_i64:
4054   case NEON::BI__builtin_neon_vgetq_lane_f32:
4055     return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
4056 
4057   case NEON::BI__builtin_neon_vset_lane_i8:
4058   case NEON::BI__builtin_neon_vset_lane_i16:
4059   case NEON::BI__builtin_neon_vset_lane_i32:
4060   case NEON::BI__builtin_neon_vset_lane_i64:
4061   case NEON::BI__builtin_neon_vset_lane_f32:
4062   case NEON::BI__builtin_neon_vsetq_lane_i8:
4063   case NEON::BI__builtin_neon_vsetq_lane_i16:
4064   case NEON::BI__builtin_neon_vsetq_lane_i32:
4065   case NEON::BI__builtin_neon_vsetq_lane_i64:
4066   case NEON::BI__builtin_neon_vsetq_lane_f32:
4067     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4068 
4069   case NEON::BI__builtin_neon_vsha1h_u32:
4070     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
4071                         "vsha1h");
4072   case NEON::BI__builtin_neon_vsha1cq_u32:
4073     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
4074                         "vsha1h");
4075   case NEON::BI__builtin_neon_vsha1pq_u32:
4076     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
4077                         "vsha1h");
4078   case NEON::BI__builtin_neon_vsha1mq_u32:
4079     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
4080                         "vsha1h");
4081 
4082   // The ARM _MoveToCoprocessor builtins put the input register value as
4083   // the first argument, but the LLVM intrinsic expects it as the third one.
4084   case ARM::BI_MoveToCoprocessor:
4085   case ARM::BI_MoveToCoprocessor2: {
4086     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
4087                                    Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
4088     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
4089                                   Ops[3], Ops[4], Ops[5]});
4090   }
4091   }
4092 
4093   // Get the last argument, which specifies the vector type.
4094   assert(HasExtraArg);
4095   llvm::APSInt Result;
4096   const Expr *Arg = E->getArg(E->getNumArgs()-1);
4097   if (!Arg->isIntegerConstantExpr(Result, getContext()))
4098     return nullptr;
4099 
4100   if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
4101       BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
4102     // Determine the overloaded type of this builtin.
4103     llvm::Type *Ty;
4104     if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
4105       Ty = FloatTy;
4106     else
4107       Ty = DoubleTy;
4108 
4109     // Determine whether this is an unsigned conversion or not.
4110     bool usgn = Result.getZExtValue() == 1;
4111     unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
4112 
4113     // Call the appropriate intrinsic.
4114     Function *F = CGM.getIntrinsic(Int, Ty);
4115     return Builder.CreateCall(F, Ops, "vcvtr");
4116   }
4117 
4118   // Determine the type of this overloaded NEON intrinsic.
4119   NeonTypeFlags Type(Result.getZExtValue());
4120   bool usgn = Type.isUnsigned();
4121   bool rightShift = false;
4122 
4123   llvm::VectorType *VTy = GetNeonType(this, Type);
4124   llvm::Type *Ty = VTy;
4125   if (!Ty)
4126     return nullptr;
4127 
4128   // Many NEON builtins have identical semantics and uses in ARM and
4129   // AArch64. Emit these in a single function.
4130   auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
4131   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
4132       IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
4133   if (Builtin)
4134     return EmitCommonNeonBuiltinExpr(
4135         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
4136         Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);
4137 
4138   unsigned Int;
4139   switch (BuiltinID) {
4140   default: return nullptr;
4141   case NEON::BI__builtin_neon_vld1q_lane_v:
4142     // Handle 64-bit integer elements as a special case.  Use shuffles of
4143     // one-element vectors to avoid poor code for i64 in the backend.
4144     if (VTy->getElementType()->isIntegerTy(64)) {
4145       // Extract the other lane.
4146       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4147       uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
4148       Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
4149       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
4150       // Load the value as a one-element vector.
4151       Ty = llvm::VectorType::get(VTy->getElementType(), 1);
4152       llvm::Type *Tys[] = {Ty, Int8PtrTy};
4153       Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
4154       Value *Align = getAlignmentValue32(PtrOp0);
4155       Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
4156       // Combine them.
4157       uint32_t Indices[] = {1 - Lane, Lane};
4158       SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
4159       return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
4160     }
4161     // fall through
4162   case NEON::BI__builtin_neon_vld1_lane_v: {
4163     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4164     PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
4165     Value *Ld = Builder.CreateLoad(PtrOp0);
4166     return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
4167   }
4168   case NEON::BI__builtin_neon_vld2_dup_v:
4169   case NEON::BI__builtin_neon_vld3_dup_v:
4170   case NEON::BI__builtin_neon_vld4_dup_v: {
4171     // Handle 64-bit elements as a special-case.  There is no "dup" needed.
4172     if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
4173       switch (BuiltinID) {
4174       case NEON::BI__builtin_neon_vld2_dup_v:
4175         Int = Intrinsic::arm_neon_vld2;
4176         break;
4177       case NEON::BI__builtin_neon_vld3_dup_v:
4178         Int = Intrinsic::arm_neon_vld3;
4179         break;
4180       case NEON::BI__builtin_neon_vld4_dup_v:
4181         Int = Intrinsic::arm_neon_vld4;
4182         break;
4183       default: llvm_unreachable("unknown vld_dup intrinsic?");
4184       }
4185       llvm::Type *Tys[] = {Ty, Int8PtrTy};
4186       Function *F = CGM.getIntrinsic(Int, Tys);
4187       llvm::Value *Align = getAlignmentValue32(PtrOp1);
4188       Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
4189       Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4190       Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4191       return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4192     }
4193     switch (BuiltinID) {
4194     case NEON::BI__builtin_neon_vld2_dup_v:
4195       Int = Intrinsic::arm_neon_vld2lane;
4196       break;
4197     case NEON::BI__builtin_neon_vld3_dup_v:
4198       Int = Intrinsic::arm_neon_vld3lane;
4199       break;
4200     case NEON::BI__builtin_neon_vld4_dup_v:
4201       Int = Intrinsic::arm_neon_vld4lane;
4202       break;
4203     default: llvm_unreachable("unknown vld_dup intrinsic?");
4204     }
4205     llvm::Type *Tys[] = {Ty, Int8PtrTy};
4206     Function *F = CGM.getIntrinsic(Int, Tys);
4207     llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
4208 
4209     SmallVector<Value*, 6> Args;
4210     Args.push_back(Ops[1]);
4211     Args.append(STy->getNumElements(), UndefValue::get(Ty));
4212 
4213     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
4214     Args.push_back(CI);
4215     Args.push_back(getAlignmentValue32(PtrOp1));
4216 
4217     Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
4218     // splat lane 0 to all elts in each vector of the result.
4219     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
4220       Value *Val = Builder.CreateExtractValue(Ops[1], i);
4221       Value *Elt = Builder.CreateBitCast(Val, Ty);
4222       Elt = EmitNeonSplat(Elt, CI);
4223       Elt = Builder.CreateBitCast(Elt, Val->getType());
4224       Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
4225     }
4226     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4227     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4228     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4229   }
4230   case NEON::BI__builtin_neon_vqrshrn_n_v:
4231     Int =
4232       usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
4233     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
4234                         1, true);
4235   case NEON::BI__builtin_neon_vqrshrun_n_v:
4236     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
4237                         Ops, "vqrshrun_n", 1, true);
4238   case NEON::BI__builtin_neon_vqshrn_n_v:
4239     Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
4240     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
4241                         1, true);
4242   case NEON::BI__builtin_neon_vqshrun_n_v:
4243     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
4244                         Ops, "vqshrun_n", 1, true);
4245   case NEON::BI__builtin_neon_vrecpe_v:
4246   case NEON::BI__builtin_neon_vrecpeq_v:
4247     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
4248                         Ops, "vrecpe");
4249   case NEON::BI__builtin_neon_vrshrn_n_v:
4250     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
4251                         Ops, "vrshrn_n", 1, true);
4252   case NEON::BI__builtin_neon_vrsra_n_v:
4253   case NEON::BI__builtin_neon_vrsraq_n_v:
4254     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4255     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4256     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
4257     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
4258     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
4259     return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
4260   case NEON::BI__builtin_neon_vsri_n_v:
4261   case NEON::BI__builtin_neon_vsriq_n_v:
4262     rightShift = true;
4263   case NEON::BI__builtin_neon_vsli_n_v:
4264   case NEON::BI__builtin_neon_vsliq_n_v:
4265     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
4266     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
4267                         Ops, "vsli_n");
4268   case NEON::BI__builtin_neon_vsra_n_v:
4269   case NEON::BI__builtin_neon_vsraq_n_v:
4270     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4271     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
4272     return Builder.CreateAdd(Ops[0], Ops[1]);
4273   case NEON::BI__builtin_neon_vst1q_lane_v:
4274     // Handle 64-bit integer elements as a special case.  Use a shuffle to get
4275     // a one-element vector and avoid poor code for i64 in the backend.
4276     if (VTy->getElementType()->isIntegerTy(64)) {
4277       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4278       Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
4279       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
4280       Ops[2] = getAlignmentValue32(PtrOp0);
4281       llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
4282       return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
4283                                                  Tys), Ops);
4284     }
4285     // fall through
4286   case NEON::BI__builtin_neon_vst1_lane_v: {
4287     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4288     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
4289     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4290     auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
4291     return St;
4292   }
4293   case NEON::BI__builtin_neon_vtbl1_v:
4294     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
4295                         Ops, "vtbl1");
4296   case NEON::BI__builtin_neon_vtbl2_v:
4297     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
4298                         Ops, "vtbl2");
4299   case NEON::BI__builtin_neon_vtbl3_v:
4300     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
4301                         Ops, "vtbl3");
4302   case NEON::BI__builtin_neon_vtbl4_v:
4303     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
4304                         Ops, "vtbl4");
4305   case NEON::BI__builtin_neon_vtbx1_v:
4306     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
4307                         Ops, "vtbx1");
4308   case NEON::BI__builtin_neon_vtbx2_v:
4309     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
4310                         Ops, "vtbx2");
4311   case NEON::BI__builtin_neon_vtbx3_v:
4312     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
4313                         Ops, "vtbx3");
4314   case NEON::BI__builtin_neon_vtbx4_v:
4315     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
4316                         Ops, "vtbx4");
4317   }
4318 }
4319 
4320 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
4321                                       const CallExpr *E,
4322                                       SmallVectorImpl<Value *> &Ops) {
4323   unsigned int Int = 0;
4324   const char *s = nullptr;
4325 
4326   switch (BuiltinID) {
4327   default:
4328     return nullptr;
4329   case NEON::BI__builtin_neon_vtbl1_v:
4330   case NEON::BI__builtin_neon_vqtbl1_v:
4331   case NEON::BI__builtin_neon_vqtbl1q_v:
4332   case NEON::BI__builtin_neon_vtbl2_v:
4333   case NEON::BI__builtin_neon_vqtbl2_v:
4334   case NEON::BI__builtin_neon_vqtbl2q_v:
4335   case NEON::BI__builtin_neon_vtbl3_v:
4336   case NEON::BI__builtin_neon_vqtbl3_v:
4337   case NEON::BI__builtin_neon_vqtbl3q_v:
4338   case NEON::BI__builtin_neon_vtbl4_v:
4339   case NEON::BI__builtin_neon_vqtbl4_v:
4340   case NEON::BI__builtin_neon_vqtbl4q_v:
4341     break;
4342   case NEON::BI__builtin_neon_vtbx1_v:
4343   case NEON::BI__builtin_neon_vqtbx1_v:
4344   case NEON::BI__builtin_neon_vqtbx1q_v:
4345   case NEON::BI__builtin_neon_vtbx2_v:
4346   case NEON::BI__builtin_neon_vqtbx2_v:
4347   case NEON::BI__builtin_neon_vqtbx2q_v:
4348   case NEON::BI__builtin_neon_vtbx3_v:
4349   case NEON::BI__builtin_neon_vqtbx3_v:
4350   case NEON::BI__builtin_neon_vqtbx3q_v:
4351   case NEON::BI__builtin_neon_vtbx4_v:
4352   case NEON::BI__builtin_neon_vqtbx4_v:
4353   case NEON::BI__builtin_neon_vqtbx4q_v:
4354     break;
4355   }
4356 
4357   assert(E->getNumArgs() >= 3);
4358 
4359   // Get the last argument, which specifies the vector type.
4360   llvm::APSInt Result;
4361   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
4362   if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
4363     return nullptr;
4364 
4365   // Determine the type of this overloaded NEON intrinsic.
4366   NeonTypeFlags Type(Result.getZExtValue());
4367   llvm::VectorType *Ty = GetNeonType(&CGF, Type);
4368   if (!Ty)
4369     return nullptr;
4370 
4371   CodeGen::CGBuilderTy &Builder = CGF.Builder;
4372 
4373   // AArch64 scalar builtins are not overloaded, they do not have an extra
4374   // argument that specifies the vector type, need to handle each case.
4375   switch (BuiltinID) {
4376   case NEON::BI__builtin_neon_vtbl1_v: {
4377     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
4378                               Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
4379                               "vtbl1");
4380   }
4381   case NEON::BI__builtin_neon_vtbl2_v: {
4382     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
4383                               Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
4384                               "vtbl1");
4385   }
4386   case NEON::BI__builtin_neon_vtbl3_v: {
4387     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
4388                               Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
4389                               "vtbl2");
4390   }
4391   case NEON::BI__builtin_neon_vtbl4_v: {
4392     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
4393                               Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
4394                               "vtbl2");
4395   }
4396   case NEON::BI__builtin_neon_vtbx1_v: {
4397     Value *TblRes =
4398         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
4399                            Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
4400 
4401     llvm::Constant *EightV = ConstantInt::get(Ty, 8);
4402     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
4403     CmpRes = Builder.CreateSExt(CmpRes, Ty);
4404 
4405     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
4406     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
4407     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
4408   }
4409   case NEON::BI__builtin_neon_vtbx2_v: {
4410     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
4411                               Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
4412                               "vtbx1");
4413   }
4414   case NEON::BI__builtin_neon_vtbx3_v: {
4415     Value *TblRes =
4416         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
4417                            Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
4418 
4419     llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
4420     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
4421                                            TwentyFourV);
4422     CmpRes = Builder.CreateSExt(CmpRes, Ty);
4423 
4424     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
4425     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
4426     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
4427   }
4428   case NEON::BI__builtin_neon_vtbx4_v: {
4429     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
4430                               Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
4431                               "vtbx2");
4432   }
4433   case NEON::BI__builtin_neon_vqtbl1_v:
4434   case NEON::BI__builtin_neon_vqtbl1q_v:
4435     Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
4436   case NEON::BI__builtin_neon_vqtbl2_v:
4437   case NEON::BI__builtin_neon_vqtbl2q_v: {
4438     Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
4439   case NEON::BI__builtin_neon_vqtbl3_v:
4440   case NEON::BI__builtin_neon_vqtbl3q_v:
4441     Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
4442   case NEON::BI__builtin_neon_vqtbl4_v:
4443   case NEON::BI__builtin_neon_vqtbl4q_v:
4444     Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
4445   case NEON::BI__builtin_neon_vqtbx1_v:
4446   case NEON::BI__builtin_neon_vqtbx1q_v:
4447     Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
4448   case NEON::BI__builtin_neon_vqtbx2_v:
4449   case NEON::BI__builtin_neon_vqtbx2q_v:
4450     Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
4451   case NEON::BI__builtin_neon_vqtbx3_v:
4452   case NEON::BI__builtin_neon_vqtbx3q_v:
4453     Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
4454   case NEON::BI__builtin_neon_vqtbx4_v:
4455   case NEON::BI__builtin_neon_vqtbx4q_v:
4456     Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
4457   }
4458   }
4459 
4460   if (!Int)
4461     return nullptr;
4462 
4463   Function *F = CGF.CGM.getIntrinsic(Int, Ty);
4464   return CGF.EmitNeonCall(F, Ops, s);
4465 }
4466 
4467 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
4468   llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
4469   Op = Builder.CreateBitCast(Op, Int16Ty);
4470   Value *V = UndefValue::get(VTy);
4471   llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
4472   Op = Builder.CreateInsertElement(V, Op, CI);
4473   return Op;
4474 }
4475 
4476 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
4477                                                const CallExpr *E) {
4478   unsigned HintID = static_cast<unsigned>(-1);
4479   switch (BuiltinID) {
4480   default: break;
4481   case AArch64::BI__builtin_arm_nop:
4482     HintID = 0;
4483     break;
4484   case AArch64::BI__builtin_arm_yield:
4485     HintID = 1;
4486     break;
4487   case AArch64::BI__builtin_arm_wfe:
4488     HintID = 2;
4489     break;
4490   case AArch64::BI__builtin_arm_wfi:
4491     HintID = 3;
4492     break;
4493   case AArch64::BI__builtin_arm_sev:
4494     HintID = 4;
4495     break;
4496   case AArch64::BI__builtin_arm_sevl:
4497     HintID = 5;
4498     break;
4499   }
4500 
4501   if (HintID != static_cast<unsigned>(-1)) {
4502     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
4503     return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
4504   }
4505 
4506   if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
4507     Value *Address         = EmitScalarExpr(E->getArg(0));
4508     Value *RW              = EmitScalarExpr(E->getArg(1));
4509     Value *CacheLevel      = EmitScalarExpr(E->getArg(2));
4510     Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
4511     Value *IsData          = EmitScalarExpr(E->getArg(4));
4512 
4513     Value *Locality = nullptr;
4514     if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
4515       // Temporal fetch, needs to convert cache level to locality.
4516       Locality = llvm::ConstantInt::get(Int32Ty,
4517         -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
4518     } else {
4519       // Streaming fetch.
4520       Locality = llvm::ConstantInt::get(Int32Ty, 0);
4521     }
4522 
4523     // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
4524     // PLDL3STRM or PLDL2STRM.
4525     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
4526     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
4527   }
4528 
4529   if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
4530     assert((getContext().getTypeSize(E->getType()) == 32) &&
4531            "rbit of unusual size!");
4532     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4533     return Builder.CreateCall(
4534         CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit");
4535   }
4536   if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
4537     assert((getContext().getTypeSize(E->getType()) == 64) &&
4538            "rbit of unusual size!");
4539     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4540     return Builder.CreateCall(
4541         CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit");
4542   }
4543 
4544   if (BuiltinID == AArch64::BI__clear_cache) {
4545     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4546     const FunctionDecl *FD = E->getDirectCallee();
4547     Value *Ops[2];
4548     for (unsigned i = 0; i < 2; i++)
4549       Ops[i] = EmitScalarExpr(E->getArg(i));
4550     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4551     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4552     StringRef Name = FD->getName();
4553     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4554   }
4555 
4556   if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
4557       BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
4558       getContext().getTypeSize(E->getType()) == 128) {
4559     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
4560                                        ? Intrinsic::aarch64_ldaxp
4561                                        : Intrinsic::aarch64_ldxp);
4562 
4563     Value *LdPtr = EmitScalarExpr(E->getArg(0));
4564     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4565                                     "ldxp");
4566 
4567     Value *Val0 = Builder.CreateExtractValue(Val, 1);
4568     Value *Val1 = Builder.CreateExtractValue(Val, 0);
4569     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
4570     Val0 = Builder.CreateZExt(Val0, Int128Ty);
4571     Val1 = Builder.CreateZExt(Val1, Int128Ty);
4572 
4573     Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
4574     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4575     Val = Builder.CreateOr(Val, Val1);
4576     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4577   } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
4578              BuiltinID == AArch64::BI__builtin_arm_ldaex) {
4579     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4580 
4581     QualType Ty = E->getType();
4582     llvm::Type *RealResTy = ConvertType(Ty);
4583     llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
4584                                                   getContext().getTypeSize(Ty));
4585     LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
4586 
4587     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
4588                                        ? Intrinsic::aarch64_ldaxr
4589                                        : Intrinsic::aarch64_ldxr,
4590                                    LoadAddr->getType());
4591     Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
4592 
4593     if (RealResTy->isPointerTy())
4594       return Builder.CreateIntToPtr(Val, RealResTy);
4595 
4596     Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4597     return Builder.CreateBitCast(Val, RealResTy);
4598   }
4599 
4600   if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
4601        BuiltinID == AArch64::BI__builtin_arm_stlex) &&
4602       getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
4603     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
4604                                        ? Intrinsic::aarch64_stlxp
4605                                        : Intrinsic::aarch64_stxp);
4606     llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, nullptr);
4607 
4608     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4609     EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
4610 
4611     Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
4612     llvm::Value *Val = Builder.CreateLoad(Tmp);
4613 
4614     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4615     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4616     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
4617                                          Int8PtrTy);
4618     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
4619   }
4620 
4621   if (BuiltinID == AArch64::BI__builtin_arm_strex ||
4622       BuiltinID == AArch64::BI__builtin_arm_stlex) {
4623     Value *StoreVal = EmitScalarExpr(E->getArg(0));
4624     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4625 
4626     QualType Ty = E->getArg(0)->getType();
4627     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4628                                                  getContext().getTypeSize(Ty));
4629     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4630 
4631     if (StoreVal->getType()->isPointerTy())
4632       StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
4633     else {
4634       StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
4635       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
4636     }
4637 
4638     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
4639                                        ? Intrinsic::aarch64_stlxr
4640                                        : Intrinsic::aarch64_stxr,
4641                                    StoreAddr->getType());
4642     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
4643   }
4644 
4645   if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
4646     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
4647     return Builder.CreateCall(F);
4648   }
4649 
4650   // CRC32
4651   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4652   switch (BuiltinID) {
4653   case AArch64::BI__builtin_arm_crc32b:
4654     CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
4655   case AArch64::BI__builtin_arm_crc32cb:
4656     CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
4657   case AArch64::BI__builtin_arm_crc32h:
4658     CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
4659   case AArch64::BI__builtin_arm_crc32ch:
4660     CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
4661   case AArch64::BI__builtin_arm_crc32w:
4662     CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
4663   case AArch64::BI__builtin_arm_crc32cw:
4664     CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
4665   case AArch64::BI__builtin_arm_crc32d:
4666     CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
4667   case AArch64::BI__builtin_arm_crc32cd:
4668     CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
4669   }
4670 
4671   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4672     Value *Arg0 = EmitScalarExpr(E->getArg(0));
4673     Value *Arg1 = EmitScalarExpr(E->getArg(1));
4674     Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4675 
4676     llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
4677     Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
4678 
4679     return Builder.CreateCall(F, {Arg0, Arg1});
4680   }
4681 
4682   if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
4683       BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
4684       BuiltinID == AArch64::BI__builtin_arm_rsrp ||
4685       BuiltinID == AArch64::BI__builtin_arm_wsr ||
4686       BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
4687       BuiltinID == AArch64::BI__builtin_arm_wsrp) {
4688 
4689     bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
4690                   BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
4691                   BuiltinID == AArch64::BI__builtin_arm_rsrp;
4692 
4693     bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
4694                             BuiltinID == AArch64::BI__builtin_arm_wsrp;
4695 
4696     bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
4697                    BuiltinID != AArch64::BI__builtin_arm_wsr;
4698 
4699     llvm::Type *ValueType;
4700     llvm::Type *RegisterType = Int64Ty;
4701     if (IsPointerBuiltin) {
4702       ValueType = VoidPtrTy;
4703     } else if (Is64Bit) {
4704       ValueType = Int64Ty;
4705     } else {
4706       ValueType = Int32Ty;
4707     }
4708 
4709     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
4710   }
4711 
4712   // Find out if any arguments are required to be integer constant
4713   // expressions.
4714   unsigned ICEArguments = 0;
4715   ASTContext::GetBuiltinTypeError Error;
4716   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4717   assert(Error == ASTContext::GE_None && "Should not codegen an error");
4718 
4719   llvm::SmallVector<Value*, 4> Ops;
4720   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
4721     if ((ICEArguments & (1 << i)) == 0) {
4722       Ops.push_back(EmitScalarExpr(E->getArg(i)));
4723     } else {
4724       // If this is required to be a constant, constant fold it so that we know
4725       // that the generated intrinsic gets a ConstantInt.
4726       llvm::APSInt Result;
4727       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
4728       assert(IsConst && "Constant arg isn't actually constant?");
4729       (void)IsConst;
4730       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
4731     }
4732   }
4733 
4734   auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
4735   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
4736       SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
4737 
4738   if (Builtin) {
4739     Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
4740     Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
4741     assert(Result && "SISD intrinsic should have been handled");
4742     return Result;
4743   }
4744 
4745   llvm::APSInt Result;
4746   const Expr *Arg = E->getArg(E->getNumArgs()-1);
4747   NeonTypeFlags Type(0);
4748   if (Arg->isIntegerConstantExpr(Result, getContext()))
4749     // Determine the type of this overloaded NEON intrinsic.
4750     Type = NeonTypeFlags(Result.getZExtValue());
4751 
4752   bool usgn = Type.isUnsigned();
4753   bool quad = Type.isQuad();
4754 
4755   // Handle non-overloaded intrinsics first.
4756   switch (BuiltinID) {
4757   default: break;
4758   case NEON::BI__builtin_neon_vldrq_p128: {
4759     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
4760     Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
4761     return Builder.CreateDefaultAlignedLoad(Ptr);
4762   }
4763   case NEON::BI__builtin_neon_vstrq_p128: {
4764     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
4765     Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
4766     return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
4767   }
4768   case NEON::BI__builtin_neon_vcvts_u32_f32:
4769   case NEON::BI__builtin_neon_vcvtd_u64_f64:
4770     usgn = true;
4771     // FALL THROUGH
4772   case NEON::BI__builtin_neon_vcvts_s32_f32:
4773   case NEON::BI__builtin_neon_vcvtd_s64_f64: {
4774     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4775     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
4776     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
4777     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
4778     Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
4779     if (usgn)
4780       return Builder.CreateFPToUI(Ops[0], InTy);
4781     return Builder.CreateFPToSI(Ops[0], InTy);
4782   }
4783   case NEON::BI__builtin_neon_vcvts_f32_u32:
4784   case NEON::BI__builtin_neon_vcvtd_f64_u64:
4785     usgn = true;
4786     // FALL THROUGH
4787   case NEON::BI__builtin_neon_vcvts_f32_s32:
4788   case NEON::BI__builtin_neon_vcvtd_f64_s64: {
4789     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4790     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
4791     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
4792     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
4793     Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
4794     if (usgn)
4795       return Builder.CreateUIToFP(Ops[0], FTy);
4796     return Builder.CreateSIToFP(Ops[0], FTy);
4797   }
4798   case NEON::BI__builtin_neon_vpaddd_s64: {
4799     llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
4800     Value *Vec = EmitScalarExpr(E->getArg(0));
4801     // The vector is v2i64, so make sure it's bitcast to that.
4802     Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
4803     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
4804     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
4805     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
4806     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
4807     // Pairwise addition of a v2i64 into a scalar i64.
4808     return Builder.CreateAdd(Op0, Op1, "vpaddd");
4809   }
4810   case NEON::BI__builtin_neon_vpaddd_f64: {
4811     llvm::Type *Ty =
4812       llvm::VectorType::get(DoubleTy, 2);
4813     Value *Vec = EmitScalarExpr(E->getArg(0));
4814     // The vector is v2f64, so make sure it's bitcast to that.
4815     Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
4816     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
4817     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
4818     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
4819     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
4820     // Pairwise addition of a v2f64 into a scalar f64.
4821     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
4822   }
4823   case NEON::BI__builtin_neon_vpadds_f32: {
4824     llvm::Type *Ty =
4825       llvm::VectorType::get(FloatTy, 2);
4826     Value *Vec = EmitScalarExpr(E->getArg(0));
4827     // The vector is v2f32, so make sure it's bitcast to that.
4828     Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
4829     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
4830     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
4831     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
4832     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
4833     // Pairwise addition of a v2f32 into a scalar f32.
4834     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
4835   }
4836   case NEON::BI__builtin_neon_vceqzd_s64:
4837   case NEON::BI__builtin_neon_vceqzd_f64:
4838   case NEON::BI__builtin_neon_vceqzs_f32:
4839     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4840     return EmitAArch64CompareBuiltinExpr(
4841         Ops[0], ConvertType(E->getCallReturnType(getContext())),
4842         ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
4843   case NEON::BI__builtin_neon_vcgezd_s64:
4844   case NEON::BI__builtin_neon_vcgezd_f64:
4845   case NEON::BI__builtin_neon_vcgezs_f32:
4846     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4847     return EmitAArch64CompareBuiltinExpr(
4848         Ops[0], ConvertType(E->getCallReturnType(getContext())),
4849         ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
4850   case NEON::BI__builtin_neon_vclezd_s64:
4851   case NEON::BI__builtin_neon_vclezd_f64:
4852   case NEON::BI__builtin_neon_vclezs_f32:
4853     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4854     return EmitAArch64CompareBuiltinExpr(
4855         Ops[0], ConvertType(E->getCallReturnType(getContext())),
4856         ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
4857   case NEON::BI__builtin_neon_vcgtzd_s64:
4858   case NEON::BI__builtin_neon_vcgtzd_f64:
4859   case NEON::BI__builtin_neon_vcgtzs_f32:
4860     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4861     return EmitAArch64CompareBuiltinExpr(
4862         Ops[0], ConvertType(E->getCallReturnType(getContext())),
4863         ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
4864   case NEON::BI__builtin_neon_vcltzd_s64:
4865   case NEON::BI__builtin_neon_vcltzd_f64:
4866   case NEON::BI__builtin_neon_vcltzs_f32:
4867     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4868     return EmitAArch64CompareBuiltinExpr(
4869         Ops[0], ConvertType(E->getCallReturnType(getContext())),
4870         ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
4871 
4872   case NEON::BI__builtin_neon_vceqzd_u64: {
4873     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4874     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
4875     Ops[0] =
4876         Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
4877     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
4878   }
4879   case NEON::BI__builtin_neon_vceqd_f64:
4880   case NEON::BI__builtin_neon_vcled_f64:
4881   case NEON::BI__builtin_neon_vcltd_f64:
4882   case NEON::BI__builtin_neon_vcged_f64:
4883   case NEON::BI__builtin_neon_vcgtd_f64: {
4884     llvm::CmpInst::Predicate P;
4885     switch (BuiltinID) {
4886     default: llvm_unreachable("missing builtin ID in switch!");
4887     case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
4888     case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
4889     case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
4890     case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
4891     case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
4892     }
4893     Ops.push_back(EmitScalarExpr(E->getArg(1)));
4894     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
4895     Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
4896     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
4897     return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
4898   }
4899   case NEON::BI__builtin_neon_vceqs_f32:
4900   case NEON::BI__builtin_neon_vcles_f32:
4901   case NEON::BI__builtin_neon_vclts_f32:
4902   case NEON::BI__builtin_neon_vcges_f32:
4903   case NEON::BI__builtin_neon_vcgts_f32: {
4904     llvm::CmpInst::Predicate P;
4905     switch (BuiltinID) {
4906     default: llvm_unreachable("missing builtin ID in switch!");
4907     case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
4908     case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
4909     case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
4910     case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
4911     case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
4912     }
4913     Ops.push_back(EmitScalarExpr(E->getArg(1)));
4914     Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
4915     Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
4916     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
4917     return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
4918   }
4919   case NEON::BI__builtin_neon_vceqd_s64:
4920   case NEON::BI__builtin_neon_vceqd_u64:
4921   case NEON::BI__builtin_neon_vcgtd_s64:
4922   case NEON::BI__builtin_neon_vcgtd_u64:
4923   case NEON::BI__builtin_neon_vcltd_s64:
4924   case NEON::BI__builtin_neon_vcltd_u64:
4925   case NEON::BI__builtin_neon_vcged_u64:
4926   case NEON::BI__builtin_neon_vcged_s64:
4927   case NEON::BI__builtin_neon_vcled_u64:
4928   case NEON::BI__builtin_neon_vcled_s64: {
4929     llvm::CmpInst::Predicate P;
4930     switch (BuiltinID) {
4931     default: llvm_unreachable("missing builtin ID in switch!");
4932     case NEON::BI__builtin_neon_vceqd_s64:
4933     case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
4934     case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
4935     case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
4936     case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
4937     case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
4938     case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
4939     case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
4940     case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
4941     case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
4942     }
4943     Ops.push_back(EmitScalarExpr(E->getArg(1)));
4944     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
4945     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
4946     Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
4947     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
4948   }
4949   case NEON::BI__builtin_neon_vtstd_s64:
4950   case NEON::BI__builtin_neon_vtstd_u64: {
4951     Ops.push_back(EmitScalarExpr(E->getArg(1)));
4952     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
4953     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
4954     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
4955     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
4956                                 llvm::Constant::getNullValue(Int64Ty));
4957     return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
4958   }
4959   case NEON::BI__builtin_neon_vset_lane_i8:
4960   case NEON::BI__builtin_neon_vset_lane_i16:
4961   case NEON::BI__builtin_neon_vset_lane_i32:
4962   case NEON::BI__builtin_neon_vset_lane_i64:
4963   case NEON::BI__builtin_neon_vset_lane_f32:
4964   case NEON::BI__builtin_neon_vsetq_lane_i8:
4965   case NEON::BI__builtin_neon_vsetq_lane_i16:
4966   case NEON::BI__builtin_neon_vsetq_lane_i32:
4967   case NEON::BI__builtin_neon_vsetq_lane_i64:
4968   case NEON::BI__builtin_neon_vsetq_lane_f32:
4969     Ops.push_back(EmitScalarExpr(E->getArg(2)));
4970     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4971   case NEON::BI__builtin_neon_vset_lane_f64:
4972     // The vector type needs a cast for the v1f64 variant.
4973     Ops[1] = Builder.CreateBitCast(Ops[1],
4974                                    llvm::VectorType::get(DoubleTy, 1));
4975     Ops.push_back(EmitScalarExpr(E->getArg(2)));
4976     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4977   case NEON::BI__builtin_neon_vsetq_lane_f64:
4978     // The vector type needs a cast for the v2f64 variant.
4979     Ops[1] = Builder.CreateBitCast(Ops[1],
4980         llvm::VectorType::get(DoubleTy, 2));
4981     Ops.push_back(EmitScalarExpr(E->getArg(2)));
4982     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4983 
4984   case NEON::BI__builtin_neon_vget_lane_i8:
4985   case NEON::BI__builtin_neon_vdupb_lane_i8:
4986     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
4987     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4988                                         "vget_lane");
4989   case NEON::BI__builtin_neon_vgetq_lane_i8:
4990   case NEON::BI__builtin_neon_vdupb_laneq_i8:
4991     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
4992     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4993                                         "vgetq_lane");
4994   case NEON::BI__builtin_neon_vget_lane_i16:
4995   case NEON::BI__builtin_neon_vduph_lane_i16:
4996     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
4997     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4998                                         "vget_lane");
4999   case NEON::BI__builtin_neon_vgetq_lane_i16:
5000   case NEON::BI__builtin_neon_vduph_laneq_i16:
5001     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
5002     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5003                                         "vgetq_lane");
5004   case NEON::BI__builtin_neon_vget_lane_i32:
5005   case NEON::BI__builtin_neon_vdups_lane_i32:
5006     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
5007     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5008                                         "vget_lane");
5009   case NEON::BI__builtin_neon_vdups_lane_f32:
5010     Ops[0] = Builder.CreateBitCast(Ops[0],
5011         llvm::VectorType::get(FloatTy, 2));
5012     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5013                                         "vdups_lane");
5014   case NEON::BI__builtin_neon_vgetq_lane_i32:
5015   case NEON::BI__builtin_neon_vdups_laneq_i32:
5016     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
5017     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5018                                         "vgetq_lane");
5019   case NEON::BI__builtin_neon_vget_lane_i64:
5020   case NEON::BI__builtin_neon_vdupd_lane_i64:
5021     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
5022     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5023                                         "vget_lane");
5024   case NEON::BI__builtin_neon_vdupd_lane_f64:
5025     Ops[0] = Builder.CreateBitCast(Ops[0],
5026         llvm::VectorType::get(DoubleTy, 1));
5027     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5028                                         "vdupd_lane");
5029   case NEON::BI__builtin_neon_vgetq_lane_i64:
5030   case NEON::BI__builtin_neon_vdupd_laneq_i64:
5031     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
5032     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5033                                         "vgetq_lane");
5034   case NEON::BI__builtin_neon_vget_lane_f32:
5035     Ops[0] = Builder.CreateBitCast(Ops[0],
5036         llvm::VectorType::get(FloatTy, 2));
5037     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5038                                         "vget_lane");
5039   case NEON::BI__builtin_neon_vget_lane_f64:
5040     Ops[0] = Builder.CreateBitCast(Ops[0],
5041         llvm::VectorType::get(DoubleTy, 1));
5042     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5043                                         "vget_lane");
5044   case NEON::BI__builtin_neon_vgetq_lane_f32:
5045   case NEON::BI__builtin_neon_vdups_laneq_f32:
5046     Ops[0] = Builder.CreateBitCast(Ops[0],
5047         llvm::VectorType::get(FloatTy, 4));
5048     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5049                                         "vgetq_lane");
5050   case NEON::BI__builtin_neon_vgetq_lane_f64:
5051   case NEON::BI__builtin_neon_vdupd_laneq_f64:
5052     Ops[0] = Builder.CreateBitCast(Ops[0],
5053         llvm::VectorType::get(DoubleTy, 2));
5054     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5055                                         "vgetq_lane");
5056   case NEON::BI__builtin_neon_vaddd_s64:
5057   case NEON::BI__builtin_neon_vaddd_u64:
5058     return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
5059   case NEON::BI__builtin_neon_vsubd_s64:
5060   case NEON::BI__builtin_neon_vsubd_u64:
5061     return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
5062   case NEON::BI__builtin_neon_vqdmlalh_s16:
5063   case NEON::BI__builtin_neon_vqdmlslh_s16: {
5064     SmallVector<Value *, 2> ProductOps;
5065     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5066     ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
5067     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5068     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5069                           ProductOps, "vqdmlXl");
5070     Constant *CI = ConstantInt::get(SizeTy, 0);
5071     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5072 
5073     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
5074                                         ? Intrinsic::aarch64_neon_sqadd
5075                                         : Intrinsic::aarch64_neon_sqsub;
5076     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
5077   }
5078   case NEON::BI__builtin_neon_vqshlud_n_s64: {
5079     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5080     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5081     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
5082                         Ops, "vqshlu_n");
5083   }
5084   case NEON::BI__builtin_neon_vqshld_n_u64:
5085   case NEON::BI__builtin_neon_vqshld_n_s64: {
5086     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
5087                                    ? Intrinsic::aarch64_neon_uqshl
5088                                    : Intrinsic::aarch64_neon_sqshl;
5089     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5090     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5091     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
5092   }
5093   case NEON::BI__builtin_neon_vrshrd_n_u64:
5094   case NEON::BI__builtin_neon_vrshrd_n_s64: {
5095     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
5096                                    ? Intrinsic::aarch64_neon_urshl
5097                                    : Intrinsic::aarch64_neon_srshl;
5098     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5099     int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
5100     Ops[1] = ConstantInt::get(Int64Ty, -SV);
5101     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
5102   }
5103   case NEON::BI__builtin_neon_vrsrad_n_u64:
5104   case NEON::BI__builtin_neon_vrsrad_n_s64: {
5105     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
5106                                    ? Intrinsic::aarch64_neon_urshl
5107                                    : Intrinsic::aarch64_neon_srshl;
5108     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5109     Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
5110     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
5111                                 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
5112     return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
5113   }
5114   case NEON::BI__builtin_neon_vshld_n_s64:
5115   case NEON::BI__builtin_neon_vshld_n_u64: {
5116     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5117     return Builder.CreateShl(
5118         Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
5119   }
5120   case NEON::BI__builtin_neon_vshrd_n_s64: {
5121     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5122     return Builder.CreateAShr(
5123         Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5124                                                    Amt->getZExtValue())),
5125         "shrd_n");
5126   }
5127   case NEON::BI__builtin_neon_vshrd_n_u64: {
5128     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5129     uint64_t ShiftAmt = Amt->getZExtValue();
5130     // Right-shifting an unsigned value by its size yields 0.
5131     if (ShiftAmt == 64)
5132       return ConstantInt::get(Int64Ty, 0);
5133     return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
5134                               "shrd_n");
5135   }
5136   case NEON::BI__builtin_neon_vsrad_n_s64: {
5137     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
5138     Ops[1] = Builder.CreateAShr(
5139         Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5140                                                    Amt->getZExtValue())),
5141         "shrd_n");
5142     return Builder.CreateAdd(Ops[0], Ops[1]);
5143   }
5144   case NEON::BI__builtin_neon_vsrad_n_u64: {
5145     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
5146     uint64_t ShiftAmt = Amt->getZExtValue();
5147     // Right-shifting an unsigned value by its size yields 0.
5148     // As Op + 0 = Op, return Ops[0] directly.
5149     if (ShiftAmt == 64)
5150       return Ops[0];
5151     Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
5152                                 "shrd_n");
5153     return Builder.CreateAdd(Ops[0], Ops[1]);
5154   }
5155   case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
5156   case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
5157   case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
5158   case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
5159     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
5160                                           "lane");
5161     SmallVector<Value *, 2> ProductOps;
5162     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5163     ProductOps.push_back(vectorWrapScalar16(Ops[2]));
5164     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5165     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5166                           ProductOps, "vqdmlXl");
5167     Constant *CI = ConstantInt::get(SizeTy, 0);
5168     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5169     Ops.pop_back();
5170 
5171     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
5172                        BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
5173                           ? Intrinsic::aarch64_neon_sqadd
5174                           : Intrinsic::aarch64_neon_sqsub;
5175     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
5176   }
5177   case NEON::BI__builtin_neon_vqdmlals_s32:
5178   case NEON::BI__builtin_neon_vqdmlsls_s32: {
5179     SmallVector<Value *, 2> ProductOps;
5180     ProductOps.push_back(Ops[1]);
5181     ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
5182     Ops[1] =
5183         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5184                      ProductOps, "vqdmlXl");
5185 
5186     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
5187                                         ? Intrinsic::aarch64_neon_sqadd
5188                                         : Intrinsic::aarch64_neon_sqsub;
5189     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
5190   }
5191   case NEON::BI__builtin_neon_vqdmlals_lane_s32:
5192   case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
5193   case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
5194   case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
5195     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
5196                                           "lane");
5197     SmallVector<Value *, 2> ProductOps;
5198     ProductOps.push_back(Ops[1]);
5199     ProductOps.push_back(Ops[2]);
5200     Ops[1] =
5201         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5202                      ProductOps, "vqdmlXl");
5203     Ops.pop_back();
5204 
5205     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
5206                        BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
5207                           ? Intrinsic::aarch64_neon_sqadd
5208                           : Intrinsic::aarch64_neon_sqsub;
5209     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
5210   }
5211   }
5212 
5213   llvm::VectorType *VTy = GetNeonType(this, Type);
5214   llvm::Type *Ty = VTy;
5215   if (!Ty)
5216     return nullptr;
5217 
5218   // Not all intrinsics handled by the common case work for AArch64 yet, so only
5219   // defer to common code if it's been added to our special map.
5220   Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
5221                                    AArch64SIMDIntrinsicsProvenSorted);
5222 
5223   if (Builtin)
5224     return EmitCommonNeonBuiltinExpr(
5225         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
5226         Builtin->NameHint, Builtin->TypeModifier, E, Ops,
5227         /*never use addresses*/ Address::invalid(), Address::invalid());
5228 
5229   if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops))
5230     return V;
5231 
5232   unsigned Int;
5233   switch (BuiltinID) {
5234   default: return nullptr;
5235   case NEON::BI__builtin_neon_vbsl_v:
5236   case NEON::BI__builtin_neon_vbslq_v: {
5237     llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
5238     Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
5239     Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
5240     Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
5241 
5242     Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
5243     Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
5244     Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
5245     return Builder.CreateBitCast(Ops[0], Ty);
5246   }
5247   case NEON::BI__builtin_neon_vfma_lane_v:
5248   case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
5249     // The ARM builtins (and instructions) have the addend as the first
5250     // operand, but the 'fma' intrinsics have it last. Swap it around here.
5251     Value *Addend = Ops[0];
5252     Value *Multiplicand = Ops[1];
5253     Value *LaneSource = Ops[2];
5254     Ops[0] = Multiplicand;
5255     Ops[1] = LaneSource;
5256     Ops[2] = Addend;
5257 
5258     // Now adjust things to handle the lane access.
5259     llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
5260       llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
5261       VTy;
5262     llvm::Constant *cst = cast<Constant>(Ops[3]);
5263     Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
5264     Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
5265     Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
5266 
5267     Ops.pop_back();
5268     Int = Intrinsic::fma;
5269     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
5270   }
5271   case NEON::BI__builtin_neon_vfma_laneq_v: {
5272     llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
5273     // v1f64 fma should be mapped to Neon scalar f64 fma
5274     if (VTy && VTy->getElementType() == DoubleTy) {
5275       Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5276       Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5277       llvm::Type *VTy = GetNeonType(this,
5278         NeonTypeFlags(NeonTypeFlags::Float64, false, true));
5279       Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
5280       Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
5281       Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
5282       Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
5283       return Builder.CreateBitCast(Result, Ty);
5284     }
5285     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5286     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5287     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5288 
5289     llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
5290                                             VTy->getNumElements() * 2);
5291     Ops[2] = Builder.CreateBitCast(Ops[2], STy);
5292     Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
5293                                                cast<ConstantInt>(Ops[3]));
5294     Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
5295 
5296     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
5297   }
5298   case NEON::BI__builtin_neon_vfmaq_laneq_v: {
5299     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5300     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5301     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5302 
5303     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5304     Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
5305     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
5306   }
5307   case NEON::BI__builtin_neon_vfmas_lane_f32:
5308   case NEON::BI__builtin_neon_vfmas_laneq_f32:
5309   case NEON::BI__builtin_neon_vfmad_lane_f64:
5310   case NEON::BI__builtin_neon_vfmad_laneq_f64: {
5311     Ops.push_back(EmitScalarExpr(E->getArg(3)));
5312     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
5313     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5314     Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
5315     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
5316   }
5317   case NEON::BI__builtin_neon_vmull_v:
5318     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5319     Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
5320     if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
5321     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
5322   case NEON::BI__builtin_neon_vmax_v:
5323   case NEON::BI__builtin_neon_vmaxq_v:
5324     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5325     Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
5326     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
5327     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
5328   case NEON::BI__builtin_neon_vmin_v:
5329   case NEON::BI__builtin_neon_vminq_v:
5330     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5331     Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
5332     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
5333     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
5334   case NEON::BI__builtin_neon_vabd_v:
5335   case NEON::BI__builtin_neon_vabdq_v:
5336     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5337     Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
5338     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
5339     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
5340   case NEON::BI__builtin_neon_vpadal_v:
5341   case NEON::BI__builtin_neon_vpadalq_v: {
5342     unsigned ArgElts = VTy->getNumElements();
5343     llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
5344     unsigned BitWidth = EltTy->getBitWidth();
5345     llvm::Type *ArgTy = llvm::VectorType::get(
5346         llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
5347     llvm::Type* Tys[2] = { VTy, ArgTy };
5348     Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
5349     SmallVector<llvm::Value*, 1> TmpOps;
5350     TmpOps.push_back(Ops[1]);
5351     Function *F = CGM.getIntrinsic(Int, Tys);
5352     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
5353     llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
5354     return Builder.CreateAdd(tmp, addend);
5355   }
5356   case NEON::BI__builtin_neon_vpmin_v:
5357   case NEON::BI__builtin_neon_vpminq_v:
5358     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5359     Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
5360     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
5361     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
5362   case NEON::BI__builtin_neon_vpmax_v:
5363   case NEON::BI__builtin_neon_vpmaxq_v:
5364     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5365     Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
5366     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
5367     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
5368   case NEON::BI__builtin_neon_vminnm_v:
5369   case NEON::BI__builtin_neon_vminnmq_v:
5370     Int = Intrinsic::aarch64_neon_fminnm;
5371     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
5372   case NEON::BI__builtin_neon_vmaxnm_v:
5373   case NEON::BI__builtin_neon_vmaxnmq_v:
5374     Int = Intrinsic::aarch64_neon_fmaxnm;
5375     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
5376   case NEON::BI__builtin_neon_vrecpss_f32: {
5377     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5378     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
5379                         Ops, "vrecps");
5380   }
5381   case NEON::BI__builtin_neon_vrecpsd_f64: {
5382     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5383     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
5384                         Ops, "vrecps");
5385   }
5386   case NEON::BI__builtin_neon_vqshrun_n_v:
5387     Int = Intrinsic::aarch64_neon_sqshrun;
5388     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
5389   case NEON::BI__builtin_neon_vqrshrun_n_v:
5390     Int = Intrinsic::aarch64_neon_sqrshrun;
5391     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
5392   case NEON::BI__builtin_neon_vqshrn_n_v:
5393     Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
5394     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
5395   case NEON::BI__builtin_neon_vrshrn_n_v:
5396     Int = Intrinsic::aarch64_neon_rshrn;
5397     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
5398   case NEON::BI__builtin_neon_vqrshrn_n_v:
5399     Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
5400     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
5401   case NEON::BI__builtin_neon_vrnda_v:
5402   case NEON::BI__builtin_neon_vrndaq_v: {
5403     Int = Intrinsic::round;
5404     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
5405   }
5406   case NEON::BI__builtin_neon_vrndi_v:
5407   case NEON::BI__builtin_neon_vrndiq_v: {
5408     Int = Intrinsic::nearbyint;
5409     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
5410   }
5411   case NEON::BI__builtin_neon_vrndm_v:
5412   case NEON::BI__builtin_neon_vrndmq_v: {
5413     Int = Intrinsic::floor;
5414     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
5415   }
5416   case NEON::BI__builtin_neon_vrndn_v:
5417   case NEON::BI__builtin_neon_vrndnq_v: {
5418     Int = Intrinsic::aarch64_neon_frintn;
5419     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
5420   }
5421   case NEON::BI__builtin_neon_vrndp_v:
5422   case NEON::BI__builtin_neon_vrndpq_v: {
5423     Int = Intrinsic::ceil;
5424     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
5425   }
5426   case NEON::BI__builtin_neon_vrndx_v:
5427   case NEON::BI__builtin_neon_vrndxq_v: {
5428     Int = Intrinsic::rint;
5429     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
5430   }
5431   case NEON::BI__builtin_neon_vrnd_v:
5432   case NEON::BI__builtin_neon_vrndq_v: {
5433     Int = Intrinsic::trunc;
5434     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
5435   }
5436   case NEON::BI__builtin_neon_vceqz_v:
5437   case NEON::BI__builtin_neon_vceqzq_v:
5438     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
5439                                          ICmpInst::ICMP_EQ, "vceqz");
5440   case NEON::BI__builtin_neon_vcgez_v:
5441   case NEON::BI__builtin_neon_vcgezq_v:
5442     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
5443                                          ICmpInst::ICMP_SGE, "vcgez");
5444   case NEON::BI__builtin_neon_vclez_v:
5445   case NEON::BI__builtin_neon_vclezq_v:
5446     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
5447                                          ICmpInst::ICMP_SLE, "vclez");
5448   case NEON::BI__builtin_neon_vcgtz_v:
5449   case NEON::BI__builtin_neon_vcgtzq_v:
5450     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
5451                                          ICmpInst::ICMP_SGT, "vcgtz");
5452   case NEON::BI__builtin_neon_vcltz_v:
5453   case NEON::BI__builtin_neon_vcltzq_v:
5454     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
5455                                          ICmpInst::ICMP_SLT, "vcltz");
5456   case NEON::BI__builtin_neon_vcvt_f64_v:
5457   case NEON::BI__builtin_neon_vcvtq_f64_v:
5458     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5459     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
5460     return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
5461                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
5462   case NEON::BI__builtin_neon_vcvt_f64_f32: {
5463     assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
5464            "unexpected vcvt_f64_f32 builtin");
5465     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
5466     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
5467 
5468     return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
5469   }
5470   case NEON::BI__builtin_neon_vcvt_f32_f64: {
5471     assert(Type.getEltType() == NeonTypeFlags::Float32 &&
5472            "unexpected vcvt_f32_f64 builtin");
5473     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
5474     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
5475 
5476     return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
5477   }
5478   case NEON::BI__builtin_neon_vcvt_s32_v:
5479   case NEON::BI__builtin_neon_vcvt_u32_v:
5480   case NEON::BI__builtin_neon_vcvt_s64_v:
5481   case NEON::BI__builtin_neon_vcvt_u64_v:
5482   case NEON::BI__builtin_neon_vcvtq_s32_v:
5483   case NEON::BI__builtin_neon_vcvtq_u32_v:
5484   case NEON::BI__builtin_neon_vcvtq_s64_v:
5485   case NEON::BI__builtin_neon_vcvtq_u64_v: {
5486     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
5487     if (usgn)
5488       return Builder.CreateFPToUI(Ops[0], Ty);
5489     return Builder.CreateFPToSI(Ops[0], Ty);
5490   }
5491   case NEON::BI__builtin_neon_vcvta_s32_v:
5492   case NEON::BI__builtin_neon_vcvtaq_s32_v:
5493   case NEON::BI__builtin_neon_vcvta_u32_v:
5494   case NEON::BI__builtin_neon_vcvtaq_u32_v:
5495   case NEON::BI__builtin_neon_vcvta_s64_v:
5496   case NEON::BI__builtin_neon_vcvtaq_s64_v:
5497   case NEON::BI__builtin_neon_vcvta_u64_v:
5498   case NEON::BI__builtin_neon_vcvtaq_u64_v: {
5499     Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
5500     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5501     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
5502   }
5503   case NEON::BI__builtin_neon_vcvtm_s32_v:
5504   case NEON::BI__builtin_neon_vcvtmq_s32_v:
5505   case NEON::BI__builtin_neon_vcvtm_u32_v:
5506   case NEON::BI__builtin_neon_vcvtmq_u32_v:
5507   case NEON::BI__builtin_neon_vcvtm_s64_v:
5508   case NEON::BI__builtin_neon_vcvtmq_s64_v:
5509   case NEON::BI__builtin_neon_vcvtm_u64_v:
5510   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
5511     Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
5512     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5513     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
5514   }
5515   case NEON::BI__builtin_neon_vcvtn_s32_v:
5516   case NEON::BI__builtin_neon_vcvtnq_s32_v:
5517   case NEON::BI__builtin_neon_vcvtn_u32_v:
5518   case NEON::BI__builtin_neon_vcvtnq_u32_v:
5519   case NEON::BI__builtin_neon_vcvtn_s64_v:
5520   case NEON::BI__builtin_neon_vcvtnq_s64_v:
5521   case NEON::BI__builtin_neon_vcvtn_u64_v:
5522   case NEON::BI__builtin_neon_vcvtnq_u64_v: {
5523     Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
5524     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5525     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
5526   }
5527   case NEON::BI__builtin_neon_vcvtp_s32_v:
5528   case NEON::BI__builtin_neon_vcvtpq_s32_v:
5529   case NEON::BI__builtin_neon_vcvtp_u32_v:
5530   case NEON::BI__builtin_neon_vcvtpq_u32_v:
5531   case NEON::BI__builtin_neon_vcvtp_s64_v:
5532   case NEON::BI__builtin_neon_vcvtpq_s64_v:
5533   case NEON::BI__builtin_neon_vcvtp_u64_v:
5534   case NEON::BI__builtin_neon_vcvtpq_u64_v: {
5535     Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
5536     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5537     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
5538   }
5539   case NEON::BI__builtin_neon_vmulx_v:
5540   case NEON::BI__builtin_neon_vmulxq_v: {
5541     Int = Intrinsic::aarch64_neon_fmulx;
5542     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
5543   }
5544   case NEON::BI__builtin_neon_vmul_lane_v:
5545   case NEON::BI__builtin_neon_vmul_laneq_v: {
5546     // v1f64 vmul_lane should be mapped to Neon scalar mul lane
5547     bool Quad = false;
5548     if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
5549       Quad = true;
5550     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5551     llvm::Type *VTy = GetNeonType(this,
5552       NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
5553     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
5554     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
5555     Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
5556     return Builder.CreateBitCast(Result, Ty);
5557   }
5558   case NEON::BI__builtin_neon_vnegd_s64:
5559     return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
5560   case NEON::BI__builtin_neon_vpmaxnm_v:
5561   case NEON::BI__builtin_neon_vpmaxnmq_v: {
5562     Int = Intrinsic::aarch64_neon_fmaxnmp;
5563     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
5564   }
5565   case NEON::BI__builtin_neon_vpminnm_v:
5566   case NEON::BI__builtin_neon_vpminnmq_v: {
5567     Int = Intrinsic::aarch64_neon_fminnmp;
5568     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
5569   }
5570   case NEON::BI__builtin_neon_vsqrt_v:
5571   case NEON::BI__builtin_neon_vsqrtq_v: {
5572     Int = Intrinsic::sqrt;
5573     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5574     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
5575   }
5576   case NEON::BI__builtin_neon_vrbit_v:
5577   case NEON::BI__builtin_neon_vrbitq_v: {
5578     Int = Intrinsic::aarch64_neon_rbit;
5579     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
5580   }
5581   case NEON::BI__builtin_neon_vaddv_u8:
5582     // FIXME: These are handled by the AArch64 scalar code.
5583     usgn = true;
5584     // FALLTHROUGH
5585   case NEON::BI__builtin_neon_vaddv_s8: {
5586     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5587     Ty = Int32Ty;
5588     VTy = llvm::VectorType::get(Int8Ty, 8);
5589     llvm::Type *Tys[2] = { Ty, VTy };
5590     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5591     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5592     return Builder.CreateTrunc(Ops[0], Int8Ty);
5593   }
5594   case NEON::BI__builtin_neon_vaddv_u16:
5595     usgn = true;
5596     // FALLTHROUGH
5597   case NEON::BI__builtin_neon_vaddv_s16: {
5598     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5599     Ty = Int32Ty;
5600     VTy = llvm::VectorType::get(Int16Ty, 4);
5601     llvm::Type *Tys[2] = { Ty, VTy };
5602     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5603     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5604     return Builder.CreateTrunc(Ops[0], Int16Ty);
5605   }
5606   case NEON::BI__builtin_neon_vaddvq_u8:
5607     usgn = true;
5608     // FALLTHROUGH
5609   case NEON::BI__builtin_neon_vaddvq_s8: {
5610     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5611     Ty = Int32Ty;
5612     VTy = llvm::VectorType::get(Int8Ty, 16);
5613     llvm::Type *Tys[2] = { Ty, VTy };
5614     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5615     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5616     return Builder.CreateTrunc(Ops[0], Int8Ty);
5617   }
5618   case NEON::BI__builtin_neon_vaddvq_u16:
5619     usgn = true;
5620     // FALLTHROUGH
5621   case NEON::BI__builtin_neon_vaddvq_s16: {
5622     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5623     Ty = Int32Ty;
5624     VTy = llvm::VectorType::get(Int16Ty, 8);
5625     llvm::Type *Tys[2] = { Ty, VTy };
5626     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5627     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5628     return Builder.CreateTrunc(Ops[0], Int16Ty);
5629   }
5630   case NEON::BI__builtin_neon_vmaxv_u8: {
5631     Int = Intrinsic::aarch64_neon_umaxv;
5632     Ty = Int32Ty;
5633     VTy = llvm::VectorType::get(Int8Ty, 8);
5634     llvm::Type *Tys[2] = { Ty, VTy };
5635     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5636     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5637     return Builder.CreateTrunc(Ops[0], Int8Ty);
5638   }
5639   case NEON::BI__builtin_neon_vmaxv_u16: {
5640     Int = Intrinsic::aarch64_neon_umaxv;
5641     Ty = Int32Ty;
5642     VTy = llvm::VectorType::get(Int16Ty, 4);
5643     llvm::Type *Tys[2] = { Ty, VTy };
5644     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5645     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5646     return Builder.CreateTrunc(Ops[0], Int16Ty);
5647   }
5648   case NEON::BI__builtin_neon_vmaxvq_u8: {
5649     Int = Intrinsic::aarch64_neon_umaxv;
5650     Ty = Int32Ty;
5651     VTy = llvm::VectorType::get(Int8Ty, 16);
5652     llvm::Type *Tys[2] = { Ty, VTy };
5653     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5654     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5655     return Builder.CreateTrunc(Ops[0], Int8Ty);
5656   }
5657   case NEON::BI__builtin_neon_vmaxvq_u16: {
5658     Int = Intrinsic::aarch64_neon_umaxv;
5659     Ty = Int32Ty;
5660     VTy = llvm::VectorType::get(Int16Ty, 8);
5661     llvm::Type *Tys[2] = { Ty, VTy };
5662     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5663     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5664     return Builder.CreateTrunc(Ops[0], Int16Ty);
5665   }
5666   case NEON::BI__builtin_neon_vmaxv_s8: {
5667     Int = Intrinsic::aarch64_neon_smaxv;
5668     Ty = Int32Ty;
5669     VTy = llvm::VectorType::get(Int8Ty, 8);
5670     llvm::Type *Tys[2] = { Ty, VTy };
5671     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5672     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5673     return Builder.CreateTrunc(Ops[0], Int8Ty);
5674   }
5675   case NEON::BI__builtin_neon_vmaxv_s16: {
5676     Int = Intrinsic::aarch64_neon_smaxv;
5677     Ty = Int32Ty;
5678     VTy = llvm::VectorType::get(Int16Ty, 4);
5679     llvm::Type *Tys[2] = { Ty, VTy };
5680     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5681     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5682     return Builder.CreateTrunc(Ops[0], Int16Ty);
5683   }
5684   case NEON::BI__builtin_neon_vmaxvq_s8: {
5685     Int = Intrinsic::aarch64_neon_smaxv;
5686     Ty = Int32Ty;
5687     VTy = llvm::VectorType::get(Int8Ty, 16);
5688     llvm::Type *Tys[2] = { Ty, VTy };
5689     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5690     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5691     return Builder.CreateTrunc(Ops[0], Int8Ty);
5692   }
5693   case NEON::BI__builtin_neon_vmaxvq_s16: {
5694     Int = Intrinsic::aarch64_neon_smaxv;
5695     Ty = Int32Ty;
5696     VTy = llvm::VectorType::get(Int16Ty, 8);
5697     llvm::Type *Tys[2] = { Ty, VTy };
5698     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5699     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5700     return Builder.CreateTrunc(Ops[0], Int16Ty);
5701   }
5702   case NEON::BI__builtin_neon_vminv_u8: {
5703     Int = Intrinsic::aarch64_neon_uminv;
5704     Ty = Int32Ty;
5705     VTy = llvm::VectorType::get(Int8Ty, 8);
5706     llvm::Type *Tys[2] = { Ty, VTy };
5707     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5708     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5709     return Builder.CreateTrunc(Ops[0], Int8Ty);
5710   }
5711   case NEON::BI__builtin_neon_vminv_u16: {
5712     Int = Intrinsic::aarch64_neon_uminv;
5713     Ty = Int32Ty;
5714     VTy = llvm::VectorType::get(Int16Ty, 4);
5715     llvm::Type *Tys[2] = { Ty, VTy };
5716     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5717     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5718     return Builder.CreateTrunc(Ops[0], Int16Ty);
5719   }
5720   case NEON::BI__builtin_neon_vminvq_u8: {
5721     Int = Intrinsic::aarch64_neon_uminv;
5722     Ty = Int32Ty;
5723     VTy = llvm::VectorType::get(Int8Ty, 16);
5724     llvm::Type *Tys[2] = { Ty, VTy };
5725     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5726     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5727     return Builder.CreateTrunc(Ops[0], Int8Ty);
5728   }
5729   case NEON::BI__builtin_neon_vminvq_u16: {
5730     Int = Intrinsic::aarch64_neon_uminv;
5731     Ty = Int32Ty;
5732     VTy = llvm::VectorType::get(Int16Ty, 8);
5733     llvm::Type *Tys[2] = { Ty, VTy };
5734     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5735     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5736     return Builder.CreateTrunc(Ops[0], Int16Ty);
5737   }
5738   case NEON::BI__builtin_neon_vminv_s8: {
5739     Int = Intrinsic::aarch64_neon_sminv;
5740     Ty = Int32Ty;
5741     VTy = llvm::VectorType::get(Int8Ty, 8);
5742     llvm::Type *Tys[2] = { Ty, VTy };
5743     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5744     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5745     return Builder.CreateTrunc(Ops[0], Int8Ty);
5746   }
5747   case NEON::BI__builtin_neon_vminv_s16: {
5748     Int = Intrinsic::aarch64_neon_sminv;
5749     Ty = Int32Ty;
5750     VTy = llvm::VectorType::get(Int16Ty, 4);
5751     llvm::Type *Tys[2] = { Ty, VTy };
5752     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5753     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5754     return Builder.CreateTrunc(Ops[0], Int16Ty);
5755   }
5756   case NEON::BI__builtin_neon_vminvq_s8: {
5757     Int = Intrinsic::aarch64_neon_sminv;
5758     Ty = Int32Ty;
5759     VTy = llvm::VectorType::get(Int8Ty, 16);
5760     llvm::Type *Tys[2] = { Ty, VTy };
5761     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5762     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5763     return Builder.CreateTrunc(Ops[0], Int8Ty);
5764   }
5765   case NEON::BI__builtin_neon_vminvq_s16: {
5766     Int = Intrinsic::aarch64_neon_sminv;
5767     Ty = Int32Ty;
5768     VTy = llvm::VectorType::get(Int16Ty, 8);
5769     llvm::Type *Tys[2] = { Ty, VTy };
5770     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5771     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5772     return Builder.CreateTrunc(Ops[0], Int16Ty);
5773   }
5774   case NEON::BI__builtin_neon_vmul_n_f64: {
5775     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5776     Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
5777     return Builder.CreateFMul(Ops[0], RHS);
5778   }
5779   case NEON::BI__builtin_neon_vaddlv_u8: {
5780     Int = Intrinsic::aarch64_neon_uaddlv;
5781     Ty = Int32Ty;
5782     VTy = llvm::VectorType::get(Int8Ty, 8);
5783     llvm::Type *Tys[2] = { Ty, VTy };
5784     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5785     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5786     return Builder.CreateTrunc(Ops[0], Int16Ty);
5787   }
5788   case NEON::BI__builtin_neon_vaddlv_u16: {
5789     Int = Intrinsic::aarch64_neon_uaddlv;
5790     Ty = Int32Ty;
5791     VTy = llvm::VectorType::get(Int16Ty, 4);
5792     llvm::Type *Tys[2] = { Ty, VTy };
5793     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5794     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5795   }
5796   case NEON::BI__builtin_neon_vaddlvq_u8: {
5797     Int = Intrinsic::aarch64_neon_uaddlv;
5798     Ty = Int32Ty;
5799     VTy = llvm::VectorType::get(Int8Ty, 16);
5800     llvm::Type *Tys[2] = { Ty, VTy };
5801     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5802     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5803     return Builder.CreateTrunc(Ops[0], Int16Ty);
5804   }
5805   case NEON::BI__builtin_neon_vaddlvq_u16: {
5806     Int = Intrinsic::aarch64_neon_uaddlv;
5807     Ty = Int32Ty;
5808     VTy = llvm::VectorType::get(Int16Ty, 8);
5809     llvm::Type *Tys[2] = { Ty, VTy };
5810     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5811     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5812   }
5813   case NEON::BI__builtin_neon_vaddlv_s8: {
5814     Int = Intrinsic::aarch64_neon_saddlv;
5815     Ty = Int32Ty;
5816     VTy = llvm::VectorType::get(Int8Ty, 8);
5817     llvm::Type *Tys[2] = { Ty, VTy };
5818     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5819     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5820     return Builder.CreateTrunc(Ops[0], Int16Ty);
5821   }
5822   case NEON::BI__builtin_neon_vaddlv_s16: {
5823     Int = Intrinsic::aarch64_neon_saddlv;
5824     Ty = Int32Ty;
5825     VTy = llvm::VectorType::get(Int16Ty, 4);
5826     llvm::Type *Tys[2] = { Ty, VTy };
5827     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5828     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5829   }
5830   case NEON::BI__builtin_neon_vaddlvq_s8: {
5831     Int = Intrinsic::aarch64_neon_saddlv;
5832     Ty = Int32Ty;
5833     VTy = llvm::VectorType::get(Int8Ty, 16);
5834     llvm::Type *Tys[2] = { Ty, VTy };
5835     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5836     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5837     return Builder.CreateTrunc(Ops[0], Int16Ty);
5838   }
5839   case NEON::BI__builtin_neon_vaddlvq_s16: {
5840     Int = Intrinsic::aarch64_neon_saddlv;
5841     Ty = Int32Ty;
5842     VTy = llvm::VectorType::get(Int16Ty, 8);
5843     llvm::Type *Tys[2] = { Ty, VTy };
5844     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5845     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5846   }
5847   case NEON::BI__builtin_neon_vsri_n_v:
5848   case NEON::BI__builtin_neon_vsriq_n_v: {
5849     Int = Intrinsic::aarch64_neon_vsri;
5850     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
5851     return EmitNeonCall(Intrin, Ops, "vsri_n");
5852   }
5853   case NEON::BI__builtin_neon_vsli_n_v:
5854   case NEON::BI__builtin_neon_vsliq_n_v: {
5855     Int = Intrinsic::aarch64_neon_vsli;
5856     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
5857     return EmitNeonCall(Intrin, Ops, "vsli_n");
5858   }
5859   case NEON::BI__builtin_neon_vsra_n_v:
5860   case NEON::BI__builtin_neon_vsraq_n_v:
5861     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5862     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
5863     return Builder.CreateAdd(Ops[0], Ops[1]);
5864   case NEON::BI__builtin_neon_vrsra_n_v:
5865   case NEON::BI__builtin_neon_vrsraq_n_v: {
5866     Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
5867     SmallVector<llvm::Value*,2> TmpOps;
5868     TmpOps.push_back(Ops[1]);
5869     TmpOps.push_back(Ops[2]);
5870     Function* F = CGM.getIntrinsic(Int, Ty);
5871     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
5872     Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
5873     return Builder.CreateAdd(Ops[0], tmp);
5874   }
5875     // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
5876     // of an Align parameter here.
5877   case NEON::BI__builtin_neon_vld1_x2_v:
5878   case NEON::BI__builtin_neon_vld1q_x2_v:
5879   case NEON::BI__builtin_neon_vld1_x3_v:
5880   case NEON::BI__builtin_neon_vld1q_x3_v:
5881   case NEON::BI__builtin_neon_vld1_x4_v:
5882   case NEON::BI__builtin_neon_vld1q_x4_v: {
5883     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
5884     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5885     llvm::Type *Tys[2] = { VTy, PTy };
5886     unsigned Int;
5887     switch (BuiltinID) {
5888     case NEON::BI__builtin_neon_vld1_x2_v:
5889     case NEON::BI__builtin_neon_vld1q_x2_v:
5890       Int = Intrinsic::aarch64_neon_ld1x2;
5891       break;
5892     case NEON::BI__builtin_neon_vld1_x3_v:
5893     case NEON::BI__builtin_neon_vld1q_x3_v:
5894       Int = Intrinsic::aarch64_neon_ld1x3;
5895       break;
5896     case NEON::BI__builtin_neon_vld1_x4_v:
5897     case NEON::BI__builtin_neon_vld1q_x4_v:
5898       Int = Intrinsic::aarch64_neon_ld1x4;
5899       break;
5900     }
5901     Function *F = CGM.getIntrinsic(Int, Tys);
5902     Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
5903     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5904     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5905     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5906   }
5907   case NEON::BI__builtin_neon_vst1_x2_v:
5908   case NEON::BI__builtin_neon_vst1q_x2_v:
5909   case NEON::BI__builtin_neon_vst1_x3_v:
5910   case NEON::BI__builtin_neon_vst1q_x3_v:
5911   case NEON::BI__builtin_neon_vst1_x4_v:
5912   case NEON::BI__builtin_neon_vst1q_x4_v: {
5913     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
5914     llvm::Type *Tys[2] = { VTy, PTy };
5915     unsigned Int;
5916     switch (BuiltinID) {
5917     case NEON::BI__builtin_neon_vst1_x2_v:
5918     case NEON::BI__builtin_neon_vst1q_x2_v:
5919       Int = Intrinsic::aarch64_neon_st1x2;
5920       break;
5921     case NEON::BI__builtin_neon_vst1_x3_v:
5922     case NEON::BI__builtin_neon_vst1q_x3_v:
5923       Int = Intrinsic::aarch64_neon_st1x3;
5924       break;
5925     case NEON::BI__builtin_neon_vst1_x4_v:
5926     case NEON::BI__builtin_neon_vst1q_x4_v:
5927       Int = Intrinsic::aarch64_neon_st1x4;
5928       break;
5929     }
5930     std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
5931     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
5932   }
5933   case NEON::BI__builtin_neon_vld1_v:
5934   case NEON::BI__builtin_neon_vld1q_v:
5935     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
5936     return Builder.CreateDefaultAlignedLoad(Ops[0]);
5937   case NEON::BI__builtin_neon_vst1_v:
5938   case NEON::BI__builtin_neon_vst1q_v:
5939     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
5940     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
5941     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5942   case NEON::BI__builtin_neon_vld1_lane_v:
5943   case NEON::BI__builtin_neon_vld1q_lane_v:
5944     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5945     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
5946     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5947     Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]);
5948     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
5949   case NEON::BI__builtin_neon_vld1_dup_v:
5950   case NEON::BI__builtin_neon_vld1q_dup_v: {
5951     Value *V = UndefValue::get(Ty);
5952     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
5953     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5954     Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]);
5955     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
5956     Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
5957     return EmitNeonSplat(Ops[0], CI);
5958   }
5959   case NEON::BI__builtin_neon_vst1_lane_v:
5960   case NEON::BI__builtin_neon_vst1q_lane_v:
5961     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5962     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
5963     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5964     return Builder.CreateDefaultAlignedStore(Ops[1],
5965                                              Builder.CreateBitCast(Ops[0], Ty));
5966   case NEON::BI__builtin_neon_vld2_v:
5967   case NEON::BI__builtin_neon_vld2q_v: {
5968     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
5969     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5970     llvm::Type *Tys[2] = { VTy, PTy };
5971     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
5972     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
5973     Ops[0] = Builder.CreateBitCast(Ops[0],
5974                 llvm::PointerType::getUnqual(Ops[1]->getType()));
5975     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5976   }
5977   case NEON::BI__builtin_neon_vld3_v:
5978   case NEON::BI__builtin_neon_vld3q_v: {
5979     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
5980     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5981     llvm::Type *Tys[2] = { VTy, PTy };
5982     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
5983     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
5984     Ops[0] = Builder.CreateBitCast(Ops[0],
5985                 llvm::PointerType::getUnqual(Ops[1]->getType()));
5986     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5987   }
5988   case NEON::BI__builtin_neon_vld4_v:
5989   case NEON::BI__builtin_neon_vld4q_v: {
5990     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
5991     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5992     llvm::Type *Tys[2] = { VTy, PTy };
5993     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
5994     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
5995     Ops[0] = Builder.CreateBitCast(Ops[0],
5996                 llvm::PointerType::getUnqual(Ops[1]->getType()));
5997     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5998   }
5999   case NEON::BI__builtin_neon_vld2_dup_v:
6000   case NEON::BI__builtin_neon_vld2q_dup_v: {
6001     llvm::Type *PTy =
6002       llvm::PointerType::getUnqual(VTy->getElementType());
6003     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6004     llvm::Type *Tys[2] = { VTy, PTy };
6005     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
6006     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6007     Ops[0] = Builder.CreateBitCast(Ops[0],
6008                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6009     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6010   }
6011   case NEON::BI__builtin_neon_vld3_dup_v:
6012   case NEON::BI__builtin_neon_vld3q_dup_v: {
6013     llvm::Type *PTy =
6014       llvm::PointerType::getUnqual(VTy->getElementType());
6015     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6016     llvm::Type *Tys[2] = { VTy, PTy };
6017     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
6018     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6019     Ops[0] = Builder.CreateBitCast(Ops[0],
6020                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6021     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6022   }
6023   case NEON::BI__builtin_neon_vld4_dup_v:
6024   case NEON::BI__builtin_neon_vld4q_dup_v: {
6025     llvm::Type *PTy =
6026       llvm::PointerType::getUnqual(VTy->getElementType());
6027     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6028     llvm::Type *Tys[2] = { VTy, PTy };
6029     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
6030     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6031     Ops[0] = Builder.CreateBitCast(Ops[0],
6032                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6033     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6034   }
6035   case NEON::BI__builtin_neon_vld2_lane_v:
6036   case NEON::BI__builtin_neon_vld2q_lane_v: {
6037     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6038     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
6039     Ops.push_back(Ops[1]);
6040     Ops.erase(Ops.begin()+1);
6041     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6042     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6043     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6044     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
6045     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6046     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6047     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6048   }
6049   case NEON::BI__builtin_neon_vld3_lane_v:
6050   case NEON::BI__builtin_neon_vld3q_lane_v: {
6051     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6052     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
6053     Ops.push_back(Ops[1]);
6054     Ops.erase(Ops.begin()+1);
6055     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6056     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6057     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6058     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6059     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
6060     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6061     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6062     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6063   }
6064   case NEON::BI__builtin_neon_vld4_lane_v:
6065   case NEON::BI__builtin_neon_vld4q_lane_v: {
6066     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6067     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
6068     Ops.push_back(Ops[1]);
6069     Ops.erase(Ops.begin()+1);
6070     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6071     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6072     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6073     Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
6074     Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
6075     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
6076     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6077     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6078     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6079   }
6080   case NEON::BI__builtin_neon_vst2_v:
6081   case NEON::BI__builtin_neon_vst2q_v: {
6082     Ops.push_back(Ops[0]);
6083     Ops.erase(Ops.begin());
6084     llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
6085     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
6086                         Ops, "");
6087   }
6088   case NEON::BI__builtin_neon_vst2_lane_v:
6089   case NEON::BI__builtin_neon_vst2q_lane_v: {
6090     Ops.push_back(Ops[0]);
6091     Ops.erase(Ops.begin());
6092     Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
6093     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6094     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
6095                         Ops, "");
6096   }
6097   case NEON::BI__builtin_neon_vst3_v:
6098   case NEON::BI__builtin_neon_vst3q_v: {
6099     Ops.push_back(Ops[0]);
6100     Ops.erase(Ops.begin());
6101     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6102     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
6103                         Ops, "");
6104   }
6105   case NEON::BI__builtin_neon_vst3_lane_v:
6106   case NEON::BI__builtin_neon_vst3q_lane_v: {
6107     Ops.push_back(Ops[0]);
6108     Ops.erase(Ops.begin());
6109     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6110     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6111     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
6112                         Ops, "");
6113   }
6114   case NEON::BI__builtin_neon_vst4_v:
6115   case NEON::BI__builtin_neon_vst4q_v: {
6116     Ops.push_back(Ops[0]);
6117     Ops.erase(Ops.begin());
6118     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6119     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
6120                         Ops, "");
6121   }
6122   case NEON::BI__builtin_neon_vst4_lane_v:
6123   case NEON::BI__builtin_neon_vst4q_lane_v: {
6124     Ops.push_back(Ops[0]);
6125     Ops.erase(Ops.begin());
6126     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6127     llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
6128     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
6129                         Ops, "");
6130   }
6131   case NEON::BI__builtin_neon_vtrn_v:
6132   case NEON::BI__builtin_neon_vtrnq_v: {
6133     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6134     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6135     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6136     Value *SV = nullptr;
6137 
6138     for (unsigned vi = 0; vi != 2; ++vi) {
6139       SmallVector<Constant*, 16> Indices;
6140       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6141         Indices.push_back(ConstantInt::get(Int32Ty, i+vi));
6142         Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi));
6143       }
6144       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6145       SV = llvm::ConstantVector::get(Indices);
6146       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
6147       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6148     }
6149     return SV;
6150   }
6151   case NEON::BI__builtin_neon_vuzp_v:
6152   case NEON::BI__builtin_neon_vuzpq_v: {
6153     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6154     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6155     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6156     Value *SV = nullptr;
6157 
6158     for (unsigned vi = 0; vi != 2; ++vi) {
6159       SmallVector<Constant*, 16> Indices;
6160       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
6161         Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
6162 
6163       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6164       SV = llvm::ConstantVector::get(Indices);
6165       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
6166       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6167     }
6168     return SV;
6169   }
6170   case NEON::BI__builtin_neon_vzip_v:
6171   case NEON::BI__builtin_neon_vzipq_v: {
6172     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6173     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6174     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6175     Value *SV = nullptr;
6176 
6177     for (unsigned vi = 0; vi != 2; ++vi) {
6178       SmallVector<Constant*, 16> Indices;
6179       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6180         Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
6181         Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
6182       }
6183       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6184       SV = llvm::ConstantVector::get(Indices);
6185       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
6186       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6187     }
6188     return SV;
6189   }
6190   case NEON::BI__builtin_neon_vqtbl1q_v: {
6191     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
6192                         Ops, "vtbl1");
6193   }
6194   case NEON::BI__builtin_neon_vqtbl2q_v: {
6195     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
6196                         Ops, "vtbl2");
6197   }
6198   case NEON::BI__builtin_neon_vqtbl3q_v: {
6199     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
6200                         Ops, "vtbl3");
6201   }
6202   case NEON::BI__builtin_neon_vqtbl4q_v: {
6203     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
6204                         Ops, "vtbl4");
6205   }
6206   case NEON::BI__builtin_neon_vqtbx1q_v: {
6207     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
6208                         Ops, "vtbx1");
6209   }
6210   case NEON::BI__builtin_neon_vqtbx2q_v: {
6211     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
6212                         Ops, "vtbx2");
6213   }
6214   case NEON::BI__builtin_neon_vqtbx3q_v: {
6215     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
6216                         Ops, "vtbx3");
6217   }
6218   case NEON::BI__builtin_neon_vqtbx4q_v: {
6219     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
6220                         Ops, "vtbx4");
6221   }
6222   case NEON::BI__builtin_neon_vsqadd_v:
6223   case NEON::BI__builtin_neon_vsqaddq_v: {
6224     Int = Intrinsic::aarch64_neon_usqadd;
6225     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
6226   }
6227   case NEON::BI__builtin_neon_vuqadd_v:
6228   case NEON::BI__builtin_neon_vuqaddq_v: {
6229     Int = Intrinsic::aarch64_neon_suqadd;
6230     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
6231   }
6232   }
6233 }
6234 
6235 llvm::Value *CodeGenFunction::
6236 BuildVector(ArrayRef<llvm::Value*> Ops) {
6237   assert((Ops.size() & (Ops.size() - 1)) == 0 &&
6238          "Not a power-of-two sized vector!");
6239   bool AllConstants = true;
6240   for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
6241     AllConstants &= isa<Constant>(Ops[i]);
6242 
6243   // If this is a constant vector, create a ConstantVector.
6244   if (AllConstants) {
6245     SmallVector<llvm::Constant*, 16> CstOps;
6246     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
6247       CstOps.push_back(cast<Constant>(Ops[i]));
6248     return llvm::ConstantVector::get(CstOps);
6249   }
6250 
6251   // Otherwise, insertelement the values to build the vector.
6252   Value *Result =
6253     llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
6254 
6255   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
6256     Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
6257 
6258   return Result;
6259 }
6260 
6261 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
6262                                            const CallExpr *E) {
6263   if (BuiltinID == X86::BI__builtin_ms_va_start ||
6264       BuiltinID == X86::BI__builtin_ms_va_end)
6265     return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
6266                           BuiltinID == X86::BI__builtin_ms_va_start);
6267   if (BuiltinID == X86::BI__builtin_ms_va_copy) {
6268     // Lower this manually. We can't reliably determine whether or not any
6269     // given va_copy() is for a Win64 va_list from the calling convention
6270     // alone, because it's legal to do this from a System V ABI function.
6271     // With opaque pointer types, we won't have enough information in LLVM
6272     // IR to determine this from the argument types, either. Best to do it
6273     // now, while we have enough information.
6274     Address DestAddr = EmitMSVAListRef(E->getArg(0));
6275     Address SrcAddr = EmitMSVAListRef(E->getArg(1));
6276 
6277     llvm::Type *BPP = Int8PtrPtrTy;
6278 
6279     DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
6280                        DestAddr.getAlignment());
6281     SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
6282                       SrcAddr.getAlignment());
6283 
6284     Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
6285     return Builder.CreateStore(ArgPtr, DestAddr);
6286   }
6287 
6288   SmallVector<Value*, 4> Ops;
6289 
6290   // Find out if any arguments are required to be integer constant expressions.
6291   unsigned ICEArguments = 0;
6292   ASTContext::GetBuiltinTypeError Error;
6293   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
6294   assert(Error == ASTContext::GE_None && "Should not codegen an error");
6295 
6296   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
6297     // If this is a normal argument, just emit it as a scalar.
6298     if ((ICEArguments & (1 << i)) == 0) {
6299       Ops.push_back(EmitScalarExpr(E->getArg(i)));
6300       continue;
6301     }
6302 
6303     // If this is required to be a constant, constant fold it so that we know
6304     // that the generated intrinsic gets a ConstantInt.
6305     llvm::APSInt Result;
6306     bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
6307     assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
6308     Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
6309   }
6310 
6311   switch (BuiltinID) {
6312   default: return nullptr;
6313   case X86::BI__builtin_cpu_supports: {
6314     const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
6315     StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
6316 
6317     // TODO: When/if this becomes more than x86 specific then use a TargetInfo
6318     // based mapping.
6319     // Processor features and mapping to processor feature value.
6320     enum X86Features {
6321       CMOV = 0,
6322       MMX,
6323       POPCNT,
6324       SSE,
6325       SSE2,
6326       SSE3,
6327       SSSE3,
6328       SSE4_1,
6329       SSE4_2,
6330       AVX,
6331       AVX2,
6332       SSE4_A,
6333       FMA4,
6334       XOP,
6335       FMA,
6336       AVX512F,
6337       BMI,
6338       BMI2,
6339       MAX
6340     };
6341 
6342     X86Features Feature = StringSwitch<X86Features>(FeatureStr)
6343                               .Case("cmov", X86Features::CMOV)
6344                               .Case("mmx", X86Features::MMX)
6345                               .Case("popcnt", X86Features::POPCNT)
6346                               .Case("sse", X86Features::SSE)
6347                               .Case("sse2", X86Features::SSE2)
6348                               .Case("sse3", X86Features::SSE3)
6349                               .Case("sse4.1", X86Features::SSE4_1)
6350                               .Case("sse4.2", X86Features::SSE4_2)
6351                               .Case("avx", X86Features::AVX)
6352                               .Case("avx2", X86Features::AVX2)
6353                               .Case("sse4a", X86Features::SSE4_A)
6354                               .Case("fma4", X86Features::FMA4)
6355                               .Case("xop", X86Features::XOP)
6356                               .Case("fma", X86Features::FMA)
6357                               .Case("avx512f", X86Features::AVX512F)
6358                               .Case("bmi", X86Features::BMI)
6359                               .Case("bmi2", X86Features::BMI2)
6360                               .Default(X86Features::MAX);
6361     assert(Feature != X86Features::MAX && "Invalid feature!");
6362 
6363     // Matching the struct layout from the compiler-rt/libgcc structure that is
6364     // filled in:
6365     // unsigned int __cpu_vendor;
6366     // unsigned int __cpu_type;
6367     // unsigned int __cpu_subtype;
6368     // unsigned int __cpu_features[1];
6369     llvm::Type *STy = llvm::StructType::get(
6370         Int32Ty, Int32Ty, Int32Ty, llvm::ArrayType::get(Int32Ty, 1), nullptr);
6371 
6372     // Grab the global __cpu_model.
6373     llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
6374 
6375     // Grab the first (0th) element from the field __cpu_features off of the
6376     // global in the struct STy.
6377     Value *Idxs[] = {
6378       ConstantInt::get(Int32Ty, 0),
6379       ConstantInt::get(Int32Ty, 3),
6380       ConstantInt::get(Int32Ty, 0)
6381     };
6382     Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
6383     Value *Features = Builder.CreateAlignedLoad(CpuFeatures,
6384                                                 CharUnits::fromQuantity(4));
6385 
6386     // Check the value of the bit corresponding to the feature requested.
6387     Value *Bitset = Builder.CreateAnd(
6388         Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature));
6389     return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
6390   }
6391   case X86::BI_mm_prefetch: {
6392     Value *Address = Ops[0];
6393     Value *RW = ConstantInt::get(Int32Ty, 0);
6394     Value *Locality = Ops[1];
6395     Value *Data = ConstantInt::get(Int32Ty, 1);
6396     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
6397     return Builder.CreateCall(F, {Address, RW, Locality, Data});
6398   }
6399   case X86::BI__builtin_ia32_undef128:
6400   case X86::BI__builtin_ia32_undef256:
6401   case X86::BI__builtin_ia32_undef512:
6402     return UndefValue::get(ConvertType(E->getType()));
6403   case X86::BI__builtin_ia32_vec_init_v8qi:
6404   case X86::BI__builtin_ia32_vec_init_v4hi:
6405   case X86::BI__builtin_ia32_vec_init_v2si:
6406     return Builder.CreateBitCast(BuildVector(Ops),
6407                                  llvm::Type::getX86_MMXTy(getLLVMContext()));
6408   case X86::BI__builtin_ia32_vec_ext_v2si:
6409     return Builder.CreateExtractElement(Ops[0],
6410                                   llvm::ConstantInt::get(Ops[1]->getType(), 0));
6411   case X86::BI__builtin_ia32_ldmxcsr: {
6412     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
6413     Builder.CreateStore(Ops[0], Tmp);
6414     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
6415                           Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
6416   }
6417   case X86::BI__builtin_ia32_stmxcsr: {
6418     Address Tmp = CreateMemTemp(E->getType());
6419     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
6420                        Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
6421     return Builder.CreateLoad(Tmp, "stmxcsr");
6422   }
6423   case X86::BI__builtin_ia32_xsave:
6424   case X86::BI__builtin_ia32_xsave64:
6425   case X86::BI__builtin_ia32_xrstor:
6426   case X86::BI__builtin_ia32_xrstor64:
6427   case X86::BI__builtin_ia32_xsaveopt:
6428   case X86::BI__builtin_ia32_xsaveopt64:
6429   case X86::BI__builtin_ia32_xrstors:
6430   case X86::BI__builtin_ia32_xrstors64:
6431   case X86::BI__builtin_ia32_xsavec:
6432   case X86::BI__builtin_ia32_xsavec64:
6433   case X86::BI__builtin_ia32_xsaves:
6434   case X86::BI__builtin_ia32_xsaves64: {
6435     Intrinsic::ID ID;
6436 #define INTRINSIC_X86_XSAVE_ID(NAME) \
6437     case X86::BI__builtin_ia32_##NAME: \
6438       ID = Intrinsic::x86_##NAME; \
6439       break
6440     switch (BuiltinID) {
6441     default: llvm_unreachable("Unsupported intrinsic!");
6442     INTRINSIC_X86_XSAVE_ID(xsave);
6443     INTRINSIC_X86_XSAVE_ID(xsave64);
6444     INTRINSIC_X86_XSAVE_ID(xrstor);
6445     INTRINSIC_X86_XSAVE_ID(xrstor64);
6446     INTRINSIC_X86_XSAVE_ID(xsaveopt);
6447     INTRINSIC_X86_XSAVE_ID(xsaveopt64);
6448     INTRINSIC_X86_XSAVE_ID(xrstors);
6449     INTRINSIC_X86_XSAVE_ID(xrstors64);
6450     INTRINSIC_X86_XSAVE_ID(xsavec);
6451     INTRINSIC_X86_XSAVE_ID(xsavec64);
6452     INTRINSIC_X86_XSAVE_ID(xsaves);
6453     INTRINSIC_X86_XSAVE_ID(xsaves64);
6454     }
6455 #undef INTRINSIC_X86_XSAVE_ID
6456     Value *Mhi = Builder.CreateTrunc(
6457       Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
6458     Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
6459     Ops[1] = Mhi;
6460     Ops.push_back(Mlo);
6461     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
6462   }
6463   case X86::BI__builtin_ia32_storehps:
6464   case X86::BI__builtin_ia32_storelps: {
6465     llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
6466     llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
6467 
6468     // cast val v2i64
6469     Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
6470 
6471     // extract (0, 1)
6472     unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
6473     llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
6474     Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
6475 
6476     // cast pointer to i64 & store
6477     Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
6478     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6479   }
6480   case X86::BI__builtin_ia32_palignr128:
6481   case X86::BI__builtin_ia32_palignr256: {
6482     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
6483 
6484     unsigned NumElts =
6485       cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
6486     assert(NumElts % 16 == 0);
6487     unsigned NumLanes = NumElts / 16;
6488     unsigned NumLaneElts = NumElts / NumLanes;
6489 
6490     // If palignr is shifting the pair of vectors more than the size of two
6491     // lanes, emit zero.
6492     if (ShiftVal >= (2 * NumLaneElts))
6493       return llvm::Constant::getNullValue(ConvertType(E->getType()));
6494 
6495     // If palignr is shifting the pair of input vectors more than one lane,
6496     // but less than two lanes, convert to shifting in zeroes.
6497     if (ShiftVal > NumLaneElts) {
6498       ShiftVal -= NumLaneElts;
6499       Ops[1] = Ops[0];
6500       Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
6501     }
6502 
6503     uint32_t Indices[32];
6504     // 256-bit palignr operates on 128-bit lanes so we need to handle that
6505     for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
6506       for (unsigned i = 0; i != NumLaneElts; ++i) {
6507         unsigned Idx = ShiftVal + i;
6508         if (Idx >= NumLaneElts)
6509           Idx += NumElts - NumLaneElts; // End of lane, switch operand.
6510         Indices[l + i] = Idx + l;
6511       }
6512     }
6513 
6514     Value *SV = llvm::ConstantDataVector::get(getLLVMContext(),
6515                                               makeArrayRef(Indices, NumElts));
6516     return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
6517   }
6518   case X86::BI__builtin_ia32_pslldqi256: {
6519     // Shift value is in bits so divide by 8.
6520     unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3;
6521 
6522     // If pslldq is shifting the vector more than 15 bytes, emit zero.
6523     if (shiftVal >= 16)
6524       return llvm::Constant::getNullValue(ConvertType(E->getType()));
6525 
6526     uint32_t Indices[32];
6527     // 256-bit pslldq operates on 128-bit lanes so we need to handle that
6528     for (unsigned l = 0; l != 32; l += 16) {
6529       for (unsigned i = 0; i != 16; ++i) {
6530         unsigned Idx = 32 + i - shiftVal;
6531         if (Idx < 32) Idx -= 16; // end of lane, switch operand.
6532         Indices[l + i] = Idx + l;
6533       }
6534     }
6535 
6536     llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32);
6537     Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
6538     Value *Zero = llvm::Constant::getNullValue(VecTy);
6539 
6540     Value *SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
6541     SV = Builder.CreateShuffleVector(Zero, Ops[0], SV, "pslldq");
6542     llvm::Type *ResultType = ConvertType(E->getType());
6543     return Builder.CreateBitCast(SV, ResultType, "cast");
6544   }
6545   case X86::BI__builtin_ia32_psrldqi256: {
6546     // Shift value is in bits so divide by 8.
6547     unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3;
6548 
6549     // If psrldq is shifting the vector more than 15 bytes, emit zero.
6550     if (shiftVal >= 16)
6551       return llvm::Constant::getNullValue(ConvertType(E->getType()));
6552 
6553     uint32_t Indices[32];
6554     // 256-bit psrldq operates on 128-bit lanes so we need to handle that
6555     for (unsigned l = 0; l != 32; l += 16) {
6556       for (unsigned i = 0; i != 16; ++i) {
6557         unsigned Idx = i + shiftVal;
6558         if (Idx >= 16) Idx += 16; // end of lane, switch operand.
6559         Indices[l + i] = Idx + l;
6560       }
6561     }
6562 
6563     llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32);
6564     Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
6565     Value *Zero = llvm::Constant::getNullValue(VecTy);
6566 
6567     Value *SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
6568     SV = Builder.CreateShuffleVector(Ops[0], Zero, SV, "psrldq");
6569     llvm::Type *ResultType = ConvertType(E->getType());
6570     return Builder.CreateBitCast(SV, ResultType, "cast");
6571   }
6572   case X86::BI__builtin_ia32_movntps:
6573   case X86::BI__builtin_ia32_movntps256:
6574   case X86::BI__builtin_ia32_movntpd:
6575   case X86::BI__builtin_ia32_movntpd256:
6576   case X86::BI__builtin_ia32_movntdq:
6577   case X86::BI__builtin_ia32_movntdq256:
6578   case X86::BI__builtin_ia32_movnti:
6579   case X86::BI__builtin_ia32_movnti64: {
6580     llvm::MDNode *Node = llvm::MDNode::get(
6581         getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
6582 
6583     // Convert the type of the pointer to a pointer to the stored type.
6584     Value *BC = Builder.CreateBitCast(Ops[0],
6585                                 llvm::PointerType::getUnqual(Ops[1]->getType()),
6586                                       "cast");
6587     StoreInst *SI = Builder.CreateDefaultAlignedStore(Ops[1], BC);
6588     SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
6589 
6590     // If the operand is an integer, we can't assume alignment. Otherwise,
6591     // assume natural alignment.
6592     QualType ArgTy = E->getArg(1)->getType();
6593     unsigned Align;
6594     if (ArgTy->isIntegerType())
6595       Align = 1;
6596     else
6597       Align = getContext().getTypeSizeInChars(ArgTy).getQuantity();
6598     SI->setAlignment(Align);
6599     return SI;
6600   }
6601   // 3DNow!
6602   case X86::BI__builtin_ia32_pswapdsf:
6603   case X86::BI__builtin_ia32_pswapdsi: {
6604     llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
6605     Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
6606     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
6607     return Builder.CreateCall(F, Ops, "pswapd");
6608   }
6609   case X86::BI__builtin_ia32_rdrand16_step:
6610   case X86::BI__builtin_ia32_rdrand32_step:
6611   case X86::BI__builtin_ia32_rdrand64_step:
6612   case X86::BI__builtin_ia32_rdseed16_step:
6613   case X86::BI__builtin_ia32_rdseed32_step:
6614   case X86::BI__builtin_ia32_rdseed64_step: {
6615     Intrinsic::ID ID;
6616     switch (BuiltinID) {
6617     default: llvm_unreachable("Unsupported intrinsic!");
6618     case X86::BI__builtin_ia32_rdrand16_step:
6619       ID = Intrinsic::x86_rdrand_16;
6620       break;
6621     case X86::BI__builtin_ia32_rdrand32_step:
6622       ID = Intrinsic::x86_rdrand_32;
6623       break;
6624     case X86::BI__builtin_ia32_rdrand64_step:
6625       ID = Intrinsic::x86_rdrand_64;
6626       break;
6627     case X86::BI__builtin_ia32_rdseed16_step:
6628       ID = Intrinsic::x86_rdseed_16;
6629       break;
6630     case X86::BI__builtin_ia32_rdseed32_step:
6631       ID = Intrinsic::x86_rdseed_32;
6632       break;
6633     case X86::BI__builtin_ia32_rdseed64_step:
6634       ID = Intrinsic::x86_rdseed_64;
6635       break;
6636     }
6637 
6638     Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
6639     Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
6640                                       Ops[0]);
6641     return Builder.CreateExtractValue(Call, 1);
6642   }
  // SSE comparison intrinsics
6644   case X86::BI__builtin_ia32_cmpeqps:
6645   case X86::BI__builtin_ia32_cmpltps:
6646   case X86::BI__builtin_ia32_cmpleps:
6647   case X86::BI__builtin_ia32_cmpunordps:
6648   case X86::BI__builtin_ia32_cmpneqps:
6649   case X86::BI__builtin_ia32_cmpnltps:
6650   case X86::BI__builtin_ia32_cmpnleps:
6651   case X86::BI__builtin_ia32_cmpordps:
6652   case X86::BI__builtin_ia32_cmpeqss:
6653   case X86::BI__builtin_ia32_cmpltss:
6654   case X86::BI__builtin_ia32_cmpless:
6655   case X86::BI__builtin_ia32_cmpunordss:
6656   case X86::BI__builtin_ia32_cmpneqss:
6657   case X86::BI__builtin_ia32_cmpnltss:
6658   case X86::BI__builtin_ia32_cmpnless:
6659   case X86::BI__builtin_ia32_cmpordss:
6660   case X86::BI__builtin_ia32_cmpeqpd:
6661   case X86::BI__builtin_ia32_cmpltpd:
6662   case X86::BI__builtin_ia32_cmplepd:
6663   case X86::BI__builtin_ia32_cmpunordpd:
6664   case X86::BI__builtin_ia32_cmpneqpd:
6665   case X86::BI__builtin_ia32_cmpnltpd:
6666   case X86::BI__builtin_ia32_cmpnlepd:
6667   case X86::BI__builtin_ia32_cmpordpd:
6668   case X86::BI__builtin_ia32_cmpeqsd:
6669   case X86::BI__builtin_ia32_cmpltsd:
6670   case X86::BI__builtin_ia32_cmplesd:
6671   case X86::BI__builtin_ia32_cmpunordsd:
6672   case X86::BI__builtin_ia32_cmpneqsd:
6673   case X86::BI__builtin_ia32_cmpnltsd:
6674   case X86::BI__builtin_ia32_cmpnlesd:
6675   case X86::BI__builtin_ia32_cmpordsd:
6676     // These exist so that the builtin that takes an immediate can be bounds
6677     // checked by clang to avoid passing bad immediates to the backend. Since
6678     // AVX has a larger immediate than SSE we would need separate builtins to
6679     // do the different bounds checking. Rather than create a clang specific
6680     // SSE only builtin, this implements eight separate builtins to match gcc
6681     // implementation.
6682 
6683     // Choose the immediate.
6684     unsigned Imm;
6685     switch (BuiltinID) {
6686     default: llvm_unreachable("Unsupported intrinsic!");
6687     case X86::BI__builtin_ia32_cmpeqps:
6688     case X86::BI__builtin_ia32_cmpeqss:
6689     case X86::BI__builtin_ia32_cmpeqpd:
6690     case X86::BI__builtin_ia32_cmpeqsd:
6691       Imm = 0;
6692       break;
6693     case X86::BI__builtin_ia32_cmpltps:
6694     case X86::BI__builtin_ia32_cmpltss:
6695     case X86::BI__builtin_ia32_cmpltpd:
6696     case X86::BI__builtin_ia32_cmpltsd:
6697       Imm = 1;
6698       break;
6699     case X86::BI__builtin_ia32_cmpleps:
6700     case X86::BI__builtin_ia32_cmpless:
6701     case X86::BI__builtin_ia32_cmplepd:
6702     case X86::BI__builtin_ia32_cmplesd:
6703       Imm = 2;
6704       break;
6705     case X86::BI__builtin_ia32_cmpunordps:
6706     case X86::BI__builtin_ia32_cmpunordss:
6707     case X86::BI__builtin_ia32_cmpunordpd:
6708     case X86::BI__builtin_ia32_cmpunordsd:
6709       Imm = 3;
6710       break;
6711     case X86::BI__builtin_ia32_cmpneqps:
6712     case X86::BI__builtin_ia32_cmpneqss:
6713     case X86::BI__builtin_ia32_cmpneqpd:
6714     case X86::BI__builtin_ia32_cmpneqsd:
6715       Imm = 4;
6716       break;
6717     case X86::BI__builtin_ia32_cmpnltps:
6718     case X86::BI__builtin_ia32_cmpnltss:
6719     case X86::BI__builtin_ia32_cmpnltpd:
6720     case X86::BI__builtin_ia32_cmpnltsd:
6721       Imm = 5;
6722       break;
6723     case X86::BI__builtin_ia32_cmpnleps:
6724     case X86::BI__builtin_ia32_cmpnless:
6725     case X86::BI__builtin_ia32_cmpnlepd:
6726     case X86::BI__builtin_ia32_cmpnlesd:
6727       Imm = 6;
6728       break;
6729     case X86::BI__builtin_ia32_cmpordps:
6730     case X86::BI__builtin_ia32_cmpordss:
6731     case X86::BI__builtin_ia32_cmpordpd:
6732     case X86::BI__builtin_ia32_cmpordsd:
6733       Imm = 7;
6734       break;
6735     }
6736 
6737     // Choose the intrinsic ID.
6738     const char *name;
6739     Intrinsic::ID ID;
6740     switch (BuiltinID) {
6741     default: llvm_unreachable("Unsupported intrinsic!");
6742     case X86::BI__builtin_ia32_cmpeqps:
6743     case X86::BI__builtin_ia32_cmpltps:
6744     case X86::BI__builtin_ia32_cmpleps:
6745     case X86::BI__builtin_ia32_cmpunordps:
6746     case X86::BI__builtin_ia32_cmpneqps:
6747     case X86::BI__builtin_ia32_cmpnltps:
6748     case X86::BI__builtin_ia32_cmpnleps:
6749     case X86::BI__builtin_ia32_cmpordps:
6750       name = "cmpps";
6751       ID = Intrinsic::x86_sse_cmp_ps;
6752       break;
6753     case X86::BI__builtin_ia32_cmpeqss:
6754     case X86::BI__builtin_ia32_cmpltss:
6755     case X86::BI__builtin_ia32_cmpless:
6756     case X86::BI__builtin_ia32_cmpunordss:
6757     case X86::BI__builtin_ia32_cmpneqss:
6758     case X86::BI__builtin_ia32_cmpnltss:
6759     case X86::BI__builtin_ia32_cmpnless:
6760     case X86::BI__builtin_ia32_cmpordss:
6761       name = "cmpss";
6762       ID = Intrinsic::x86_sse_cmp_ss;
6763       break;
6764     case X86::BI__builtin_ia32_cmpeqpd:
6765     case X86::BI__builtin_ia32_cmpltpd:
6766     case X86::BI__builtin_ia32_cmplepd:
6767     case X86::BI__builtin_ia32_cmpunordpd:
6768     case X86::BI__builtin_ia32_cmpneqpd:
6769     case X86::BI__builtin_ia32_cmpnltpd:
6770     case X86::BI__builtin_ia32_cmpnlepd:
6771     case X86::BI__builtin_ia32_cmpordpd:
6772       name = "cmppd";
6773       ID = Intrinsic::x86_sse2_cmp_pd;
6774       break;
6775     case X86::BI__builtin_ia32_cmpeqsd:
6776     case X86::BI__builtin_ia32_cmpltsd:
6777     case X86::BI__builtin_ia32_cmplesd:
6778     case X86::BI__builtin_ia32_cmpunordsd:
6779     case X86::BI__builtin_ia32_cmpneqsd:
6780     case X86::BI__builtin_ia32_cmpnltsd:
6781     case X86::BI__builtin_ia32_cmpnlesd:
6782     case X86::BI__builtin_ia32_cmpordsd:
6783       name = "cmpsd";
6784       ID = Intrinsic::x86_sse2_cmp_sd;
6785       break;
6786     }
6787 
6788     Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
6789     llvm::Function *F = CGM.getIntrinsic(ID);
6790     return Builder.CreateCall(F, Ops, name);
6791   }
6792 }
6793 
6794 
6795 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
6796                                            const CallExpr *E) {
6797   SmallVector<Value*, 4> Ops;
6798 
6799   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
6800     Ops.push_back(EmitScalarExpr(E->getArg(i)));
6801 
6802   Intrinsic::ID ID = Intrinsic::not_intrinsic;
6803 
6804   switch (BuiltinID) {
6805   default: return nullptr;
6806 
6807   // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
6808   // call __builtin_readcyclecounter.
6809   case PPC::BI__builtin_ppc_get_timebase:
6810     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
6811 
6812   // vec_ld, vec_lvsl, vec_lvsr
6813   case PPC::BI__builtin_altivec_lvx:
6814   case PPC::BI__builtin_altivec_lvxl:
6815   case PPC::BI__builtin_altivec_lvebx:
6816   case PPC::BI__builtin_altivec_lvehx:
6817   case PPC::BI__builtin_altivec_lvewx:
6818   case PPC::BI__builtin_altivec_lvsl:
6819   case PPC::BI__builtin_altivec_lvsr:
6820   case PPC::BI__builtin_vsx_lxvd2x:
6821   case PPC::BI__builtin_vsx_lxvw4x:
6822   {
6823     Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
6824 
6825     Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
6826     Ops.pop_back();
6827 
6828     switch (BuiltinID) {
6829     default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
6830     case PPC::BI__builtin_altivec_lvx:
6831       ID = Intrinsic::ppc_altivec_lvx;
6832       break;
6833     case PPC::BI__builtin_altivec_lvxl:
6834       ID = Intrinsic::ppc_altivec_lvxl;
6835       break;
6836     case PPC::BI__builtin_altivec_lvebx:
6837       ID = Intrinsic::ppc_altivec_lvebx;
6838       break;
6839     case PPC::BI__builtin_altivec_lvehx:
6840       ID = Intrinsic::ppc_altivec_lvehx;
6841       break;
6842     case PPC::BI__builtin_altivec_lvewx:
6843       ID = Intrinsic::ppc_altivec_lvewx;
6844       break;
6845     case PPC::BI__builtin_altivec_lvsl:
6846       ID = Intrinsic::ppc_altivec_lvsl;
6847       break;
6848     case PPC::BI__builtin_altivec_lvsr:
6849       ID = Intrinsic::ppc_altivec_lvsr;
6850       break;
6851     case PPC::BI__builtin_vsx_lxvd2x:
6852       ID = Intrinsic::ppc_vsx_lxvd2x;
6853       break;
6854     case PPC::BI__builtin_vsx_lxvw4x:
6855       ID = Intrinsic::ppc_vsx_lxvw4x;
6856       break;
6857     }
6858     llvm::Function *F = CGM.getIntrinsic(ID);
6859     return Builder.CreateCall(F, Ops, "");
6860   }
6861 
6862   // vec_st
6863   case PPC::BI__builtin_altivec_stvx:
6864   case PPC::BI__builtin_altivec_stvxl:
6865   case PPC::BI__builtin_altivec_stvebx:
6866   case PPC::BI__builtin_altivec_stvehx:
6867   case PPC::BI__builtin_altivec_stvewx:
6868   case PPC::BI__builtin_vsx_stxvd2x:
6869   case PPC::BI__builtin_vsx_stxvw4x:
6870   {
6871     Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
6872     Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
6873     Ops.pop_back();
6874 
6875     switch (BuiltinID) {
6876     default: llvm_unreachable("Unsupported st intrinsic!");
6877     case PPC::BI__builtin_altivec_stvx:
6878       ID = Intrinsic::ppc_altivec_stvx;
6879       break;
6880     case PPC::BI__builtin_altivec_stvxl:
6881       ID = Intrinsic::ppc_altivec_stvxl;
6882       break;
6883     case PPC::BI__builtin_altivec_stvebx:
6884       ID = Intrinsic::ppc_altivec_stvebx;
6885       break;
6886     case PPC::BI__builtin_altivec_stvehx:
6887       ID = Intrinsic::ppc_altivec_stvehx;
6888       break;
6889     case PPC::BI__builtin_altivec_stvewx:
6890       ID = Intrinsic::ppc_altivec_stvewx;
6891       break;
6892     case PPC::BI__builtin_vsx_stxvd2x:
6893       ID = Intrinsic::ppc_vsx_stxvd2x;
6894       break;
6895     case PPC::BI__builtin_vsx_stxvw4x:
6896       ID = Intrinsic::ppc_vsx_stxvw4x;
6897       break;
6898     }
6899     llvm::Function *F = CGM.getIntrinsic(ID);
6900     return Builder.CreateCall(F, Ops, "");
6901   }
6902   // Square root
6903   case PPC::BI__builtin_vsx_xvsqrtsp:
6904   case PPC::BI__builtin_vsx_xvsqrtdp: {
6905     llvm::Type *ResultType = ConvertType(E->getType());
6906     Value *X = EmitScalarExpr(E->getArg(0));
6907     ID = Intrinsic::sqrt;
6908     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
6909     return Builder.CreateCall(F, X);
6910   }
6911   // Count leading zeros
6912   case PPC::BI__builtin_altivec_vclzb:
6913   case PPC::BI__builtin_altivec_vclzh:
6914   case PPC::BI__builtin_altivec_vclzw:
6915   case PPC::BI__builtin_altivec_vclzd: {
6916     llvm::Type *ResultType = ConvertType(E->getType());
6917     Value *X = EmitScalarExpr(E->getArg(0));
6918     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
6919     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
6920     return Builder.CreateCall(F, {X, Undef});
6921   }
6922   // Copy sign
6923   case PPC::BI__builtin_vsx_xvcpsgnsp:
6924   case PPC::BI__builtin_vsx_xvcpsgndp: {
6925     llvm::Type *ResultType = ConvertType(E->getType());
6926     Value *X = EmitScalarExpr(E->getArg(0));
6927     Value *Y = EmitScalarExpr(E->getArg(1));
6928     ID = Intrinsic::copysign;
6929     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
6930     return Builder.CreateCall(F, {X, Y});
6931   }
6932   // Rounding/truncation
6933   case PPC::BI__builtin_vsx_xvrspip:
6934   case PPC::BI__builtin_vsx_xvrdpip:
6935   case PPC::BI__builtin_vsx_xvrdpim:
6936   case PPC::BI__builtin_vsx_xvrspim:
6937   case PPC::BI__builtin_vsx_xvrdpi:
6938   case PPC::BI__builtin_vsx_xvrspi:
6939   case PPC::BI__builtin_vsx_xvrdpic:
6940   case PPC::BI__builtin_vsx_xvrspic:
6941   case PPC::BI__builtin_vsx_xvrdpiz:
6942   case PPC::BI__builtin_vsx_xvrspiz: {
6943     llvm::Type *ResultType = ConvertType(E->getType());
6944     Value *X = EmitScalarExpr(E->getArg(0));
6945     if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
6946         BuiltinID == PPC::BI__builtin_vsx_xvrspim)
6947       ID = Intrinsic::floor;
6948     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
6949              BuiltinID == PPC::BI__builtin_vsx_xvrspi)
6950       ID = Intrinsic::round;
6951     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
6952              BuiltinID == PPC::BI__builtin_vsx_xvrspic)
6953       ID = Intrinsic::nearbyint;
6954     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
6955              BuiltinID == PPC::BI__builtin_vsx_xvrspip)
6956       ID = Intrinsic::ceil;
6957     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
6958              BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
6959       ID = Intrinsic::trunc;
6960     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
6961     return Builder.CreateCall(F, X);
6962   }
6963 
6964   // Absolute value
6965   case PPC::BI__builtin_vsx_xvabsdp:
6966   case PPC::BI__builtin_vsx_xvabssp: {
6967     llvm::Type *ResultType = ConvertType(E->getType());
6968     Value *X = EmitScalarExpr(E->getArg(0));
6969     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
6970     return Builder.CreateCall(F, X);
6971   }
6972 
6973   // FMA variations
6974   case PPC::BI__builtin_vsx_xvmaddadp:
6975   case PPC::BI__builtin_vsx_xvmaddasp:
6976   case PPC::BI__builtin_vsx_xvnmaddadp:
6977   case PPC::BI__builtin_vsx_xvnmaddasp:
6978   case PPC::BI__builtin_vsx_xvmsubadp:
6979   case PPC::BI__builtin_vsx_xvmsubasp:
6980   case PPC::BI__builtin_vsx_xvnmsubadp:
6981   case PPC::BI__builtin_vsx_xvnmsubasp: {
6982     llvm::Type *ResultType = ConvertType(E->getType());
6983     Value *X = EmitScalarExpr(E->getArg(0));
6984     Value *Y = EmitScalarExpr(E->getArg(1));
6985     Value *Z = EmitScalarExpr(E->getArg(2));
6986     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
6987     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
6988     switch (BuiltinID) {
6989       case PPC::BI__builtin_vsx_xvmaddadp:
6990       case PPC::BI__builtin_vsx_xvmaddasp:
6991         return Builder.CreateCall(F, {X, Y, Z});
6992       case PPC::BI__builtin_vsx_xvnmaddadp:
6993       case PPC::BI__builtin_vsx_xvnmaddasp:
6994         return Builder.CreateFSub(Zero,
6995                                   Builder.CreateCall(F, {X, Y, Z}), "sub");
6996       case PPC::BI__builtin_vsx_xvmsubadp:
6997       case PPC::BI__builtin_vsx_xvmsubasp:
6998         return Builder.CreateCall(F,
6999                                   {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
7000       case PPC::BI__builtin_vsx_xvnmsubadp:
7001       case PPC::BI__builtin_vsx_xvnmsubasp:
7002         Value *FsubRes =
7003           Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
7004         return Builder.CreateFSub(Zero, FsubRes, "sub");
7005     }
7006     llvm_unreachable("Unknown FMA operation");
7007     return nullptr; // Suppress no-return warning
7008   }
7009   }
7010 }
7011 
7012 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
7013                                               const CallExpr *E) {
7014   switch (BuiltinID) {
7015   case AMDGPU::BI__builtin_amdgcn_div_scale:
7016   case AMDGPU::BI__builtin_amdgcn_div_scalef: {
7017     // Translate from the intrinsics's struct return to the builtin's out
7018     // argument.
7019 
7020     Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
7021 
7022     llvm::Value *X = EmitScalarExpr(E->getArg(0));
7023     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
7024     llvm::Value *Z = EmitScalarExpr(E->getArg(2));
7025 
7026     llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
7027                                            X->getType());
7028 
7029     llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
7030 
7031     llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
7032     llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
7033 
7034     llvm::Type *RealFlagType
7035       = FlagOutPtr.getPointer()->getType()->getPointerElementType();
7036 
7037     llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
7038     Builder.CreateStore(FlagExt, FlagOutPtr);
7039     return Result;
7040   }
7041   case AMDGPU::BI__builtin_amdgcn_div_fmas:
7042   case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
7043     llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
7044     llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
7045     llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
7046     llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
7047 
7048     llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
7049                                       Src0->getType());
7050     llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
7051     return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
7052   }
7053   case AMDGPU::BI__builtin_amdgcn_div_fixup:
7054   case AMDGPU::BI__builtin_amdgcn_div_fixupf:
7055     return emitTernaryFPBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
7056   case AMDGPU::BI__builtin_amdgcn_trig_preop:
7057   case AMDGPU::BI__builtin_amdgcn_trig_preopf:
7058     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
7059   case AMDGPU::BI__builtin_amdgcn_rcp:
7060   case AMDGPU::BI__builtin_amdgcn_rcpf:
7061     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
7062   case AMDGPU::BI__builtin_amdgcn_rsq:
7063   case AMDGPU::BI__builtin_amdgcn_rsqf:
7064     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
7065   case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
7066   case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
7067     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
7068   case AMDGPU::BI__builtin_amdgcn_sinf:
7069     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
7070   case AMDGPU::BI__builtin_amdgcn_cosf:
7071     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
7072   case AMDGPU::BI__builtin_amdgcn_log_clampf:
7073     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
7074   case AMDGPU::BI__builtin_amdgcn_ldexp:
7075   case AMDGPU::BI__builtin_amdgcn_ldexpf:
7076     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
7077   case AMDGPU::BI__builtin_amdgcn_frexp_mant:
7078   case AMDGPU::BI__builtin_amdgcn_frexp_mantf: {
7079     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
7080   }
7081   case AMDGPU::BI__builtin_amdgcn_frexp_exp:
7082   case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
7083     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_exp);
7084   }
7085   case AMDGPU::BI__builtin_amdgcn_class:
7086   case AMDGPU::BI__builtin_amdgcn_classf:
7087     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
7088 
7089     // Legacy amdgpu prefix
7090   case AMDGPU::BI__builtin_amdgpu_rsq:
7091   case AMDGPU::BI__builtin_amdgpu_rsqf: {
7092     if (getTarget().getTriple().getArch() == Triple::amdgcn)
7093       return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
7094     return emitUnaryBuiltin(*this, E, Intrinsic::r600_rsq);
7095   }
7096   case AMDGPU::BI__builtin_amdgpu_ldexp:
7097   case AMDGPU::BI__builtin_amdgpu_ldexpf: {
7098     if (getTarget().getTriple().getArch() == Triple::amdgcn)
7099       return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
7100     return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_ldexp);
7101   }
7102   default:
7103     return nullptr;
7104   }
7105 }
7106 
7107 /// Handle a SystemZ function in which the final argument is a pointer
7108 /// to an int that receives the post-instruction CC value.  At the LLVM level
7109 /// this is represented as a function that returns a {result, cc} pair.
7110 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
7111                                          unsigned IntrinsicID,
7112                                          const CallExpr *E) {
7113   unsigned NumArgs = E->getNumArgs() - 1;
7114   SmallVector<Value *, 8> Args(NumArgs);
7115   for (unsigned I = 0; I < NumArgs; ++I)
7116     Args[I] = CGF.EmitScalarExpr(E->getArg(I));
7117   Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
7118   Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
7119   Value *Call = CGF.Builder.CreateCall(F, Args);
7120   Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
7121   CGF.Builder.CreateStore(CC, CCPtr);
7122   return CGF.Builder.CreateExtractValue(Call, 0);
7123 }
7124 
7125 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
7126                                                const CallExpr *E) {
7127   switch (BuiltinID) {
7128   case SystemZ::BI__builtin_tbegin: {
7129     Value *TDB = EmitScalarExpr(E->getArg(0));
7130     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
7131     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
7132     return Builder.CreateCall(F, {TDB, Control});
7133   }
7134   case SystemZ::BI__builtin_tbegin_nofloat: {
7135     Value *TDB = EmitScalarExpr(E->getArg(0));
7136     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
7137     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
7138     return Builder.CreateCall(F, {TDB, Control});
7139   }
7140   case SystemZ::BI__builtin_tbeginc: {
7141     Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
7142     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
7143     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
7144     return Builder.CreateCall(F, {TDB, Control});
7145   }
7146   case SystemZ::BI__builtin_tabort: {
7147     Value *Data = EmitScalarExpr(E->getArg(0));
7148     Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
7149     return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
7150   }
7151   case SystemZ::BI__builtin_non_tx_store: {
7152     Value *Address = EmitScalarExpr(E->getArg(0));
7153     Value *Data = EmitScalarExpr(E->getArg(1));
7154     Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
7155     return Builder.CreateCall(F, {Data, Address});
7156   }
7157 
7158   // Vector builtins.  Note that most vector builtins are mapped automatically
7159   // to target-specific LLVM intrinsics.  The ones handled specially here can
7160   // be represented via standard LLVM IR, which is preferable to enable common
7161   // LLVM optimizations.
7162 
7163   case SystemZ::BI__builtin_s390_vpopctb:
7164   case SystemZ::BI__builtin_s390_vpopcth:
7165   case SystemZ::BI__builtin_s390_vpopctf:
7166   case SystemZ::BI__builtin_s390_vpopctg: {
7167     llvm::Type *ResultType = ConvertType(E->getType());
7168     Value *X = EmitScalarExpr(E->getArg(0));
7169     Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
7170     return Builder.CreateCall(F, X);
7171   }
7172 
7173   case SystemZ::BI__builtin_s390_vclzb:
7174   case SystemZ::BI__builtin_s390_vclzh:
7175   case SystemZ::BI__builtin_s390_vclzf:
7176   case SystemZ::BI__builtin_s390_vclzg: {
7177     llvm::Type *ResultType = ConvertType(E->getType());
7178     Value *X = EmitScalarExpr(E->getArg(0));
7179     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
7180     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
7181     return Builder.CreateCall(F, {X, Undef});
7182   }
7183 
7184   case SystemZ::BI__builtin_s390_vctzb:
7185   case SystemZ::BI__builtin_s390_vctzh:
7186   case SystemZ::BI__builtin_s390_vctzf:
7187   case SystemZ::BI__builtin_s390_vctzg: {
7188     llvm::Type *ResultType = ConvertType(E->getType());
7189     Value *X = EmitScalarExpr(E->getArg(0));
7190     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
7191     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
7192     return Builder.CreateCall(F, {X, Undef});
7193   }
7194 
7195   case SystemZ::BI__builtin_s390_vfsqdb: {
7196     llvm::Type *ResultType = ConvertType(E->getType());
7197     Value *X = EmitScalarExpr(E->getArg(0));
7198     Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
7199     return Builder.CreateCall(F, X);
7200   }
7201   case SystemZ::BI__builtin_s390_vfmadb: {
7202     llvm::Type *ResultType = ConvertType(E->getType());
7203     Value *X = EmitScalarExpr(E->getArg(0));
7204     Value *Y = EmitScalarExpr(E->getArg(1));
7205     Value *Z = EmitScalarExpr(E->getArg(2));
7206     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
7207     return Builder.CreateCall(F, {X, Y, Z});
7208   }
7209   case SystemZ::BI__builtin_s390_vfmsdb: {
7210     llvm::Type *ResultType = ConvertType(E->getType());
7211     Value *X = EmitScalarExpr(E->getArg(0));
7212     Value *Y = EmitScalarExpr(E->getArg(1));
7213     Value *Z = EmitScalarExpr(E->getArg(2));
7214     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
7215     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
7216     return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
7217   }
7218   case SystemZ::BI__builtin_s390_vflpdb: {
7219     llvm::Type *ResultType = ConvertType(E->getType());
7220     Value *X = EmitScalarExpr(E->getArg(0));
7221     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
7222     return Builder.CreateCall(F, X);
7223   }
7224   case SystemZ::BI__builtin_s390_vflndb: {
7225     llvm::Type *ResultType = ConvertType(E->getType());
7226     Value *X = EmitScalarExpr(E->getArg(0));
7227     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
7228     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
7229     return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
7230   }
7231   case SystemZ::BI__builtin_s390_vfidb: {
7232     llvm::Type *ResultType = ConvertType(E->getType());
7233     Value *X = EmitScalarExpr(E->getArg(0));
7234     // Constant-fold the M4 and M5 mask arguments.
7235     llvm::APSInt M4, M5;
7236     bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
7237     bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
7238     assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
7239     (void)IsConstM4; (void)IsConstM5;
7240     // Check whether this instance of vfidb can be represented via a LLVM
7241     // standard intrinsic.  We only support some combinations of M4 and M5.
7242     Intrinsic::ID ID = Intrinsic::not_intrinsic;
7243     switch (M4.getZExtValue()) {
7244     default: break;
7245     case 0:  // IEEE-inexact exception allowed
7246       switch (M5.getZExtValue()) {
7247       default: break;
7248       case 0: ID = Intrinsic::rint; break;
7249       }
7250       break;
7251     case 4:  // IEEE-inexact exception suppressed
7252       switch (M5.getZExtValue()) {
7253       default: break;
7254       case 0: ID = Intrinsic::nearbyint; break;
7255       case 1: ID = Intrinsic::round; break;
7256       case 5: ID = Intrinsic::trunc; break;
7257       case 6: ID = Intrinsic::ceil; break;
7258       case 7: ID = Intrinsic::floor; break;
7259       }
7260       break;
7261     }
7262     if (ID != Intrinsic::not_intrinsic) {
7263       Function *F = CGM.getIntrinsic(ID, ResultType);
7264       return Builder.CreateCall(F, X);
7265     }
7266     Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb);
7267     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
7268     Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
7269     return Builder.CreateCall(F, {X, M4Value, M5Value});
7270   }
7271 
7272   // Vector intrisincs that output the post-instruction CC value.
7273 
7274 #define INTRINSIC_WITH_CC(NAME) \
7275     case SystemZ::BI__builtin_##NAME: \
7276       return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
7277 
7278   INTRINSIC_WITH_CC(s390_vpkshs);
7279   INTRINSIC_WITH_CC(s390_vpksfs);
7280   INTRINSIC_WITH_CC(s390_vpksgs);
7281 
7282   INTRINSIC_WITH_CC(s390_vpklshs);
7283   INTRINSIC_WITH_CC(s390_vpklsfs);
7284   INTRINSIC_WITH_CC(s390_vpklsgs);
7285 
7286   INTRINSIC_WITH_CC(s390_vceqbs);
7287   INTRINSIC_WITH_CC(s390_vceqhs);
7288   INTRINSIC_WITH_CC(s390_vceqfs);
7289   INTRINSIC_WITH_CC(s390_vceqgs);
7290 
7291   INTRINSIC_WITH_CC(s390_vchbs);
7292   INTRINSIC_WITH_CC(s390_vchhs);
7293   INTRINSIC_WITH_CC(s390_vchfs);
7294   INTRINSIC_WITH_CC(s390_vchgs);
7295 
7296   INTRINSIC_WITH_CC(s390_vchlbs);
7297   INTRINSIC_WITH_CC(s390_vchlhs);
7298   INTRINSIC_WITH_CC(s390_vchlfs);
7299   INTRINSIC_WITH_CC(s390_vchlgs);
7300 
7301   INTRINSIC_WITH_CC(s390_vfaebs);
7302   INTRINSIC_WITH_CC(s390_vfaehs);
7303   INTRINSIC_WITH_CC(s390_vfaefs);
7304 
7305   INTRINSIC_WITH_CC(s390_vfaezbs);
7306   INTRINSIC_WITH_CC(s390_vfaezhs);
7307   INTRINSIC_WITH_CC(s390_vfaezfs);
7308 
7309   INTRINSIC_WITH_CC(s390_vfeebs);
7310   INTRINSIC_WITH_CC(s390_vfeehs);
7311   INTRINSIC_WITH_CC(s390_vfeefs);
7312 
7313   INTRINSIC_WITH_CC(s390_vfeezbs);
7314   INTRINSIC_WITH_CC(s390_vfeezhs);
7315   INTRINSIC_WITH_CC(s390_vfeezfs);
7316 
7317   INTRINSIC_WITH_CC(s390_vfenebs);
7318   INTRINSIC_WITH_CC(s390_vfenehs);
7319   INTRINSIC_WITH_CC(s390_vfenefs);
7320 
7321   INTRINSIC_WITH_CC(s390_vfenezbs);
7322   INTRINSIC_WITH_CC(s390_vfenezhs);
7323   INTRINSIC_WITH_CC(s390_vfenezfs);
7324 
7325   INTRINSIC_WITH_CC(s390_vistrbs);
7326   INTRINSIC_WITH_CC(s390_vistrhs);
7327   INTRINSIC_WITH_CC(s390_vistrfs);
7328 
7329   INTRINSIC_WITH_CC(s390_vstrcbs);
7330   INTRINSIC_WITH_CC(s390_vstrchs);
7331   INTRINSIC_WITH_CC(s390_vstrcfs);
7332 
7333   INTRINSIC_WITH_CC(s390_vstrczbs);
7334   INTRINSIC_WITH_CC(s390_vstrczhs);
7335   INTRINSIC_WITH_CC(s390_vstrczfs);
7336 
7337   INTRINSIC_WITH_CC(s390_vfcedbs);
7338   INTRINSIC_WITH_CC(s390_vfchdbs);
7339   INTRINSIC_WITH_CC(s390_vfchedbs);
7340 
7341   INTRINSIC_WITH_CC(s390_vftcidb);
7342 
7343 #undef INTRINSIC_WITH_CC
7344 
7345   default:
7346     return nullptr;
7347   }
7348 }
7349 
7350 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
7351                                              const CallExpr *E) {
7352   switch (BuiltinID) {
7353   case NVPTX::BI__nvvm_atom_add_gen_i:
7354   case NVPTX::BI__nvvm_atom_add_gen_l:
7355   case NVPTX::BI__nvvm_atom_add_gen_ll:
7356     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
7357 
7358   case NVPTX::BI__nvvm_atom_sub_gen_i:
7359   case NVPTX::BI__nvvm_atom_sub_gen_l:
7360   case NVPTX::BI__nvvm_atom_sub_gen_ll:
7361     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
7362 
7363   case NVPTX::BI__nvvm_atom_and_gen_i:
7364   case NVPTX::BI__nvvm_atom_and_gen_l:
7365   case NVPTX::BI__nvvm_atom_and_gen_ll:
7366     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
7367 
7368   case NVPTX::BI__nvvm_atom_or_gen_i:
7369   case NVPTX::BI__nvvm_atom_or_gen_l:
7370   case NVPTX::BI__nvvm_atom_or_gen_ll:
7371     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
7372 
7373   case NVPTX::BI__nvvm_atom_xor_gen_i:
7374   case NVPTX::BI__nvvm_atom_xor_gen_l:
7375   case NVPTX::BI__nvvm_atom_xor_gen_ll:
7376     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
7377 
7378   case NVPTX::BI__nvvm_atom_xchg_gen_i:
7379   case NVPTX::BI__nvvm_atom_xchg_gen_l:
7380   case NVPTX::BI__nvvm_atom_xchg_gen_ll:
7381     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
7382 
7383   case NVPTX::BI__nvvm_atom_max_gen_i:
7384   case NVPTX::BI__nvvm_atom_max_gen_l:
7385   case NVPTX::BI__nvvm_atom_max_gen_ll:
7386     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
7387 
7388   case NVPTX::BI__nvvm_atom_max_gen_ui:
7389   case NVPTX::BI__nvvm_atom_max_gen_ul:
7390   case NVPTX::BI__nvvm_atom_max_gen_ull:
7391     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
7392 
7393   case NVPTX::BI__nvvm_atom_min_gen_i:
7394   case NVPTX::BI__nvvm_atom_min_gen_l:
7395   case NVPTX::BI__nvvm_atom_min_gen_ll:
7396     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
7397 
7398   case NVPTX::BI__nvvm_atom_min_gen_ui:
7399   case NVPTX::BI__nvvm_atom_min_gen_ul:
7400   case NVPTX::BI__nvvm_atom_min_gen_ull:
7401     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
7402 
7403   case NVPTX::BI__nvvm_atom_cas_gen_i:
7404   case NVPTX::BI__nvvm_atom_cas_gen_l:
7405   case NVPTX::BI__nvvm_atom_cas_gen_ll:
7406     // __nvvm_atom_cas_gen_* should return the old value rather than the
7407     // success flag.
7408     return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
7409 
7410   case NVPTX::BI__nvvm_atom_add_gen_f: {
7411     Value *Ptr = EmitScalarExpr(E->getArg(0));
7412     Value *Val = EmitScalarExpr(E->getArg(1));
7413     // atomicrmw only deals with integer arguments so we need to use
7414     // LLVM's nvvm_atomic_load_add_f32 intrinsic for that.
7415     Value *FnALAF32 =
7416         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
7417     return Builder.CreateCall(FnALAF32, {Ptr, Val});
7418   }
7419 
7420   case NVPTX::BI__nvvm_atom_inc_gen_ui: {
7421     Value *Ptr = EmitScalarExpr(E->getArg(0));
7422     Value *Val = EmitScalarExpr(E->getArg(1));
7423     Value *FnALI32 =
7424         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
7425     return Builder.CreateCall(FnALI32, {Ptr, Val});
7426   }
7427 
7428   case NVPTX::BI__nvvm_atom_dec_gen_ui: {
7429     Value *Ptr = EmitScalarExpr(E->getArg(0));
7430     Value *Val = EmitScalarExpr(E->getArg(1));
7431     Value *FnALD32 =
7432         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
7433     return Builder.CreateCall(FnALD32, {Ptr, Val});
7434   }
7435 
7436   default:
7437     return nullptr;
7438   }
7439 }
7440 
7441 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
7442                                                    const CallExpr *E) {
7443   switch (BuiltinID) {
7444   case WebAssembly::BI__builtin_wasm_current_memory: {
7445     llvm::Type *ResultType = ConvertType(E->getType());
7446     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
7447     return Builder.CreateCall(Callee);
7448   }
7449   case WebAssembly::BI__builtin_wasm_grow_memory: {
7450     Value *X = EmitScalarExpr(E->getArg(0));
7451     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
7452     return Builder.CreateCall(Callee, X);
7453   }
7454 
7455   default:
7456     return nullptr;
7457   }
7458 }
7459