1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGObjCRuntime.h"
16 #include "CGOpenCLRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "CodeGenModule.h"
19 #include "ConstantEmitter.h"
20 #include "TargetInfo.h"
21 #include "clang/AST/ASTContext.h"
22 #include "clang/AST/Decl.h"
23 #include "clang/Analysis/Analyses/OSLog.h"
24 #include "clang/Basic/TargetBuiltins.h"
25 #include "clang/Basic/TargetInfo.h"
26 #include "clang/CodeGen/CGFunctionInfo.h"
27 #include "llvm/ADT/StringExtras.h"
28 #include "llvm/IR/CallSite.h"
29 #include "llvm/IR/DataLayout.h"
30 #include "llvm/IR/InlineAsm.h"
31 #include "llvm/IR/Intrinsics.h"
32 #include "llvm/IR/MDBuilder.h"
33 #include <sstream>
34 
35 using namespace clang;
36 using namespace CodeGen;
37 using namespace llvm;
38 
/// Restrict \p Value to the interval bounded below by \p Low and above by
/// \p High. The lower bound is applied first and the upper bound last, so if
/// the bounds are inverted (Low > High) the result is \p High.
static int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
  // Raise the value to the lower bound if it falls beneath it ...
  const int64_t Floored = Value < Low ? Low : Value;
  // ... then cap the outcome at the upper bound.
  return High < Floored ? High : Floored;
}
43 
44 /// getBuiltinLibFunction - Given a builtin id for a function like
45 /// "__builtin_fabsf", return a Function* for "fabsf".
46 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
47                                                      unsigned BuiltinID) {
48   assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
49 
50   // Get the name, skip over the __builtin_ prefix (if necessary).
51   StringRef Name;
52   GlobalDecl D(FD);
53 
54   // If the builtin has been declared explicitly with an assembler label,
55   // use the mangled name. This differs from the plain label on platforms
56   // that prefix labels.
57   if (FD->hasAttr<AsmLabelAttr>())
58     Name = getMangledName(D);
59   else
60     Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
61 
62   llvm::FunctionType *Ty =
63     cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
64 
65   return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
66 }
67 
68 /// Emit the conversions required to turn the given value into an
69 /// integer of the given size.
70 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
71                         QualType T, llvm::IntegerType *IntType) {
72   V = CGF.EmitToMemory(V, T);
73 
74   if (V->getType()->isPointerTy())
75     return CGF.Builder.CreatePtrToInt(V, IntType);
76 
77   assert(V->getType() == IntType);
78   return V;
79 }
80 
81 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
82                           QualType T, llvm::Type *ResultType) {
83   V = CGF.EmitFromMemory(V, T);
84 
85   if (ResultType->isPointerTy())
86     return CGF.Builder.CreateIntToPtr(V, ResultType);
87 
88   assert(V->getType() == ResultType);
89   return V;
90 }
91 
92 /// Utility to insert an atomic instruction based on Instrinsic::ID
93 /// and the expression node.
94 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
95                                     llvm::AtomicRMWInst::BinOp Kind,
96                                     const CallExpr *E) {
97   QualType T = E->getType();
98   assert(E->getArg(0)->getType()->isPointerType());
99   assert(CGF.getContext().hasSameUnqualifiedType(T,
100                                   E->getArg(0)->getType()->getPointeeType()));
101   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
102 
103   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
104   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
105 
106   llvm::IntegerType *IntType =
107     llvm::IntegerType::get(CGF.getLLVMContext(),
108                            CGF.getContext().getTypeSize(T));
109   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
110 
111   llvm::Value *Args[2];
112   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
113   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
114   llvm::Type *ValueType = Args[1]->getType();
115   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
116 
117   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
118       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
119   return EmitFromInt(CGF, Result, T, ValueType);
120 }
121 
122 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
123   Value *Val = CGF.EmitScalarExpr(E->getArg(0));
124   Value *Address = CGF.EmitScalarExpr(E->getArg(1));
125 
126   // Convert the type of the pointer to a pointer to the stored type.
127   Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
128   Value *BC = CGF.Builder.CreateBitCast(
129       Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
130   LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
131   LV.setNontemporal(true);
132   CGF.EmitStoreOfScalar(Val, LV, false);
133   return nullptr;
134 }
135 
136 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
137   Value *Address = CGF.EmitScalarExpr(E->getArg(0));
138 
139   LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
140   LV.setNontemporal(true);
141   return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
142 }
143 
144 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
145                                llvm::AtomicRMWInst::BinOp Kind,
146                                const CallExpr *E) {
147   return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
148 }
149 
150 /// Utility to insert an atomic instruction based Instrinsic::ID and
151 /// the expression node, where the return value is the result of the
152 /// operation.
153 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
154                                    llvm::AtomicRMWInst::BinOp Kind,
155                                    const CallExpr *E,
156                                    Instruction::BinaryOps Op,
157                                    bool Invert = false) {
158   QualType T = E->getType();
159   assert(E->getArg(0)->getType()->isPointerType());
160   assert(CGF.getContext().hasSameUnqualifiedType(T,
161                                   E->getArg(0)->getType()->getPointeeType()));
162   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
163 
164   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
165   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
166 
167   llvm::IntegerType *IntType =
168     llvm::IntegerType::get(CGF.getLLVMContext(),
169                            CGF.getContext().getTypeSize(T));
170   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
171 
172   llvm::Value *Args[2];
173   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
174   llvm::Type *ValueType = Args[1]->getType();
175   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
176   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
177 
178   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
179       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
180   Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
181   if (Invert)
182     Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
183                                      llvm::ConstantInt::get(IntType, -1));
184   Result = EmitFromInt(CGF, Result, T, ValueType);
185   return RValue::get(Result);
186 }
187 
188 /// @brief Utility to insert an atomic cmpxchg instruction.
189 ///
190 /// @param CGF The current codegen function.
191 /// @param E   Builtin call expression to convert to cmpxchg.
192 ///            arg0 - address to operate on
193 ///            arg1 - value to compare with
194 ///            arg2 - new value
195 /// @param ReturnBool Specifies whether to return success flag of
196 ///                   cmpxchg result or the old value.
197 ///
198 /// @returns result of cmpxchg, according to ReturnBool
199 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
200                                      bool ReturnBool) {
201   QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
202   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
203   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
204 
205   llvm::IntegerType *IntType = llvm::IntegerType::get(
206       CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
207   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
208 
209   Value *Args[3];
210   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
211   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
212   llvm::Type *ValueType = Args[1]->getType();
213   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
214   Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
215 
216   Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
217       Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
218       llvm::AtomicOrdering::SequentiallyConsistent);
219   if (ReturnBool)
220     // Extract boolean success flag and zext it to int.
221     return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
222                                   CGF.ConvertType(E->getType()));
223   else
224     // Extract old value and emit it using the same type as compare value.
225     return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
226                        ValueType);
227 }
228 
229 // Emit a simple mangled intrinsic that has 1 argument and a return type
230 // matching the argument type.
231 static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
232                                const CallExpr *E,
233                                unsigned IntrinsicID) {
234   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
235 
236   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
237   return CGF.Builder.CreateCall(F, Src0);
238 }
239 
240 // Emit an intrinsic that has 2 operands of the same type as its result.
241 static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
242                                 const CallExpr *E,
243                                 unsigned IntrinsicID) {
244   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
245   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
246 
247   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
248   return CGF.Builder.CreateCall(F, { Src0, Src1 });
249 }
250 
251 // Emit an intrinsic that has 3 operands of the same type as its result.
252 static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
253                                  const CallExpr *E,
254                                  unsigned IntrinsicID) {
255   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
256   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
257   llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
258 
259   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
260   return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
261 }
262 
263 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
264 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
265                                const CallExpr *E,
266                                unsigned IntrinsicID) {
267   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
268   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
269 
270   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
271   return CGF.Builder.CreateCall(F, {Src0, Src1});
272 }
273 
274 /// EmitFAbs - Emit a call to @llvm.fabs().
275 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
276   Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
277   llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
278   Call->setDoesNotAccessMemory();
279   return Call;
280 }
281 
282 /// Emit the computation of the sign bit for a floating point value. Returns
283 /// the i1 sign bit value.
284 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
285   LLVMContext &C = CGF.CGM.getLLVMContext();
286 
287   llvm::Type *Ty = V->getType();
288   int Width = Ty->getPrimitiveSizeInBits();
289   llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
290   V = CGF.Builder.CreateBitCast(V, IntTy);
291   if (Ty->isPPC_FP128Ty()) {
292     // We want the sign bit of the higher-order double. The bitcast we just
293     // did works as if the double-double was stored to memory and then
294     // read as an i128. The "store" will put the higher-order double in the
295     // lower address in both little- and big-Endian modes, but the "load"
296     // will treat those bits as a different part of the i128: the low bits in
297     // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
298     // we need to shift the high bits down to the low before truncating.
299     Width >>= 1;
300     if (CGF.getTarget().isBigEndian()) {
301       Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
302       V = CGF.Builder.CreateLShr(V, ShiftCst);
303     }
304     // We are truncating value in order to extract the higher-order
305     // double, which we will be using to extract the sign from.
306     IntTy = llvm::IntegerType::get(C, Width);
307     V = CGF.Builder.CreateTrunc(V, IntTy);
308   }
309   Value *Zero = llvm::Constant::getNullValue(IntTy);
310   return CGF.Builder.CreateICmpSLT(V, Zero);
311 }
312 
313 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
314                               const CallExpr *E, llvm::Constant *calleeValue) {
315   CGCallee callee = CGCallee::forDirect(calleeValue, FD);
316   return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
317 }
318 
319 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
320 /// depending on IntrinsicID.
321 ///
322 /// \arg CGF The current codegen function.
323 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
324 /// \arg X The first argument to the llvm.*.with.overflow.*.
325 /// \arg Y The second argument to the llvm.*.with.overflow.*.
326 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
327 /// \returns The result (i.e. sum/product) returned by the intrinsic.
328 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
329                                           const llvm::Intrinsic::ID IntrinsicID,
330                                           llvm::Value *X, llvm::Value *Y,
331                                           llvm::Value *&Carry) {
332   // Make sure we have integers of the same width.
333   assert(X->getType() == Y->getType() &&
334          "Arguments must be the same type. (Did you forget to make sure both "
335          "arguments have the same integer width?)");
336 
337   llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
338   llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
339   Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
340   return CGF.Builder.CreateExtractValue(Tmp, 0);
341 }
342 
343 static Value *emitRangedBuiltin(CodeGenFunction &CGF,
344                                 unsigned IntrinsicID,
345                                 int low, int high) {
346     llvm::MDBuilder MDHelper(CGF.getLLVMContext());
347     llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
348     Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
349     llvm::Instruction *Call = CGF.Builder.CreateCall(F);
350     Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
351     return Call;
352 }
353 
namespace {
  /// Describes an integer type by its bit width and its signedness.
  struct WidthAndSignedness {
    // Width of the type in bits.
    unsigned Width;
    // True when the type is a signed integer type.
    bool Signed;
  };
}
360 
361 static WidthAndSignedness
362 getIntegerWidthAndSignedness(const clang::ASTContext &context,
363                              const clang::QualType Type) {
364   assert(Type->isIntegerType() && "Given type is not an integer.");
365   unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
366   bool Signed = Type->isSignedIntegerType();
367   return {Width, Signed};
368 }
369 
370 // Given one or more integer types, this function produces an integer type that
371 // encompasses them: any value in one of the given types could be expressed in
372 // the encompassing type.
373 static struct WidthAndSignedness
374 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
375   assert(Types.size() > 0 && "Empty list of types.");
376 
377   // If any of the given types is signed, we must return a signed type.
378   bool Signed = false;
379   for (const auto &Type : Types) {
380     Signed |= Type.Signed;
381   }
382 
383   // The encompassing type must have a width greater than or equal to the width
384   // of the specified types.  Aditionally, if the encompassing type is signed,
385   // its width must be strictly greater than the width of any unsigned types
386   // given.
387   unsigned Width = 0;
388   for (const auto &Type : Types) {
389     unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
390     if (Width < MinWidth) {
391       Width = MinWidth;
392     }
393   }
394 
395   return {Width, Signed};
396 }
397 
398 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
399   llvm::Type *DestType = Int8PtrTy;
400   if (ArgValue->getType() != DestType)
401     ArgValue =
402         Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
403 
404   Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
405   return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
406 }
407 
/// Checks if using the result of __builtin_object_size(p, @p From) in place of
/// __builtin_object_size(p, @p To) is correct.
///
/// Besides identical types, the accepted substitutions are From=0 for To=1
/// and From=3 for To=2.
static bool areBOSTypesCompatible(int From, int To) {
  // Note: Our __builtin_object_size implementation currently treats Type=0 and
  // Type=2 identically. Encoding this implementation detail here may make
  // improving __builtin_object_size difficult in the future, so it's omitted.
  if (From == To)
    return true;
  if (From == 0)
    return To == 1;
  if (From == 3)
    return To == 2;
  return false;
}
416 
417 static llvm::Value *
418 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
419   return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
420 }
421 
422 llvm::Value *
423 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
424                                                  llvm::IntegerType *ResType,
425                                                  llvm::Value *EmittedE) {
426   uint64_t ObjectSize;
427   if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
428     return emitBuiltinObjectSize(E, Type, ResType, EmittedE);
429   return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
430 }
431 
432 /// Returns a Value corresponding to the size of the given expression.
433 /// This Value may be either of the following:
434 ///   - A llvm::Argument (if E is a param with the pass_object_size attribute on
435 ///     it)
436 ///   - A call to the @llvm.objectsize intrinsic
437 ///
438 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
439 /// and we wouldn't otherwise try to reference a pass_object_size parameter,
440 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
441 llvm::Value *
442 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
443                                        llvm::IntegerType *ResType,
444                                        llvm::Value *EmittedE) {
445   // We need to reference an argument if the pointer is a parameter with the
446   // pass_object_size attribute.
447   if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
448     auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
449     auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
450     if (Param != nullptr && PS != nullptr &&
451         areBOSTypesCompatible(PS->getType(), Type)) {
452       auto Iter = SizeArguments.find(Param);
453       assert(Iter != SizeArguments.end());
454 
455       const ImplicitParamDecl *D = Iter->second;
456       auto DIter = LocalDeclMap.find(D);
457       assert(DIter != LocalDeclMap.end());
458 
459       return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
460                               getContext().getSizeType(), E->getLocStart());
461     }
462   }
463 
464   // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
465   // evaluate E for side-effects. In either case, we shouldn't lower to
466   // @llvm.objectsize.
467   if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
468     return getDefaultBuiltinObjectSizeResult(Type, ResType);
469 
470   Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
471   assert(Ptr->getType()->isPointerTy() &&
472          "Non-pointer passed to __builtin_object_size?");
473 
474   Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
475 
476   // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
477   Value *Min = Builder.getInt1((Type & 2) != 0);
478   // For GCC compatability, __builtin_object_size treat NULL as unknown size.
479   Value *NullIsUnknown = Builder.getTrue();
480   return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
481 }
482 
// Many MSVC builtins exist on both x64 and ARM; to avoid repeating code, we
// handle them here. Each enumerator names one intrinsic that
// EmitMSVCBuiltinExpr knows how to lower.
enum class CodeGenFunction::MSVCIntrin {
  // Bit scans: lowered with cttz / ctlz plus a zero-input check.
  _BitScanForward,
  _BitScanReverse,
  // Interlocked operations: lowered to sequentially consistent atomicrmw.
  _InterlockedAnd,
  _InterlockedDecrement,
  _InterlockedExchange,
  _InterlockedExchangeAdd,
  _InterlockedExchangeSub,
  _InterlockedIncrement,
  _InterlockedOr,
  _InterlockedXor,
  _interlockedbittestandset,
  // Process termination request: lowered to target-specific inline asm.
  __fastfail,
};
499 
500 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
501                                             const CallExpr *E) {
502   switch (BuiltinID) {
503   case MSVCIntrin::_BitScanForward:
504   case MSVCIntrin::_BitScanReverse: {
505     Value *ArgValue = EmitScalarExpr(E->getArg(1));
506 
507     llvm::Type *ArgType = ArgValue->getType();
508     llvm::Type *IndexType =
509       EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType();
510     llvm::Type *ResultType = ConvertType(E->getType());
511 
512     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
513     Value *ResZero = llvm::Constant::getNullValue(ResultType);
514     Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
515 
516     BasicBlock *Begin = Builder.GetInsertBlock();
517     BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
518     Builder.SetInsertPoint(End);
519     PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
520 
521     Builder.SetInsertPoint(Begin);
522     Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
523     BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
524     Builder.CreateCondBr(IsZero, End, NotZero);
525     Result->addIncoming(ResZero, Begin);
526 
527     Builder.SetInsertPoint(NotZero);
528     Address IndexAddress = EmitPointerWithAlignment(E->getArg(0));
529 
530     if (BuiltinID == MSVCIntrin::_BitScanForward) {
531       Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
532       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
533       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
534       Builder.CreateStore(ZeroCount, IndexAddress, false);
535     } else {
536       unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
537       Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
538 
539       Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
540       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
541       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
542       Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
543       Builder.CreateStore(Index, IndexAddress, false);
544     }
545     Builder.CreateBr(End);
546     Result->addIncoming(ResOne, NotZero);
547 
548     Builder.SetInsertPoint(End);
549     return Result;
550   }
551   case MSVCIntrin::_InterlockedAnd:
552     return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
553   case MSVCIntrin::_InterlockedExchange:
554     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
555   case MSVCIntrin::_InterlockedExchangeAdd:
556     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
557   case MSVCIntrin::_InterlockedExchangeSub:
558     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
559   case MSVCIntrin::_InterlockedOr:
560     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
561   case MSVCIntrin::_InterlockedXor:
562     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
563 
564   case MSVCIntrin::_interlockedbittestandset: {
565     llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
566     llvm::Value *Bit = EmitScalarExpr(E->getArg(1));
567     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
568         AtomicRMWInst::Or, Addr,
569         Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit),
570         llvm::AtomicOrdering::SequentiallyConsistent);
571     // Shift the relevant bit to the least significant position, truncate to
572     // the result type, and test the low bit.
573     llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit);
574     llvm::Value *Truncated =
575         Builder.CreateTrunc(Shifted, ConvertType(E->getType()));
576     return Builder.CreateAnd(Truncated,
577                              ConstantInt::get(Truncated->getType(), 1));
578   }
579 
580   case MSVCIntrin::_InterlockedDecrement: {
581     llvm::Type *IntTy = ConvertType(E->getType());
582     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
583       AtomicRMWInst::Sub,
584       EmitScalarExpr(E->getArg(0)),
585       ConstantInt::get(IntTy, 1),
586       llvm::AtomicOrdering::SequentiallyConsistent);
587     return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1));
588   }
589   case MSVCIntrin::_InterlockedIncrement: {
590     llvm::Type *IntTy = ConvertType(E->getType());
591     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
592       AtomicRMWInst::Add,
593       EmitScalarExpr(E->getArg(0)),
594       ConstantInt::get(IntTy, 1),
595       llvm::AtomicOrdering::SequentiallyConsistent);
596     return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1));
597   }
598 
599   case MSVCIntrin::__fastfail: {
600     // Request immediate process termination from the kernel. The instruction
601     // sequences to do this are documented on MSDN:
602     // https://msdn.microsoft.com/en-us/library/dn774154.aspx
603     llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
604     StringRef Asm, Constraints;
605     switch (ISA) {
606     default:
607       ErrorUnsupported(E, "__fastfail call for this architecture");
608       break;
609     case llvm::Triple::x86:
610     case llvm::Triple::x86_64:
611       Asm = "int $$0x29";
612       Constraints = "{cx}";
613       break;
614     case llvm::Triple::thumb:
615       Asm = "udf #251";
616       Constraints = "{r0}";
617       break;
618     }
619     llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
620     llvm::InlineAsm *IA =
621         llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
622     llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
623         getLLVMContext(), llvm::AttributeList::FunctionIndex,
624         llvm::Attribute::NoReturn);
625     CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
626     CS.setAttributes(NoReturnAttr);
627     return CS.getInstruction();
628   }
629   }
630   llvm_unreachable("Incorrect MSVC intrinsic!");
631 }
632 
633 namespace {
634 // ARC cleanup for __builtin_os_log_format
635 struct CallObjCArcUse final : EHScopeStack::Cleanup {
636   CallObjCArcUse(llvm::Value *object) : object(object) {}
637   llvm::Value *object;
638 
639   void Emit(CodeGenFunction &CGF, Flags flags) override {
640     CGF.EmitARCIntrinsicUse(object);
641   }
642 };
643 }
644 
645 Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
646                                                  BuiltinCheckKind Kind) {
647   assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
648           && "Unsupported builtin check kind");
649 
650   Value *ArgValue = EmitScalarExpr(E);
651   if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef())
652     return ArgValue;
653 
654   SanitizerScope SanScope(this);
655   Value *Cond = Builder.CreateICmpNE(
656       ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
657   EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
658             SanitizerHandler::InvalidBuiltin,
659             {EmitCheckSourceLocation(E->getExprLoc()),
660              llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
661             None);
662   return ArgValue;
663 }
664 
665 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
666                                         unsigned BuiltinID, const CallExpr *E,
667                                         ReturnValueSlot ReturnValue) {
668   // See if we can constant fold this builtin.  If so, don't emit it at all.
669   Expr::EvalResult Result;
670   if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
671       !Result.hasSideEffects()) {
672     if (Result.Val.isInt())
673       return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
674                                                 Result.Val.getInt()));
675     if (Result.Val.isFloat())
676       return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
677                                                Result.Val.getFloat()));
678   }
679 
680   switch (BuiltinID) {
681   default: break;  // Handle intrinsics and libm functions below.
682   case Builtin::BI__builtin___CFStringMakeConstantString:
683   case Builtin::BI__builtin___NSStringMakeConstantString:
684     return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
685   case Builtin::BI__builtin_stdarg_start:
686   case Builtin::BI__builtin_va_start:
687   case Builtin::BI__va_start:
688   case Builtin::BI__builtin_va_end:
689     return RValue::get(
690         EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
691                            ? EmitScalarExpr(E->getArg(0))
692                            : EmitVAListRef(E->getArg(0)).getPointer(),
693                        BuiltinID != Builtin::BI__builtin_va_end));
694   case Builtin::BI__builtin_va_copy: {
695     Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
696     Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
697 
698     llvm::Type *Type = Int8PtrTy;
699 
700     DstPtr = Builder.CreateBitCast(DstPtr, Type);
701     SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
702     return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
703                                           {DstPtr, SrcPtr}));
704   }
705   case Builtin::BI__builtin_abs:
706   case Builtin::BI__builtin_labs:
707   case Builtin::BI__builtin_llabs: {
708     Value *ArgValue = EmitScalarExpr(E->getArg(0));
709 
710     Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
711     Value *CmpResult =
712     Builder.CreateICmpSGE(ArgValue,
713                           llvm::Constant::getNullValue(ArgValue->getType()),
714                                                             "abscond");
715     Value *Result =
716       Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
717 
718     return RValue::get(Result);
719   }
720   case Builtin::BI__builtin_fabs:
721   case Builtin::BI__builtin_fabsf:
722   case Builtin::BI__builtin_fabsl: {
723     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
724   }
725   case Builtin::BI__builtin_fmod:
726   case Builtin::BI__builtin_fmodf:
727   case Builtin::BI__builtin_fmodl: {
728     Value *Arg1 = EmitScalarExpr(E->getArg(0));
729     Value *Arg2 = EmitScalarExpr(E->getArg(1));
730     Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
731     return RValue::get(Result);
732   }
733   case Builtin::BI__builtin_copysign:
734   case Builtin::BI__builtin_copysignf:
735   case Builtin::BI__builtin_copysignl: {
736     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
737   }
738   case Builtin::BI__builtin_ceil:
739   case Builtin::BI__builtin_ceilf:
740   case Builtin::BI__builtin_ceill: {
741     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
742   }
743   case Builtin::BI__builtin_floor:
744   case Builtin::BI__builtin_floorf:
745   case Builtin::BI__builtin_floorl: {
746     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
747   }
748   case Builtin::BI__builtin_trunc:
749   case Builtin::BI__builtin_truncf:
750   case Builtin::BI__builtin_truncl: {
751     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
752   }
753   case Builtin::BI__builtin_rint:
754   case Builtin::BI__builtin_rintf:
755   case Builtin::BI__builtin_rintl: {
756     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
757   }
758   case Builtin::BI__builtin_nearbyint:
759   case Builtin::BI__builtin_nearbyintf:
760   case Builtin::BI__builtin_nearbyintl: {
761     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
762   }
763   case Builtin::BI__builtin_round:
764   case Builtin::BI__builtin_roundf:
765   case Builtin::BI__builtin_roundl: {
766     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
767   }
768   case Builtin::BI__builtin_fmin:
769   case Builtin::BI__builtin_fminf:
770   case Builtin::BI__builtin_fminl: {
771     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
772   }
773   case Builtin::BI__builtin_fmax:
774   case Builtin::BI__builtin_fmaxf:
775   case Builtin::BI__builtin_fmaxl: {
776     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
777   }
778   case Builtin::BI__builtin_conj:
779   case Builtin::BI__builtin_conjf:
780   case Builtin::BI__builtin_conjl: {
781     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
782     Value *Real = ComplexVal.first;
783     Value *Imag = ComplexVal.second;
784     Value *Zero =
785       Imag->getType()->isFPOrFPVectorTy()
786         ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
787         : llvm::Constant::getNullValue(Imag->getType());
788 
789     Imag = Builder.CreateFSub(Zero, Imag, "sub");
790     return RValue::getComplex(std::make_pair(Real, Imag));
791   }
792   case Builtin::BI__builtin_creal:
793   case Builtin::BI__builtin_crealf:
794   case Builtin::BI__builtin_creall:
795   case Builtin::BIcreal:
796   case Builtin::BIcrealf:
797   case Builtin::BIcreall: {
798     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
799     return RValue::get(ComplexVal.first);
800   }
801 
802   case Builtin::BI__builtin_cimag:
803   case Builtin::BI__builtin_cimagf:
804   case Builtin::BI__builtin_cimagl:
805   case Builtin::BIcimag:
806   case Builtin::BIcimagf:
807   case Builtin::BIcimagl: {
808     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
809     return RValue::get(ComplexVal.second);
810   }
811 
812   case Builtin::BI__builtin_ctzs:
813   case Builtin::BI__builtin_ctz:
814   case Builtin::BI__builtin_ctzl:
815   case Builtin::BI__builtin_ctzll: {
816     Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
817 
818     llvm::Type *ArgType = ArgValue->getType();
819     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
820 
821     llvm::Type *ResultType = ConvertType(E->getType());
822     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
823     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
824     if (Result->getType() != ResultType)
825       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
826                                      "cast");
827     return RValue::get(Result);
828   }
829   case Builtin::BI__builtin_clzs:
830   case Builtin::BI__builtin_clz:
831   case Builtin::BI__builtin_clzl:
832   case Builtin::BI__builtin_clzll: {
833     Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
834 
835     llvm::Type *ArgType = ArgValue->getType();
836     Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
837 
838     llvm::Type *ResultType = ConvertType(E->getType());
839     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
840     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
841     if (Result->getType() != ResultType)
842       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
843                                      "cast");
844     return RValue::get(Result);
845   }
846   case Builtin::BI__builtin_ffs:
847   case Builtin::BI__builtin_ffsl:
848   case Builtin::BI__builtin_ffsll: {
849     // ffs(x) -> x ? cttz(x) + 1 : 0
850     Value *ArgValue = EmitScalarExpr(E->getArg(0));
851 
852     llvm::Type *ArgType = ArgValue->getType();
853     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
854 
855     llvm::Type *ResultType = ConvertType(E->getType());
856     Value *Tmp =
857         Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
858                           llvm::ConstantInt::get(ArgType, 1));
859     Value *Zero = llvm::Constant::getNullValue(ArgType);
860     Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
861     Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
862     if (Result->getType() != ResultType)
863       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
864                                      "cast");
865     return RValue::get(Result);
866   }
867   case Builtin::BI__builtin_parity:
868   case Builtin::BI__builtin_parityl:
869   case Builtin::BI__builtin_parityll: {
870     // parity(x) -> ctpop(x) & 1
871     Value *ArgValue = EmitScalarExpr(E->getArg(0));
872 
873     llvm::Type *ArgType = ArgValue->getType();
874     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
875 
876     llvm::Type *ResultType = ConvertType(E->getType());
877     Value *Tmp = Builder.CreateCall(F, ArgValue);
878     Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
879     if (Result->getType() != ResultType)
880       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
881                                      "cast");
882     return RValue::get(Result);
883   }
884   case Builtin::BI__popcnt16:
885   case Builtin::BI__popcnt:
886   case Builtin::BI__popcnt64:
887   case Builtin::BI__builtin_popcount:
888   case Builtin::BI__builtin_popcountl:
889   case Builtin::BI__builtin_popcountll: {
890     Value *ArgValue = EmitScalarExpr(E->getArg(0));
891 
892     llvm::Type *ArgType = ArgValue->getType();
893     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
894 
895     llvm::Type *ResultType = ConvertType(E->getType());
896     Value *Result = Builder.CreateCall(F, ArgValue);
897     if (Result->getType() != ResultType)
898       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
899                                      "cast");
900     return RValue::get(Result);
901   }
902   case Builtin::BI_rotr8:
903   case Builtin::BI_rotr16:
904   case Builtin::BI_rotr:
905   case Builtin::BI_lrotr:
906   case Builtin::BI_rotr64: {
907     Value *Val = EmitScalarExpr(E->getArg(0));
908     Value *Shift = EmitScalarExpr(E->getArg(1));
909 
910     llvm::Type *ArgType = Val->getType();
911     Shift = Builder.CreateIntCast(Shift, ArgType, false);
912     unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
913     Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
914     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
915 
916     Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
917     Shift = Builder.CreateAnd(Shift, Mask);
918     Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift);
919 
920     Value *RightShifted = Builder.CreateLShr(Val, Shift);
921     Value *LeftShifted = Builder.CreateShl(Val, LeftShift);
922     Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
923 
924     Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
925     Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
926     return RValue::get(Result);
927   }
928   case Builtin::BI_rotl8:
929   case Builtin::BI_rotl16:
930   case Builtin::BI_rotl:
931   case Builtin::BI_lrotl:
932   case Builtin::BI_rotl64: {
933     Value *Val = EmitScalarExpr(E->getArg(0));
934     Value *Shift = EmitScalarExpr(E->getArg(1));
935 
936     llvm::Type *ArgType = Val->getType();
937     Shift = Builder.CreateIntCast(Shift, ArgType, false);
938     unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
939     Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
940     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
941 
942     Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
943     Shift = Builder.CreateAnd(Shift, Mask);
944     Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift);
945 
946     Value *LeftShifted = Builder.CreateShl(Val, Shift);
947     Value *RightShifted = Builder.CreateLShr(Val, RightShift);
948     Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
949 
950     Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
951     Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
952     return RValue::get(Result);
953   }
954   case Builtin::BI__builtin_unpredictable: {
955     // Always return the argument of __builtin_unpredictable. LLVM does not
956     // handle this builtin. Metadata for this builtin should be added directly
957     // to instructions such as branches or switches that use it.
958     return RValue::get(EmitScalarExpr(E->getArg(0)));
959   }
960   case Builtin::BI__builtin_expect: {
961     Value *ArgValue = EmitScalarExpr(E->getArg(0));
962     llvm::Type *ArgType = ArgValue->getType();
963 
964     Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
965     // Don't generate llvm.expect on -O0 as the backend won't use it for
966     // anything.
967     // Note, we still IRGen ExpectedValue because it could have side-effects.
968     if (CGM.getCodeGenOpts().OptimizationLevel == 0)
969       return RValue::get(ArgValue);
970 
971     Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
972     Value *Result =
973         Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
974     return RValue::get(Result);
975   }
976   case Builtin::BI__builtin_assume_aligned: {
977     Value *PtrValue = EmitScalarExpr(E->getArg(0));
978     Value *OffsetValue =
979       (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
980 
981     Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
982     ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
983     unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
984 
985     EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
986     return RValue::get(PtrValue);
987   }
988   case Builtin::BI__assume:
989   case Builtin::BI__builtin_assume: {
990     if (E->getArg(0)->HasSideEffects(getContext()))
991       return RValue::get(nullptr);
992 
993     Value *ArgValue = EmitScalarExpr(E->getArg(0));
994     Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
995     return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
996   }
997   case Builtin::BI__builtin_bswap16:
998   case Builtin::BI__builtin_bswap32:
999   case Builtin::BI__builtin_bswap64: {
1000     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
1001   }
1002   case Builtin::BI__builtin_bitreverse8:
1003   case Builtin::BI__builtin_bitreverse16:
1004   case Builtin::BI__builtin_bitreverse32:
1005   case Builtin::BI__builtin_bitreverse64: {
1006     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
1007   }
1008   case Builtin::BI__builtin_object_size: {
1009     unsigned Type =
1010         E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
1011     auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
1012 
1013     // We pass this builtin onto the optimizer so that it can figure out the
1014     // object size in more complex cases.
1015     return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
1016                                              /*EmittedE=*/nullptr));
1017   }
1018   case Builtin::BI__builtin_prefetch: {
1019     Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
1020     // FIXME: Technically these constants should be of type 'int', yes?
1021     RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
1022       llvm::ConstantInt::get(Int32Ty, 0);
1023     Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
1024       llvm::ConstantInt::get(Int32Ty, 3);
1025     Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
1026     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
1027     return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
1028   }
1029   case Builtin::BI__builtin_readcyclecounter: {
1030     Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
1031     return RValue::get(Builder.CreateCall(F));
1032   }
1033   case Builtin::BI__builtin___clear_cache: {
1034     Value *Begin = EmitScalarExpr(E->getArg(0));
1035     Value *End = EmitScalarExpr(E->getArg(1));
1036     Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
1037     return RValue::get(Builder.CreateCall(F, {Begin, End}));
1038   }
1039   case Builtin::BI__builtin_trap:
1040     return RValue::get(EmitTrapCall(Intrinsic::trap));
1041   case Builtin::BI__debugbreak:
1042     return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
1043   case Builtin::BI__builtin_unreachable: {
1044     if (SanOpts.has(SanitizerKind::Unreachable)) {
1045       SanitizerScope SanScope(this);
1046       EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
1047                                SanitizerKind::Unreachable),
1048                 SanitizerHandler::BuiltinUnreachable,
1049                 EmitCheckSourceLocation(E->getExprLoc()), None);
1050     } else
1051       Builder.CreateUnreachable();
1052 
1053     // We do need to preserve an insertion point.
1054     EmitBlock(createBasicBlock("unreachable.cont"));
1055 
1056     return RValue::get(nullptr);
1057   }
1058 
1059   case Builtin::BI__builtin_powi:
1060   case Builtin::BI__builtin_powif:
1061   case Builtin::BI__builtin_powil: {
1062     Value *Base = EmitScalarExpr(E->getArg(0));
1063     Value *Exponent = EmitScalarExpr(E->getArg(1));
1064     llvm::Type *ArgType = Base->getType();
1065     Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
1066     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1067   }
1068 
1069   case Builtin::BI__builtin_isgreater:
1070   case Builtin::BI__builtin_isgreaterequal:
1071   case Builtin::BI__builtin_isless:
1072   case Builtin::BI__builtin_islessequal:
1073   case Builtin::BI__builtin_islessgreater:
1074   case Builtin::BI__builtin_isunordered: {
1075     // Ordered comparisons: we know the arguments to these are matching scalar
1076     // floating point values.
1077     Value *LHS = EmitScalarExpr(E->getArg(0));
1078     Value *RHS = EmitScalarExpr(E->getArg(1));
1079 
1080     switch (BuiltinID) {
1081     default: llvm_unreachable("Unknown ordered comparison");
1082     case Builtin::BI__builtin_isgreater:
1083       LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
1084       break;
1085     case Builtin::BI__builtin_isgreaterequal:
1086       LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
1087       break;
1088     case Builtin::BI__builtin_isless:
1089       LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
1090       break;
1091     case Builtin::BI__builtin_islessequal:
1092       LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
1093       break;
1094     case Builtin::BI__builtin_islessgreater:
1095       LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
1096       break;
1097     case Builtin::BI__builtin_isunordered:
1098       LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
1099       break;
1100     }
1101     // ZExt bool to int type.
1102     return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
1103   }
1104   case Builtin::BI__builtin_isnan: {
1105     Value *V = EmitScalarExpr(E->getArg(0));
1106     V = Builder.CreateFCmpUNO(V, V, "cmp");
1107     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1108   }
1109 
1110   case Builtin::BIfinite:
1111   case Builtin::BI__finite:
1112   case Builtin::BIfinitef:
1113   case Builtin::BI__finitef:
1114   case Builtin::BIfinitel:
1115   case Builtin::BI__finitel:
1116   case Builtin::BI__builtin_isinf:
1117   case Builtin::BI__builtin_isfinite: {
1118     // isinf(x)    --> fabs(x) == infinity
1119     // isfinite(x) --> fabs(x) != infinity
1120     // x != NaN via the ordered compare in either case.
1121     Value *V = EmitScalarExpr(E->getArg(0));
1122     Value *Fabs = EmitFAbs(*this, V);
1123     Constant *Infinity = ConstantFP::getInfinity(V->getType());
1124     CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
1125                                   ? CmpInst::FCMP_OEQ
1126                                   : CmpInst::FCMP_ONE;
1127     Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
1128     return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
1129   }
1130 
1131   case Builtin::BI__builtin_isinf_sign: {
1132     // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
1133     Value *Arg = EmitScalarExpr(E->getArg(0));
1134     Value *AbsArg = EmitFAbs(*this, Arg);
1135     Value *IsInf = Builder.CreateFCmpOEQ(
1136         AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
1137     Value *IsNeg = EmitSignBit(*this, Arg);
1138 
1139     llvm::Type *IntTy = ConvertType(E->getType());
1140     Value *Zero = Constant::getNullValue(IntTy);
1141     Value *One = ConstantInt::get(IntTy, 1);
1142     Value *NegativeOne = ConstantInt::get(IntTy, -1);
1143     Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
1144     Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
1145     return RValue::get(Result);
1146   }
1147 
1148   case Builtin::BI__builtin_isnormal: {
1149     // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
1150     Value *V = EmitScalarExpr(E->getArg(0));
1151     Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
1152 
1153     Value *Abs = EmitFAbs(*this, V);
1154     Value *IsLessThanInf =
1155       Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
1156     APFloat Smallest = APFloat::getSmallestNormalized(
1157                    getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
1158     Value *IsNormal =
1159       Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
1160                             "isnormal");
1161     V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
1162     V = Builder.CreateAnd(V, IsNormal, "and");
1163     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1164   }
1165 
1166   case Builtin::BI__builtin_fpclassify: {
1167     Value *V = EmitScalarExpr(E->getArg(5));
1168     llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
1169 
1170     // Create Result
1171     BasicBlock *Begin = Builder.GetInsertBlock();
1172     BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
1173     Builder.SetInsertPoint(End);
1174     PHINode *Result =
1175       Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
1176                         "fpclassify_result");
1177 
1178     // if (V==0) return FP_ZERO
1179     Builder.SetInsertPoint(Begin);
1180     Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
1181                                           "iszero");
1182     Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
1183     BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
1184     Builder.CreateCondBr(IsZero, End, NotZero);
1185     Result->addIncoming(ZeroLiteral, Begin);
1186 
1187     // if (V != V) return FP_NAN
1188     Builder.SetInsertPoint(NotZero);
1189     Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
1190     Value *NanLiteral = EmitScalarExpr(E->getArg(0));
1191     BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
1192     Builder.CreateCondBr(IsNan, End, NotNan);
1193     Result->addIncoming(NanLiteral, NotZero);
1194 
1195     // if (fabs(V) == infinity) return FP_INFINITE
1196     Builder.SetInsertPoint(NotNan);
1197     Value *VAbs = EmitFAbs(*this, V);
1198     Value *IsInf =
1199       Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
1200                             "isinf");
1201     Value *InfLiteral = EmitScalarExpr(E->getArg(1));
1202     BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
1203     Builder.CreateCondBr(IsInf, End, NotInf);
1204     Result->addIncoming(InfLiteral, NotNan);
1205 
1206     // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
1207     Builder.SetInsertPoint(NotInf);
1208     APFloat Smallest = APFloat::getSmallestNormalized(
1209         getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
1210     Value *IsNormal =
1211       Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
1212                             "isnormal");
1213     Value *NormalResult =
1214       Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
1215                            EmitScalarExpr(E->getArg(3)));
1216     Builder.CreateBr(End);
1217     Result->addIncoming(NormalResult, NotInf);
1218 
1219     // return Result
1220     Builder.SetInsertPoint(End);
1221     return RValue::get(Result);
1222   }
1223 
1224   case Builtin::BIalloca:
1225   case Builtin::BI_alloca:
1226   case Builtin::BI__builtin_alloca: {
1227     Value *Size = EmitScalarExpr(E->getArg(0));
1228     const TargetInfo &TI = getContext().getTargetInfo();
1229     // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
1230     unsigned SuitableAlignmentInBytes =
1231         CGM.getContext()
1232             .toCharUnitsFromBits(TI.getSuitableAlign())
1233             .getQuantity();
1234     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1235     AI->setAlignment(SuitableAlignmentInBytes);
1236     return RValue::get(AI);
1237   }
1238 
1239   case Builtin::BI__builtin_alloca_with_align: {
1240     Value *Size = EmitScalarExpr(E->getArg(0));
1241     Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
1242     auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
1243     unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
1244     unsigned AlignmentInBytes =
1245         CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity();
1246     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1247     AI->setAlignment(AlignmentInBytes);
1248     return RValue::get(AI);
1249   }
1250 
1251   case Builtin::BIbzero:
1252   case Builtin::BI__builtin_bzero: {
1253     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1254     Value *SizeVal = EmitScalarExpr(E->getArg(1));
1255     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1256                         E->getArg(0)->getExprLoc(), FD, 0);
1257     Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
1258     return RValue::get(Dest.getPointer());
1259   }
1260   case Builtin::BImemcpy:
1261   case Builtin::BI__builtin_memcpy: {
1262     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1263     Address Src = EmitPointerWithAlignment(E->getArg(1));
1264     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1265     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1266                         E->getArg(0)->getExprLoc(), FD, 0);
1267     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1268                         E->getArg(1)->getExprLoc(), FD, 1);
1269     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1270     return RValue::get(Dest.getPointer());
1271   }
1272 
1273   case Builtin::BI__builtin_char_memchr:
1274     BuiltinID = Builtin::BI__builtin_memchr;
1275     break;
1276 
1277   case Builtin::BI__builtin___memcpy_chk: {
1278     // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
1279     llvm::APSInt Size, DstSize;
1280     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1281         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1282       break;
1283     if (Size.ugt(DstSize))
1284       break;
1285     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1286     Address Src = EmitPointerWithAlignment(E->getArg(1));
1287     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1288     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1289     return RValue::get(Dest.getPointer());
1290   }
1291 
1292   case Builtin::BI__builtin_objc_memmove_collectable: {
1293     Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
1294     Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
1295     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1296     CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
1297                                                   DestAddr, SrcAddr, SizeVal);
1298     return RValue::get(DestAddr.getPointer());
1299   }
1300 
1301   case Builtin::BI__builtin___memmove_chk: {
1302     // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
1303     llvm::APSInt Size, DstSize;
1304     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1305         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1306       break;
1307     if (Size.ugt(DstSize))
1308       break;
1309     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1310     Address Src = EmitPointerWithAlignment(E->getArg(1));
1311     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1312     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1313     return RValue::get(Dest.getPointer());
1314   }
1315 
1316   case Builtin::BImemmove:
1317   case Builtin::BI__builtin_memmove: {
1318     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1319     Address Src = EmitPointerWithAlignment(E->getArg(1));
1320     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1321     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1322                         E->getArg(0)->getExprLoc(), FD, 0);
1323     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1324                         E->getArg(1)->getExprLoc(), FD, 1);
1325     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1326     return RValue::get(Dest.getPointer());
1327   }
1328   case Builtin::BImemset:
1329   case Builtin::BI__builtin_memset: {
1330     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1331     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1332                                          Builder.getInt8Ty());
1333     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1334     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1335                         E->getArg(0)->getExprLoc(), FD, 0);
1336     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1337     return RValue::get(Dest.getPointer());
1338   }
1339   case Builtin::BI__builtin___memset_chk: {
1340     // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
1341     llvm::APSInt Size, DstSize;
1342     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1343         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1344       break;
1345     if (Size.ugt(DstSize))
1346       break;
1347     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1348     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1349                                          Builder.getInt8Ty());
1350     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1351     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1352     return RValue::get(Dest.getPointer());
1353   }
1354   case Builtin::BI__builtin_dwarf_cfa: {
1355     // The offset in bytes from the first argument to the CFA.
1356     //
1357     // Why on earth is this in the frontend?  Is there any reason at
1358     // all that the backend can't reasonably determine this while
1359     // lowering llvm.eh.dwarf.cfa()?
1360     //
1361     // TODO: If there's a satisfactory reason, add a target hook for
1362     // this instead of hard-coding 0, which is correct for most targets.
1363     int32_t Offset = 0;
1364 
1365     Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
1366     return RValue::get(Builder.CreateCall(F,
1367                                       llvm::ConstantInt::get(Int32Ty, Offset)));
1368   }
1369   case Builtin::BI__builtin_return_address: {
1370     Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
1371                                                    getContext().UnsignedIntTy);
1372     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1373     return RValue::get(Builder.CreateCall(F, Depth));
1374   }
1375   case Builtin::BI_ReturnAddress: {
1376     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1377     return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
1378   }
1379   case Builtin::BI__builtin_frame_address: {
1380     Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
1381                                                    getContext().UnsignedIntTy);
1382     Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
1383     return RValue::get(Builder.CreateCall(F, Depth));
1384   }
1385   case Builtin::BI__builtin_extract_return_addr: {
1386     Value *Address = EmitScalarExpr(E->getArg(0));
1387     Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
1388     return RValue::get(Result);
1389   }
1390   case Builtin::BI__builtin_frob_return_addr: {
1391     Value *Address = EmitScalarExpr(E->getArg(0));
1392     Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
1393     return RValue::get(Result);
1394   }
1395   case Builtin::BI__builtin_dwarf_sp_column: {
1396     llvm::IntegerType *Ty
1397       = cast<llvm::IntegerType>(ConvertType(E->getType()));
1398     int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
1399     if (Column == -1) {
1400       CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
1401       return RValue::get(llvm::UndefValue::get(Ty));
1402     }
1403     return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
1404   }
1405   case Builtin::BI__builtin_init_dwarf_reg_size_table: {
1406     Value *Address = EmitScalarExpr(E->getArg(0));
1407     if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
1408       CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
1409     return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1410   }
1411   case Builtin::BI__builtin_eh_return: {
1412     Value *Int = EmitScalarExpr(E->getArg(0));
1413     Value *Ptr = EmitScalarExpr(E->getArg(1));
1414 
1415     llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
1416     assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
1417            "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
1418     Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
1419                                   ? Intrinsic::eh_return_i32
1420                                   : Intrinsic::eh_return_i64);
1421     Builder.CreateCall(F, {Int, Ptr});
1422     Builder.CreateUnreachable();
1423 
1424     // We do need to preserve an insertion point.
1425     EmitBlock(createBasicBlock("builtin_eh_return.cont"));
1426 
1427     return RValue::get(nullptr);
1428   }
1429   case Builtin::BI__builtin_unwind_init: {
1430     Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
1431     return RValue::get(Builder.CreateCall(F));
1432   }
1433   case Builtin::BI__builtin_extend_pointer: {
1434     // Extends a pointer to the size of an _Unwind_Word, which is
1435     // uint64_t on all platforms.  Generally this gets poked into a
1436     // register and eventually used as an address, so if the
1437     // addressing registers are wider than pointers and the platform
1438     // doesn't implicitly ignore high-order bits when doing
1439     // addressing, we need to make sure we zext / sext based on
1440     // the platform's expectations.
1441     //
1442     // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
1443 
1444     // Cast the pointer to intptr_t.
1445     Value *Ptr = EmitScalarExpr(E->getArg(0));
1446     Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
1447 
1448     // If that's 64 bits, we're done.
1449     if (IntPtrTy->getBitWidth() == 64)
1450       return RValue::get(Result);
1451 
1452     // Otherwise, ask the codegen data what to do.
1453     if (getTargetHooks().extendPointerWithSExt())
1454       return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
1455     else
1456       return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
1457   }
  case Builtin::BI__builtin_setjmp: {
    // Lowers __builtin_setjmp in three steps: store the current frame address
    // through the buffer pointer itself (element 0), store the result of
    // llvm.stacksave into element 2 of the buffer, then call
    // llvm.eh.sjlj.setjmp on the buffer (cast to i8*) and return its result.
    // Element 1 is not written by this code.

    // Buffer is a void**.
    Address Buf = EmitPointerWithAlignment(E->getArg(0));

    // Store the frame pointer to the setjmp buffer.
    Value *FrameAddr =
      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
                         ConstantInt::get(Int32Ty, 0));
    Builder.CreateStore(FrameAddr, Buf);

    // Store the stack pointer to the setjmp buffer.
    Value *StackAddr =
        Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
    // Index 2, with pointer-sized elements.
    Address StackSaveSlot =
      Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
    Builder.CreateStore(StackAddr, StackSaveSlot);

    // Call LLVM's EH setjmp, which is lightweight.
    Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
    return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
  }
1480   case Builtin::BI__builtin_longjmp: {
1481     Value *Buf = EmitScalarExpr(E->getArg(0));
1482     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1483 
1484     // Call LLVM's EH longjmp, which is lightweight.
1485     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
1486 
1487     // longjmp doesn't return; mark this as unreachable.
1488     Builder.CreateUnreachable();
1489 
1490     // We do need to preserve an insertion point.
1491     EmitBlock(createBasicBlock("longjmp.cont"));
1492 
1493     return RValue::get(nullptr);
1494   }
  // Size-agnostic __sync_* spellings. Reaching any of these labels is treated
  // as an internal error: per the message below they should not survive Sema
  // (presumably Sema rewrites them to the sized _1/_2/_4/_8/_16 forms that
  // have their own cases in this switch -- confirm against Sema).
  case Builtin::BI__sync_fetch_and_add:
  case Builtin::BI__sync_fetch_and_sub:
  case Builtin::BI__sync_fetch_and_or:
  case Builtin::BI__sync_fetch_and_and:
  case Builtin::BI__sync_fetch_and_xor:
  case Builtin::BI__sync_fetch_and_nand:
  case Builtin::BI__sync_add_and_fetch:
  case Builtin::BI__sync_sub_and_fetch:
  case Builtin::BI__sync_and_and_fetch:
  case Builtin::BI__sync_or_and_fetch:
  case Builtin::BI__sync_xor_and_fetch:
  case Builtin::BI__sync_nand_and_fetch:
  case Builtin::BI__sync_val_compare_and_swap:
  case Builtin::BI__sync_bool_compare_and_swap:
  case Builtin::BI__sync_lock_test_and_set:
  case Builtin::BI__sync_lock_release:
  case Builtin::BI__sync_swap:
    llvm_unreachable("Shouldn't make it through sema");
  // __sync_fetch_and_<op>_N: every operand size of a given operation maps to
  // a single EmitBinaryAtomic call parameterized by the matching atomicrmw
  // operation.
  case Builtin::BI__sync_fetch_and_add_1:
  case Builtin::BI__sync_fetch_and_add_2:
  case Builtin::BI__sync_fetch_and_add_4:
  case Builtin::BI__sync_fetch_and_add_8:
  case Builtin::BI__sync_fetch_and_add_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
  case Builtin::BI__sync_fetch_and_sub_1:
  case Builtin::BI__sync_fetch_and_sub_2:
  case Builtin::BI__sync_fetch_and_sub_4:
  case Builtin::BI__sync_fetch_and_sub_8:
  case Builtin::BI__sync_fetch_and_sub_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
  case Builtin::BI__sync_fetch_and_or_1:
  case Builtin::BI__sync_fetch_and_or_2:
  case Builtin::BI__sync_fetch_and_or_4:
  case Builtin::BI__sync_fetch_and_or_8:
  case Builtin::BI__sync_fetch_and_or_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
  case Builtin::BI__sync_fetch_and_and_1:
  case Builtin::BI__sync_fetch_and_and_2:
  case Builtin::BI__sync_fetch_and_and_4:
  case Builtin::BI__sync_fetch_and_and_8:
  case Builtin::BI__sync_fetch_and_and_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
  case Builtin::BI__sync_fetch_and_xor_1:
  case Builtin::BI__sync_fetch_and_xor_2:
  case Builtin::BI__sync_fetch_and_xor_4:
  case Builtin::BI__sync_fetch_and_xor_8:
  case Builtin::BI__sync_fetch_and_xor_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
  case Builtin::BI__sync_fetch_and_nand_1:
  case Builtin::BI__sync_fetch_and_nand_2:
  case Builtin::BI__sync_fetch_and_nand_4:
  case Builtin::BI__sync_fetch_and_nand_8:
  case Builtin::BI__sync_fetch_and_nand_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);

  // Clang extensions: not overloaded yet.
  // (Unlike the operations above, these have no size-suffixed variants.)
  case Builtin::BI__sync_fetch_and_min:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
  case Builtin::BI__sync_fetch_and_max:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
  case Builtin::BI__sync_fetch_and_umin:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
  case Builtin::BI__sync_fetch_and_umax:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
1559 
  // __sync_<op>_and_fetch_N: each operation is routed to
  // EmitBinaryAtomicPost with both the atomicrmw operation and the plain
  // binary instruction of the same kind (presumably so the helper can
  // recompute the post-operation value from the value the atomicrmw returns
  // -- confirm in EmitBinaryAtomicPost).
  case Builtin::BI__sync_add_and_fetch_1:
  case Builtin::BI__sync_add_and_fetch_2:
  case Builtin::BI__sync_add_and_fetch_4:
  case Builtin::BI__sync_add_and_fetch_8:
  case Builtin::BI__sync_add_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
                                llvm::Instruction::Add);
  case Builtin::BI__sync_sub_and_fetch_1:
  case Builtin::BI__sync_sub_and_fetch_2:
  case Builtin::BI__sync_sub_and_fetch_4:
  case Builtin::BI__sync_sub_and_fetch_8:
  case Builtin::BI__sync_sub_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
                                llvm::Instruction::Sub);
  case Builtin::BI__sync_and_and_fetch_1:
  case Builtin::BI__sync_and_and_fetch_2:
  case Builtin::BI__sync_and_and_fetch_4:
  case Builtin::BI__sync_and_and_fetch_8:
  case Builtin::BI__sync_and_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
                                llvm::Instruction::And);
  case Builtin::BI__sync_or_and_fetch_1:
  case Builtin::BI__sync_or_and_fetch_2:
  case Builtin::BI__sync_or_and_fetch_4:
  case Builtin::BI__sync_or_and_fetch_8:
  case Builtin::BI__sync_or_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
                                llvm::Instruction::Or);
  case Builtin::BI__sync_xor_and_fetch_1:
  case Builtin::BI__sync_xor_and_fetch_2:
  case Builtin::BI__sync_xor_and_fetch_4:
  case Builtin::BI__sync_xor_and_fetch_8:
  case Builtin::BI__sync_xor_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
                                llvm::Instruction::Xor);
  // Nand has no plain binary instruction, so it is paired with And plus an
  // extra 'true' argument (presumably an "invert the result" flag -- confirm
  // against EmitBinaryAtomicPost's signature).
  case Builtin::BI__sync_nand_and_fetch_1:
  case Builtin::BI__sync_nand_and_fetch_2:
  case Builtin::BI__sync_nand_and_fetch_4:
  case Builtin::BI__sync_nand_and_fetch_8:
  case Builtin::BI__sync_nand_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
                                llvm::Instruction::And, true);
1602 
  // The val and bool compare-and-swap forms share MakeAtomicCmpXchgValue and
  // differ only in its last argument (false for the val form, true for the
  // bool form).
  case Builtin::BI__sync_val_compare_and_swap_1:
  case Builtin::BI__sync_val_compare_and_swap_2:
  case Builtin::BI__sync_val_compare_and_swap_4:
  case Builtin::BI__sync_val_compare_and_swap_8:
  case Builtin::BI__sync_val_compare_and_swap_16:
    return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));

  case Builtin::BI__sync_bool_compare_and_swap_1:
  case Builtin::BI__sync_bool_compare_and_swap_2:
  case Builtin::BI__sync_bool_compare_and_swap_4:
  case Builtin::BI__sync_bool_compare_and_swap_8:
  case Builtin::BI__sync_bool_compare_and_swap_16:
    return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));

  // __sync_swap_N and __sync_lock_test_and_set_N are emitted identically:
  // both become an atomic exchange via EmitBinaryAtomic.
  case Builtin::BI__sync_swap_1:
  case Builtin::BI__sync_swap_2:
  case Builtin::BI__sync_swap_4:
  case Builtin::BI__sync_swap_8:
  case Builtin::BI__sync_swap_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);

  case Builtin::BI__sync_lock_test_and_set_1:
  case Builtin::BI__sync_lock_test_and_set_2:
  case Builtin::BI__sync_lock_test_and_set_4:
  case Builtin::BI__sync_lock_test_and_set_8:
  case Builtin::BI__sync_lock_test_and_set_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1630 
1631   case Builtin::BI__sync_lock_release_1:
1632   case Builtin::BI__sync_lock_release_2:
1633   case Builtin::BI__sync_lock_release_4:
1634   case Builtin::BI__sync_lock_release_8:
1635   case Builtin::BI__sync_lock_release_16: {
1636     Value *Ptr = EmitScalarExpr(E->getArg(0));
1637     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
1638     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
1639     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
1640                                              StoreSize.getQuantity() * 8);
1641     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
1642     llvm::StoreInst *Store =
1643       Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
1644                                  StoreSize);
1645     Store->setAtomic(llvm::AtomicOrdering::Release);
1646     return RValue::get(nullptr);
1647   }
1648 
  case Builtin::BI__sync_synchronize: {
    // We assume this is supposed to correspond to a C++0x-style
    // sequentially-consistent fence (i.e. this is only usable for
    // synchronization, not device I/O or anything like that). This intrinsic
    // is really badly designed in the sense that in theory, there isn't
    // any way to safely use it... but in practice, it mostly works
    // to use it with non-atomic loads and stores to get acquire/release
    // semantics.
    Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
    // The builtin yields no value.
    return RValue::get(nullptr);
  }
1660 
  // Nontemporal load/store are fully delegated to their dedicated helpers;
  // whatever value the helper returns becomes the builtin's scalar result.
  case Builtin::BI__builtin_nontemporal_load:
    return RValue::get(EmitNontemporalLoad(*this, E));
  case Builtin::BI__builtin_nontemporal_store:
    return RValue::get(EmitNontemporalStore(*this, E));
1665   case Builtin::BI__c11_atomic_is_lock_free:
1666   case Builtin::BI__atomic_is_lock_free: {
1667     // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
1668     // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
1669     // _Atomic(T) is always properly-aligned.
1670     const char *LibCallName = "__atomic_is_lock_free";
1671     CallArgList Args;
1672     Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
1673              getContext().getSizeType());
1674     if (BuiltinID == Builtin::BI__atomic_is_lock_free)
1675       Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
1676                getContext().VoidPtrTy);
1677     else
1678       Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
1679                getContext().VoidPtrTy);
1680     const CGFunctionInfo &FuncInfo =
1681         CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
1682     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
1683     llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
1684     return EmitCall(FuncInfo, CGCallee::forDirect(Func),
1685                     ReturnValueSlot(), Args);
1686   }
1687 
  case Builtin::BI__atomic_test_and_set: {
    // Lowers __atomic_test_and_set(ptr, order) to an atomic exchange that
    // writes the i8 value 1 and yields whether the previous byte was non-zero.
    //
    // If the order operand folds to a constant, a single atomicrmw xchg with
    // the mapped ordering is emitted. Otherwise a switch on the runtime order
    // is built, with one block per LLVM ordering, and the five results are
    // merged with a PHI in the continuation block.
    //
    // Order mapping in both paths: 0 and any unrecognized value -> monotonic,
    // 1 and 2 -> acquire, 3 -> release, 4 -> acq_rel, 5 -> seq_cst.

    // Look at the argument type to determine whether this is a volatile
    // operation. The parameter type is always volatile.
    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
    bool Volatile =
        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();

    // Operate on the flag as an i8 in the pointer's original address space.
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
    Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
    Value *NewVal = Builder.getInt8(1);
    Value *Order = EmitScalarExpr(E->getArg(1));
    if (isa<llvm::ConstantInt>(Order)) {
      // Constant memory order: pick the ordering at compile time.
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
      AtomicRMWInst *Result = nullptr;
      switch (ord) {
      case 0:  // memory_order_relaxed
      default: // invalid order
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
                                         llvm::AtomicOrdering::Monotonic);
        break;
      case 1: // memory_order_consume
      case 2: // memory_order_acquire
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
                                         llvm::AtomicOrdering::Acquire);
        break;
      case 3: // memory_order_release
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
                                         llvm::AtomicOrdering::Release);
        break;
      case 4: // memory_order_acq_rel

        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
                                         llvm::AtomicOrdering::AcquireRelease);
        break;
      case 5: // memory_order_seq_cst
        Result = Builder.CreateAtomicRMW(
            llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
            llvm::AtomicOrdering::SequentiallyConsistent);
        break;
      }
      Result->setVolatile(Volatile);
      return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
    }

    // Runtime memory order: dispatch through a switch.
    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);

    // BBs[i] performs the exchange with ordering Orders[i].
    llvm::BasicBlock *BBs[5] = {
      createBasicBlock("monotonic", CurFn),
      createBasicBlock("acquire", CurFn),
      createBasicBlock("release", CurFn),
      createBasicBlock("acqrel", CurFn),
      createBasicBlock("seqcst", CurFn)
    };
    llvm::AtomicOrdering Orders[5] = {
        llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
        llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
        llvm::AtomicOrdering::SequentiallyConsistent};

    // The switch operates on an i32 (non-sign-extending cast); the monotonic
    // block doubles as the default destination for unrecognized orders.
    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);

    // Create the merge PHI up front so each ordering block can register its
    // incoming value as it is emitted.
    Builder.SetInsertPoint(ContBB);
    PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");

    for (unsigned i = 0; i < 5; ++i) {
      Builder.SetInsertPoint(BBs[i]);
      AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                                   Ptr, NewVal, Orders[i]);
      RMW->setVolatile(Volatile);
      Result->addIncoming(RMW, BBs[i]);
      Builder.CreateBr(ContBB);
    }

    // Order values 1 (consume) and 2 (acquire) both go to the acquire block.
    SI->addCase(Builder.getInt32(0), BBs[0]);
    SI->addCase(Builder.getInt32(1), BBs[1]);
    SI->addCase(Builder.getInt32(2), BBs[1]);
    SI->addCase(Builder.getInt32(3), BBs[2]);
    SI->addCase(Builder.getInt32(4), BBs[3]);
    SI->addCase(Builder.getInt32(5), BBs[4]);

    // Continue emitting in the merge block and convert the old byte to bool.
    Builder.SetInsertPoint(ContBB);
    return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
  }
1772 
  case Builtin::BI__atomic_clear: {
    // Lowers __atomic_clear(ptr, order) to an atomic store of the i8 value 0.
    //
    // If the order operand folds to a constant, one store with the mapped
    // ordering is emitted. Otherwise a switch on the runtime order selects
    // among three blocks, each holding one store.
    //
    // Only three orderings are distinguished: 3 -> release, 5 -> seq_cst, and
    // everything else (0, 1, 2, 4 and invalid values) -> monotonic.

    // Volatility comes from the pointee type of the operand as written, with
    // implicit casts stripped.
    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
    bool Volatile =
        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();

    // Operate on the flag as an i8 in the pointer's original address space.
    Address Ptr = EmitPointerWithAlignment(E->getArg(0));
    unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
    Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
    Value *NewVal = Builder.getInt8(0);
    Value *Order = EmitScalarExpr(E->getArg(1));
    if (isa<llvm::ConstantInt>(Order)) {
      // Constant memory order: emit the store, then attach the ordering.
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
      StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
      switch (ord) {
      case 0:  // memory_order_relaxed
      default: // invalid order
        Store->setOrdering(llvm::AtomicOrdering::Monotonic);
        break;
      case 3:  // memory_order_release
        Store->setOrdering(llvm::AtomicOrdering::Release);
        break;
      case 5:  // memory_order_seq_cst
        Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
        break;
      }
      return RValue::get(nullptr);
    }

    // Runtime memory order: dispatch through a switch.
    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);

    // BBs[i] performs the store with ordering Orders[i].
    llvm::BasicBlock *BBs[3] = {
      createBasicBlock("monotonic", CurFn),
      createBasicBlock("release", CurFn),
      createBasicBlock("seqcst", CurFn)
    };
    llvm::AtomicOrdering Orders[3] = {
        llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
        llvm::AtomicOrdering::SequentiallyConsistent};

    // The switch operates on an i32 (non-sign-extending cast); the monotonic
    // block doubles as the default destination.
    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);

    for (unsigned i = 0; i < 3; ++i) {
      Builder.SetInsertPoint(BBs[i]);
      StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
      Store->setOrdering(Orders[i]);
      Builder.CreateBr(ContBB);
    }

    SI->addCase(Builder.getInt32(0), BBs[0]);
    SI->addCase(Builder.getInt32(3), BBs[1]);
    SI->addCase(Builder.getInt32(5), BBs[2]);

    // Continue emitting in the merge block; the builtin yields no value.
    Builder.SetInsertPoint(ContBB);
    return RValue::get(nullptr);
  }
1829 
  // Lowers the four fence builtins to an LLVM 'fence' instruction. The signal
  // variants use the single-thread synchronization scope and the thread
  // variants use the system scope. The memory order operand selects the
  // ordering: 1 and 2 -> acquire, 3 -> release, 4 -> acq_rel, 5 -> seq_cst.
  // Order 0 (relaxed) and any unrecognized value emit no fence at all.
  case Builtin::BI__atomic_thread_fence:
  case Builtin::BI__atomic_signal_fence:
  case Builtin::BI__c11_atomic_thread_fence:
  case Builtin::BI__c11_atomic_signal_fence: {
    llvm::SyncScope::ID SSID;
    if (BuiltinID == Builtin::BI__atomic_signal_fence ||
        BuiltinID == Builtin::BI__c11_atomic_signal_fence)
      SSID = llvm::SyncScope::SingleThread;
    else
      SSID = llvm::SyncScope::System;
    Value *Order = EmitScalarExpr(E->getArg(0));
    if (isa<llvm::ConstantInt>(Order)) {
      // Constant memory order: emit at most one fence, chosen at compile time.
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
      switch (ord) {
      case 0:  // memory_order_relaxed
      default: // invalid order
        break;
      case 1:  // memory_order_consume
      case 2:  // memory_order_acquire
        Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
        break;
      case 3:  // memory_order_release
        Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
        break;
      case 4:  // memory_order_acq_rel
        Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
        break;
      case 5:  // memory_order_seq_cst
        Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
        break;
      }
      return RValue::get(nullptr);
    }

    // Runtime memory order: one block per fence ordering, all of which branch
    // to the continuation block.
    llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
    AcquireBB = createBasicBlock("acquire", CurFn);
    ReleaseBB = createBasicBlock("release", CurFn);
    AcqRelBB = createBasicBlock("acqrel", CurFn);
    SeqCstBB = createBasicBlock("seqcst", CurFn);
    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);

    // The switch defaults straight to the continuation block, so relaxed and
    // unrecognized orders execute no fence.
    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);

    Builder.SetInsertPoint(AcquireBB);
    Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(1), AcquireBB);
    SI->addCase(Builder.getInt32(2), AcquireBB);

    Builder.SetInsertPoint(ReleaseBB);
    Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(3), ReleaseBB);

    Builder.SetInsertPoint(AcqRelBB);
    Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(4), AcqRelBB);

    Builder.SetInsertPoint(SeqCstBB);
    Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(5), SeqCstBB);

    // Continue emitting in the merge block; the builtin yields no value.
    Builder.SetInsertPoint(ContBB);
    return RValue::get(nullptr);
  }
1898 
1899     // Library functions with special handling.
1900   case Builtin::BIsqrt:
1901   case Builtin::BIsqrtf:
1902   case Builtin::BIsqrtl: {
1903     // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
1904     // in finite- or unsafe-math mode (the intrinsic has different semantics
1905     // for handling negative numbers compared to the library function, so
1906     // -fmath-errno=0 is not enough).
1907     if (!FD->hasAttr<ConstAttr>())
1908       break;
1909     if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
1910           CGM.getCodeGenOpts().NoNaNsFPMath))
1911       break;
1912     Value *Arg0 = EmitScalarExpr(E->getArg(0));
1913     llvm::Type *ArgType = Arg0->getType();
1914     Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
1915     return RValue::get(Builder.CreateCall(F, Arg0));
1916   }
1917 
1918   case Builtin::BI__builtin_pow:
1919   case Builtin::BI__builtin_powf:
1920   case Builtin::BI__builtin_powl:
1921   case Builtin::BIpow:
1922   case Builtin::BIpowf:
1923   case Builtin::BIpowl: {
1924     // Transform a call to pow* into a @llvm.pow.* intrinsic call.
1925     if (!FD->hasAttr<ConstAttr>())
1926       break;
1927     Value *Base = EmitScalarExpr(E->getArg(0));
1928     Value *Exponent = EmitScalarExpr(E->getArg(1));
1929     llvm::Type *ArgType = Base->getType();
1930     Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
1931     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1932   }
1933 
1934   case Builtin::BIfma:
1935   case Builtin::BIfmaf:
1936   case Builtin::BIfmal:
1937   case Builtin::BI__builtin_fma:
1938   case Builtin::BI__builtin_fmaf:
1939   case Builtin::BI__builtin_fmal: {
1940     // Rewrite fma to intrinsic.
1941     Value *FirstArg = EmitScalarExpr(E->getArg(0));
1942     llvm::Type *ArgType = FirstArg->getType();
1943     Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
1944     return RValue::get(
1945         Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
1946                                EmitScalarExpr(E->getArg(2))}));
1947   }
1948 
1949   case Builtin::BI__builtin_signbit:
1950   case Builtin::BI__builtin_signbitf:
1951   case Builtin::BI__builtin_signbitl: {
1952     return RValue::get(
1953         Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
1954                            ConvertType(E->getType())));
1955   }
1956   case Builtin::BI__builtin_annotation: {
1957     llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
1958     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
1959                                       AnnVal->getType());
1960 
1961     // Get the annotation string, go through casts. Sema requires this to be a
1962     // non-wide string literal, potentially casted, so the cast<> is safe.
1963     const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
1964     StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
1965     return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
1966   }
  // Multiprecision add/subtract with carry: result = x op y op carryin, with
  // the combined carry/borrow written through the fourth (pointer) operand.
  // All add variants share uadd.with.overflow and all sub variants share
  // usub.with.overflow; the operand width comes from the operands themselves.
  case Builtin::BI__builtin_addcb:
  case Builtin::BI__builtin_addcs:
  case Builtin::BI__builtin_addc:
  case Builtin::BI__builtin_addcl:
  case Builtin::BI__builtin_addcll:
  case Builtin::BI__builtin_subcb:
  case Builtin::BI__builtin_subcs:
  case Builtin::BI__builtin_subc:
  case Builtin::BI__builtin_subcl:
  case Builtin::BI__builtin_subcll: {

    // We translate all of these builtins from expressions of the form:
    //   int x = ..., y = ..., carryin = ..., carryout, result;
    //   result = __builtin_addc(x, y, carryin, &carryout);
    //
    // to LLVM IR of the form:
    //
    //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
    //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
    //   %carry1 = extractvalue {i32, i1} %tmp1, 1
    //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
    //                                                       i32 %carryin)
    //   %result = extractvalue {i32, i1} %tmp2, 0
    //   %carry2 = extractvalue {i32, i1} %tmp2, 1
    //   %tmp3 = or i1 %carry1, %carry2
    //   %tmp4 = zext i1 %tmp3 to i32
    //   store i32 %tmp4, i32* %carryout

    // Scalarize our inputs.
    llvm::Value *X = EmitScalarExpr(E->getArg(0));
    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
    llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
    Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));

    // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
    llvm::Intrinsic::ID IntrinsicId;
    switch (BuiltinID) {
    default: llvm_unreachable("Unknown multiprecision builtin id.");
    case Builtin::BI__builtin_addcb:
    case Builtin::BI__builtin_addcs:
    case Builtin::BI__builtin_addc:
    case Builtin::BI__builtin_addcl:
    case Builtin::BI__builtin_addcll:
      IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
      break;
    case Builtin::BI__builtin_subcb:
    case Builtin::BI__builtin_subcs:
    case Builtin::BI__builtin_subc:
    case Builtin::BI__builtin_subcl:
    case Builtin::BI__builtin_subcll:
      IntrinsicId = llvm::Intrinsic::usub_with_overflow;
      break;
    }

    // Construct our resulting LLVM IR expression.
    // Step 1: X op Y. Step 2: (step-1 result) op Carryin.
    llvm::Value *Carry1;
    llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
                                              X, Y, Carry1);
    llvm::Value *Carry2;
    llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
                                              Sum1, Carryin, Carry2);
    // The outgoing carry is set if either step overflowed; it is widened to
    // the operand type before being stored.
    llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
                                               X->getType());
    Builder.CreateStore(CarryOut, CarryOutPtr);
    return RValue::get(Sum2);
  }
2033 
  // Type-generic checked arithmetic: __builtin_{add,sub,mul}_overflow(a, b,
  // &res). The operation is performed in an "encompassing" integer type
  // computed from both operand types and the result's pointee type. If that
  // type is wider than the result type, the value is truncated and any bits
  // lost by truncation are also reported as overflow. The (possibly
  // truncated) value is stored through the result pointer and the overflow
  // flag is returned.
  case Builtin::BI__builtin_add_overflow:
  case Builtin::BI__builtin_sub_overflow:
  case Builtin::BI__builtin_mul_overflow: {
    const clang::Expr *LeftArg = E->getArg(0);
    const clang::Expr *RightArg = E->getArg(1);
    const clang::Expr *ResultArg = E->getArg(2);

    // The result type is the pointee of the third operand.
    clang::QualType ResultQTy =
        ResultArg->getType()->castAs<PointerType>()->getPointeeType();

    // Gather width/signedness of all three types and derive the type the
    // arithmetic is carried out in.
    WidthAndSignedness LeftInfo =
        getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
    WidthAndSignedness RightInfo =
        getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
    WidthAndSignedness ResultInfo =
        getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
    WidthAndSignedness EncompassingInfo =
        EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});

    llvm::Type *EncompassingLLVMTy =
        llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);

    llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);

    // Pick the signed or unsigned *.with.overflow intrinsic according to the
    // signedness of the encompassing type.
    llvm::Intrinsic::ID IntrinsicId;
    switch (BuiltinID) {
    default:
      llvm_unreachable("Unknown overflow builtin id.");
    case Builtin::BI__builtin_add_overflow:
      IntrinsicId = EncompassingInfo.Signed
                        ? llvm::Intrinsic::sadd_with_overflow
                        : llvm::Intrinsic::uadd_with_overflow;
      break;
    case Builtin::BI__builtin_sub_overflow:
      IntrinsicId = EncompassingInfo.Signed
                        ? llvm::Intrinsic::ssub_with_overflow
                        : llvm::Intrinsic::usub_with_overflow;
      break;
    case Builtin::BI__builtin_mul_overflow:
      IntrinsicId = EncompassingInfo.Signed
                        ? llvm::Intrinsic::smul_with_overflow
                        : llvm::Intrinsic::umul_with_overflow;
      break;
    }

    llvm::Value *Left = EmitScalarExpr(LeftArg);
    llvm::Value *Right = EmitScalarExpr(RightArg);
    Address ResultPtr = EmitPointerWithAlignment(ResultArg);

    // Extend each operand to the encompassing type.
    // (Each operand is extended according to its own signedness.)
    Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
    Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);

    // Perform the operation on the extended values.
    llvm::Value *Overflow, *Result;
    Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);

    if (EncompassingInfo.Width > ResultInfo.Width) {
      // The encompassing type is wider than the result type, so we need to
      // truncate it.
      llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);

      // To see if the truncation caused an overflow, we will extend
      // the result and then compare it to the original result.
      llvm::Value *ResultTruncExt = Builder.CreateIntCast(
          ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
      llvm::Value *TruncationOverflow =
          Builder.CreateICmpNE(Result, ResultTruncExt);

      // Overflow is reported if either the operation or the truncation lost
      // information.
      Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
      Result = ResultTrunc;
    }

    // Finally, store the result using the pointer.
    // The store is volatile when the result pointee type is volatile.
    bool isVolatile =
      ResultArg->getType()->getPointeeType().isVolatileQualified();
    Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);

    return RValue::get(Overflow);
  }
2114 
2115   case Builtin::BI__builtin_uadd_overflow:
2116   case Builtin::BI__builtin_uaddl_overflow:
2117   case Builtin::BI__builtin_uaddll_overflow:
2118   case Builtin::BI__builtin_usub_overflow:
2119   case Builtin::BI__builtin_usubl_overflow:
2120   case Builtin::BI__builtin_usubll_overflow:
2121   case Builtin::BI__builtin_umul_overflow:
2122   case Builtin::BI__builtin_umull_overflow:
2123   case Builtin::BI__builtin_umulll_overflow:
2124   case Builtin::BI__builtin_sadd_overflow:
2125   case Builtin::BI__builtin_saddl_overflow:
2126   case Builtin::BI__builtin_saddll_overflow:
2127   case Builtin::BI__builtin_ssub_overflow:
2128   case Builtin::BI__builtin_ssubl_overflow:
2129   case Builtin::BI__builtin_ssubll_overflow:
2130   case Builtin::BI__builtin_smul_overflow:
2131   case Builtin::BI__builtin_smull_overflow:
2132   case Builtin::BI__builtin_smulll_overflow: {
2133 
2134     // We translate all of these builtins directly to the relevant llvm IR node.
2135 
2136     // Scalarize our inputs.
2137     llvm::Value *X = EmitScalarExpr(E->getArg(0));
2138     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
2139     Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
2140 
2141     // Decide which of the overflow intrinsics we are lowering to:
2142     llvm::Intrinsic::ID IntrinsicId;
2143     switch (BuiltinID) {
2144     default: llvm_unreachable("Unknown overflow builtin id.");
2145     case Builtin::BI__builtin_uadd_overflow:
2146     case Builtin::BI__builtin_uaddl_overflow:
2147     case Builtin::BI__builtin_uaddll_overflow:
2148       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
2149       break;
2150     case Builtin::BI__builtin_usub_overflow:
2151     case Builtin::BI__builtin_usubl_overflow:
2152     case Builtin::BI__builtin_usubll_overflow:
2153       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
2154       break;
2155     case Builtin::BI__builtin_umul_overflow:
2156     case Builtin::BI__builtin_umull_overflow:
2157     case Builtin::BI__builtin_umulll_overflow:
2158       IntrinsicId = llvm::Intrinsic::umul_with_overflow;
2159       break;
2160     case Builtin::BI__builtin_sadd_overflow:
2161     case Builtin::BI__builtin_saddl_overflow:
2162     case Builtin::BI__builtin_saddll_overflow:
2163       IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
2164       break;
2165     case Builtin::BI__builtin_ssub_overflow:
2166     case Builtin::BI__builtin_ssubl_overflow:
2167     case Builtin::BI__builtin_ssubll_overflow:
2168       IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
2169       break;
2170     case Builtin::BI__builtin_smul_overflow:
2171     case Builtin::BI__builtin_smull_overflow:
2172     case Builtin::BI__builtin_smulll_overflow:
2173       IntrinsicId = llvm::Intrinsic::smul_with_overflow;
2174       break;
2175     }
2176 
2177 
2178     llvm::Value *Carry;
2179     llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
2180     Builder.CreateStore(Sum, SumOutPtr);
2181 
2182     return RValue::get(Carry);
2183   }
2184   case Builtin::BI__builtin_addressof:
2185     return RValue::get(EmitLValue(E->getArg(0)).getPointer());
2186   case Builtin::BI__builtin_operator_new:
2187     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2188                                     E->getArg(0), false);
2189   case Builtin::BI__builtin_operator_delete:
2190     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2191                                     E->getArg(0), true);
2192   case Builtin::BI__noop:
2193     // __noop always evaluates to an integer literal zero.
2194     return RValue::get(ConstantInt::get(IntTy, 0));
2195   case Builtin::BI__builtin_call_with_static_chain: {
2196     const CallExpr *Call = cast<CallExpr>(E->getArg(0));
2197     const Expr *Chain = E->getArg(1);
2198     return EmitCall(Call->getCallee()->getType(),
2199                     EmitCallee(Call->getCallee()), Call, ReturnValue,
2200                     EmitScalarExpr(Chain));
2201   }
  // Every width of _InterlockedExchange, including the pointer form, is
  // handled by the shared MSVC-intrinsic helper using the single
  // MSVCIntrin::_InterlockedExchange selector.
  case Builtin::BI_InterlockedExchange8:
  case Builtin::BI_InterlockedExchange16:
  case Builtin::BI_InterlockedExchange:
  case Builtin::BI_InterlockedExchangePointer:
    return RValue::get(
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
2208   case Builtin::BI_InterlockedCompareExchangePointer: {
2209     llvm::Type *RTy;
2210     llvm::IntegerType *IntType =
2211       IntegerType::get(getLLVMContext(),
2212                        getContext().getTypeSize(E->getType()));
2213     llvm::Type *IntPtrType = IntType->getPointerTo();
2214 
2215     llvm::Value *Destination =
2216       Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
2217 
2218     llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
2219     RTy = Exchange->getType();
2220     Exchange = Builder.CreatePtrToInt(Exchange, IntType);
2221 
2222     llvm::Value *Comparand =
2223       Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
2224 
2225     auto Result =
2226         Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
2227                                     AtomicOrdering::SequentiallyConsistent,
2228                                     AtomicOrdering::SequentiallyConsistent);
2229     Result->setVolatile(true);
2230 
2231     return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
2232                                                                          0),
2233                                               RTy));
2234   }
2235   case Builtin::BI_InterlockedCompareExchange8:
2236   case Builtin::BI_InterlockedCompareExchange16:
2237   case Builtin::BI_InterlockedCompareExchange:
2238   case Builtin::BI_InterlockedCompareExchange64: {
2239     AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
2240         EmitScalarExpr(E->getArg(0)),
2241         EmitScalarExpr(E->getArg(2)),
2242         EmitScalarExpr(E->getArg(1)),
2243         AtomicOrdering::SequentiallyConsistent,
2244         AtomicOrdering::SequentiallyConsistent);
2245       CXI->setVolatile(true);
2246       return RValue::get(Builder.CreateExtractValue(CXI, 0));
2247   }
  // The remaining _Interlocked* builtins below are pure dispatch: each group
  // of width variants is forwarded to EmitMSVCBuiltinExpr with one MSVCIntrin
  // selector, and the helper's result is wrapped as the call's scalar value.
  case Builtin::BI_InterlockedIncrement16:
  case Builtin::BI_InterlockedIncrement:
    return RValue::get(
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
  case Builtin::BI_InterlockedDecrement16:
  case Builtin::BI_InterlockedDecrement:
    return RValue::get(
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
  case Builtin::BI_InterlockedAnd8:
  case Builtin::BI_InterlockedAnd16:
  case Builtin::BI_InterlockedAnd:
    return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
  case Builtin::BI_InterlockedExchangeAdd8:
  case Builtin::BI_InterlockedExchangeAdd16:
  case Builtin::BI_InterlockedExchangeAdd:
    return RValue::get(
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
  case Builtin::BI_InterlockedExchangeSub8:
  case Builtin::BI_InterlockedExchangeSub16:
  case Builtin::BI_InterlockedExchangeSub:
    return RValue::get(
        EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
  case Builtin::BI_InterlockedOr8:
  case Builtin::BI_InterlockedOr16:
  case Builtin::BI_InterlockedOr:
    return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
  case Builtin::BI_InterlockedXor8:
  case Builtin::BI_InterlockedXor16:
  case Builtin::BI_InterlockedXor:
    return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
  case Builtin::BI_interlockedbittestandset:
    return RValue::get(
        EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E));
2281 
  // SEH builtins. Both spellings of each builtin (one and two leading
  // underscores) share a single dedicated emitter.
  case Builtin::BI__exception_code:
  case Builtin::BI_exception_code:
    return RValue::get(EmitSEHExceptionCode());
  case Builtin::BI__exception_info:
  case Builtin::BI_exception_info:
    return RValue::get(EmitSEHExceptionInfo());
  case Builtin::BI__abnormal_termination:
  case Builtin::BI_abnormal_termination:
    return RValue::get(EmitSEHAbnormalTermination());
2291   case Builtin::BI_setjmpex: {
2292     if (getTarget().getTriple().isOSMSVCRT()) {
2293       llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2294       llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2295           getLLVMContext(), llvm::AttributeList::FunctionIndex,
2296           llvm::Attribute::ReturnsTwice);
2297       llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
2298           llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2299           "_setjmpex", ReturnsTwiceAttr, /*Local=*/true);
2300       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2301           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2302       llvm::Value *FrameAddr =
2303           Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2304                              ConstantInt::get(Int32Ty, 0));
2305       llvm::Value *Args[] = {Buf, FrameAddr};
2306       llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
2307       CS.setAttributes(ReturnsTwiceAttr);
2308       return RValue::get(CS.getInstruction());
2309     }
2310     break;
2311   }
2312   case Builtin::BI_setjmp: {
2313     if (getTarget().getTriple().isOSMSVCRT()) {
2314       llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2315           getLLVMContext(), llvm::AttributeList::FunctionIndex,
2316           llvm::Attribute::ReturnsTwice);
2317       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2318           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2319       llvm::CallSite CS;
2320       if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
2321         llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
2322         llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
2323             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
2324             "_setjmp3", ReturnsTwiceAttr, /*Local=*/true);
2325         llvm::Value *Count = ConstantInt::get(IntTy, 0);
2326         llvm::Value *Args[] = {Buf, Count};
2327         CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
2328       } else {
2329         llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2330         llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
2331             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2332             "_setjmp", ReturnsTwiceAttr, /*Local=*/true);
2333         llvm::Value *FrameAddr =
2334             Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2335                                ConstantInt::get(Int32Ty, 0));
2336         llvm::Value *Args[] = {Buf, FrameAddr};
2337         CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
2338       }
2339       CS.setAttributes(ReturnsTwiceAttr);
2340       return RValue::get(CS.getInstruction());
2341     }
2342     break;
2343   }
2344 
  case Builtin::BI__GetExceptionInfo: {
    // Ask the C++ ABI for the throw-info global that corresponds to the type
    // of the builtin's first parameter. When one is returned, the builtin's
    // value is that global bitcast to i8*; otherwise leave the switch.
    if (llvm::GlobalVariable *GV =
            CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
      return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
    break;
  }

  // __fastfail is forwarded to the shared MSVC-intrinsic helper.
  case Builtin::BI__fastfail:
    return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
2354 
2355   case Builtin::BI__builtin_coro_size: {
2356     auto & Context = getContext();
2357     auto SizeTy = Context.getSizeType();
2358     auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
2359     Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
2360     return RValue::get(Builder.CreateCall(F));
2361   }
2362 
  // The remaining coroutine builtins map one-to-one onto the coro.* intrinsic
  // of the same name; EmitCoroutineIntrinsic does the actual emission.
  case Builtin::BI__builtin_coro_id:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
  case Builtin::BI__builtin_coro_promise:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
  case Builtin::BI__builtin_coro_resume:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
  case Builtin::BI__builtin_coro_frame:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
  case Builtin::BI__builtin_coro_free:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
  case Builtin::BI__builtin_coro_destroy:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
  case Builtin::BI__builtin_coro_done:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
  case Builtin::BI__builtin_coro_alloc:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
  case Builtin::BI__builtin_coro_begin:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
  case Builtin::BI__builtin_coro_end:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
  case Builtin::BI__builtin_coro_suspend:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
  case Builtin::BI__builtin_coro_param:
    return EmitCoroutineIntrinsic(E, Intrinsic::coro_param);
2387 
2388   // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
  // read_pipe / write_pipe are lowered to calls to runtime functions named
  // __{read,write}_pipe_{2,4}; the numeric suffix is the number of arguments
  // of the source-level overload. Every call additionally passes the pipe's
  // packet size and alignment as two trailing i32 operands.
  case Builtin::BIread_pipe:
  case Builtin::BIwrite_pipe: {
    Value *Arg0 = EmitScalarExpr(E->getArg(0)),
          *Arg1 = EmitScalarExpr(E->getArg(1));
    CGOpenCLRuntime OpenCLRT(CGM);
    // Packet size and alignment are derived from the pipe argument (arg 0).
    Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
    Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));

    // Type of the generic packet parameter.
    unsigned GenericAS =
        getContext().getTargetAddressSpace(LangAS::opencl_generic);
    llvm::Type *I8PTy = llvm::PointerType::get(
        llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);

    // Testing which overloaded version we should generate the call for.
    if (2U == E->getNumArgs()) {
      const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
                                                             : "__write_pipe_2";
      // Creating a generic function type to be able to call with any builtin or
      // user defined type.
      llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
      llvm::FunctionType *FTy = llvm::FunctionType::get(
          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
      // The second source argument is passed as a generic i8 pointer.
      Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
      return RValue::get(
          Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
                             {Arg0, BCast, PacketSize, PacketAlign}));
    } else {
      assert(4 == E->getNumArgs() &&
             "Illegal number of parameters to pipe function");
      const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
                                                             : "__write_pipe_4";

      // Here the first two source arguments keep their own types, the third
      // is passed as i32 and the fourth as a generic i8 pointer.
      llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
                              Int32Ty, Int32Ty};
      Value *Arg2 = EmitScalarExpr(E->getArg(2)),
            *Arg3 = EmitScalarExpr(E->getArg(3));
      llvm::FunctionType *FTy = llvm::FunctionType::get(
          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
      Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
      // We know the third argument is an integer type, but we may need to cast
      // it to i32.
      if (Arg2->getType() != Int32Ty)
        Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
      return RValue::get(Builder.CreateCall(
          CGM.CreateRuntimeFunction(FTy, Name),
          {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
    }
  }
2438   // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write
2439   // functions
2440   case Builtin::BIreserve_read_pipe:
2441   case Builtin::BIreserve_write_pipe:
2442   case Builtin::BIwork_group_reserve_read_pipe:
2443   case Builtin::BIwork_group_reserve_write_pipe:
2444   case Builtin::BIsub_group_reserve_read_pipe:
2445   case Builtin::BIsub_group_reserve_write_pipe: {
2446     // Composing the mangled name for the function.
2447     const char *Name;
2448     if (BuiltinID == Builtin::BIreserve_read_pipe)
2449       Name = "__reserve_read_pipe";
2450     else if (BuiltinID == Builtin::BIreserve_write_pipe)
2451       Name = "__reserve_write_pipe";
2452     else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
2453       Name = "__work_group_reserve_read_pipe";
2454     else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
2455       Name = "__work_group_reserve_write_pipe";
2456     else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
2457       Name = "__sub_group_reserve_read_pipe";
2458     else
2459       Name = "__sub_group_reserve_write_pipe";
2460 
2461     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2462           *Arg1 = EmitScalarExpr(E->getArg(1));
2463     llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
2464     CGOpenCLRuntime OpenCLRT(CGM);
2465     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2466     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2467 
2468     // Building the generic function prototype.
2469     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
2470     llvm::FunctionType *FTy = llvm::FunctionType::get(
2471         ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2472     // We know the second argument is an integer type, but we may need to cast
2473     // it to i32.
2474     if (Arg1->getType() != Int32Ty)
2475       Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
2476     return RValue::get(
2477         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2478                            {Arg0, Arg1, PacketSize, PacketAlign}));
2479   }
2480   // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
2481   // functions
2482   case Builtin::BIcommit_read_pipe:
2483   case Builtin::BIcommit_write_pipe:
2484   case Builtin::BIwork_group_commit_read_pipe:
2485   case Builtin::BIwork_group_commit_write_pipe:
2486   case Builtin::BIsub_group_commit_read_pipe:
2487   case Builtin::BIsub_group_commit_write_pipe: {
2488     const char *Name;
2489     if (BuiltinID == Builtin::BIcommit_read_pipe)
2490       Name = "__commit_read_pipe";
2491     else if (BuiltinID == Builtin::BIcommit_write_pipe)
2492       Name = "__commit_write_pipe";
2493     else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
2494       Name = "__work_group_commit_read_pipe";
2495     else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
2496       Name = "__work_group_commit_write_pipe";
2497     else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
2498       Name = "__sub_group_commit_read_pipe";
2499     else
2500       Name = "__sub_group_commit_write_pipe";
2501 
2502     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2503           *Arg1 = EmitScalarExpr(E->getArg(1));
2504     CGOpenCLRuntime OpenCLRT(CGM);
2505     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2506     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2507 
2508     // Building the generic function prototype.
2509     llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
2510     llvm::FunctionType *FTy =
2511         llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
2512                                 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2513 
2514     return RValue::get(
2515         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2516                            {Arg0, Arg1, PacketSize, PacketAlign}));
2517   }
2518   // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
2519   case Builtin::BIget_pipe_num_packets:
2520   case Builtin::BIget_pipe_max_packets: {
2521     const char *Name;
2522     if (BuiltinID == Builtin::BIget_pipe_num_packets)
2523       Name = "__get_pipe_num_packets";
2524     else
2525       Name = "__get_pipe_max_packets";
2526 
2527     // Building the generic function prototype.
2528     Value *Arg0 = EmitScalarExpr(E->getArg(0));
2529     CGOpenCLRuntime OpenCLRT(CGM);
2530     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2531     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2532     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
2533     llvm::FunctionType *FTy = llvm::FunctionType::get(
2534         Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2535 
2536     return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2537                                           {Arg0, PacketSize, PacketAlign}));
2538   }
2539 
2540   // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
2541   case Builtin::BIto_global:
2542   case Builtin::BIto_local:
2543   case Builtin::BIto_private: {
2544     auto Arg0 = EmitScalarExpr(E->getArg(0));
2545     auto NewArgT = llvm::PointerType::get(Int8Ty,
2546       CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2547     auto NewRetT = llvm::PointerType::get(Int8Ty,
2548       CGM.getContext().getTargetAddressSpace(
2549         E->getType()->getPointeeType().getAddressSpace()));
2550     auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
2551     llvm::Value *NewArg;
2552     if (Arg0->getType()->getPointerAddressSpace() !=
2553         NewArgT->getPointerAddressSpace())
2554       NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
2555     else
2556       NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
2557     auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
2558     auto NewCall =
2559         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
2560     return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
2561       ConvertType(E->getType())));
2562   }
2563 
2564   // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
2565   // It contains four different overload formats specified in Table 6.13.17.1.
  // Lowers enqueue_kernel to one of four runtime functions, chosen from the
  // number of arguments and from whether argument 3 is a block pointer:
  //   4 args                  -> __enqueue_kernel_basic
  //   >= 5 args, arg 3 block  -> __enqueue_kernel_vaargs
  //   7 args                  -> __enqueue_kernel_basic_events
  //   > 7 args                -> __enqueue_kernel_events_vaargs
  case Builtin::BIenqueue_kernel: {
    StringRef Name; // Generated function call name
    unsigned NumArgs = E->getNumArgs();

    llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
    llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
        getContext().getTargetAddressSpace(LangAS::opencl_generic));

    // Arguments 0-2 are common to every overload. The third one is emitted as
    // an aggregate lvalue and passed to the runtime by address.
    llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
    llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
    LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
    llvm::Value *Range = NDRangeL.getAddress().getPointer();
    llvm::Type *RangeTy = NDRangeL.getAddress().getType();

    if (NumArgs == 4) {
      // The most basic form of the call with parameters:
      // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
      Name = "__enqueue_kernel_basic";
      llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy};
      llvm::FunctionType *FTy = llvm::FunctionType::get(
          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false);

      llvm::Value *Block = Builder.CreatePointerCast(
          EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);

      // ByVal is attached at attribute index 3, on both the runtime function
      // declaration and this call.
      // NOTE(review): presumably index 3 designates the ndrange parameter -
      // confirm against AttributeList's index convention.
      AttrBuilder B;
      B.addAttribute(Attribute::ByVal);
      llvm::AttributeList ByValAttrSet =
          llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);

      auto RTCall =
          Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
                             {Queue, Flags, Range, Block});
      RTCall->setAttributes(ByValAttrSet);
      return RValue::get(RTCall);
    }
    assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");

    // Could have events and/or vaargs.
    if (E->getArg(3)->getType()->isBlockPointerType()) {
      // No events passed, but has variadic arguments.
      Name = "__enqueue_kernel_vaargs";
      llvm::Value *Block = Builder.CreatePointerCast(
          EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
      // Create a vector of the arguments, as well as a constant value to
      // express to the runtime the number of variadic arguments.
      std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block,
                                         ConstantInt::get(IntTy, NumArgs - 4)};
      std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy,
                                          GenericVoidPtrTy, IntTy};

      // Each of the following arguments specifies the size of the corresponding
      // argument passed to the enqueued block.
      // They are converted to SizeTy and appended to Args only: ArgTys is not
      // extended, so they travel through the variadic part of the prototype.
      for (unsigned I = 4/*Position of the first size arg*/; I < NumArgs; ++I)
        Args.push_back(
            Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy));

      llvm::FunctionType *FTy = llvm::FunctionType::get(
          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
      return RValue::get(
          Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
                             llvm::ArrayRef<llvm::Value *>(Args)));
    }
    // Any calls now have event arguments passed.
    if (NumArgs >= 7) {
      llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
      llvm::Type *EventPtrTy = EventTy->getPointerTo(
          CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));

      // Argument 3 is the event count (passed as i32), arguments 4 and 5 are
      // event pointers, and argument 6 is the block.
      llvm::Value *NumEvents =
          Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
      // An event list given as an array is decayed to a pointer first.
      llvm::Value *EventList =
          E->getArg(4)->getType()->isArrayType()
              ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
              : EmitScalarExpr(E->getArg(4));
      llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
      // Convert to generic address space.
      EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
      ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy);
      llvm::Value *Block = Builder.CreatePointerCast(
          EmitScalarExpr(E->getArg(6)), GenericVoidPtrTy);

      std::vector<llvm::Type *> ArgTys = {
          QueueTy,    Int32Ty,    RangeTy,         Int32Ty,
          EventPtrTy, EventPtrTy, GenericVoidPtrTy};

      std::vector<llvm::Value *> Args = {Queue,     Flags,    Range, NumEvents,
                                         EventList, ClkEvent, Block};

      if (NumArgs == 7) {
        // Has events but no variadics.
        Name = "__enqueue_kernel_basic_events";
        llvm::FunctionType *FTy = llvm::FunctionType::get(
            Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
        return RValue::get(
            Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
                               llvm::ArrayRef<llvm::Value *>(Args)));
      }
      // Has event info and variadics
      // Pass the number of variadics to the runtime function too.
      Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
      ArgTys.push_back(Int32Ty);
      Name = "__enqueue_kernel_events_vaargs";

      // Each of the following arguments specifies the size of the corresponding
      // argument passed to the enqueued block.
      // As above, the sizes are appended to Args only and are passed through
      // the variadic part of the prototype.
      for (unsigned I = 7/*Position of the first size arg*/; I < NumArgs; ++I)
        Args.push_back(
            Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy));

      llvm::FunctionType *FTy = llvm::FunctionType::get(
          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
      return RValue::get(
          Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
                             llvm::ArrayRef<llvm::Value *>(Args)));
    }
    // NOTE(review): this point is only reached for a call with 5 or 6
    // arguments whose argument 3 is not a block pointer. Control then falls
    // into the next case label below - confirm that such calls cannot reach
    // code generation, or that this fallthrough is intended.
    LLVM_FALLTHROUGH;
  }
2684   // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
2685   // parameter.
2686   case Builtin::BIget_kernel_work_group_size: {
2687     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2688         getContext().getTargetAddressSpace(LangAS::opencl_generic));
2689     Value *Arg = EmitScalarExpr(E->getArg(0));
2690     Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
2691     return RValue::get(Builder.CreateCall(
2692         CGM.CreateRuntimeFunction(
2693             llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
2694             "__get_kernel_work_group_size_impl"),
2695         Arg));
2696   }
2697   case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
2698     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2699         getContext().getTargetAddressSpace(LangAS::opencl_generic));
2700     Value *Arg = EmitScalarExpr(E->getArg(0));
2701     Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
2702     return RValue::get(Builder.CreateCall(
2703         CGM.CreateRuntimeFunction(
2704             llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
2705             "__get_kernel_preferred_work_group_multiple_impl"),
2706         Arg));
2707   }
2708   case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
2709   case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
2710     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2711         getContext().getTargetAddressSpace(LangAS::opencl_generic));
2712     LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
2713     llvm::Value *NDRange = NDRangeL.getAddress().getPointer();
2714     Value *Block = EmitScalarExpr(E->getArg(1));
2715     Block = Builder.CreatePointerCast(Block, GenericVoidPtrTy);
2716     const char *Name =
2717         BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
2718             ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
2719             : "__get_kernel_sub_group_count_for_ndrange_impl";
2720     return RValue::get(Builder.CreateCall(
2721         CGM.CreateRuntimeFunction(
2722             llvm::FunctionType::get(
2723                 IntTy, {NDRange->getType(), GenericVoidPtrTy}, false),
2724             Name),
2725         {NDRange, Block}));
2726   }
  // printf only gets special treatment on NVPTX targets; everywhere else the
  // switch is left without emitting anything here.
  case Builtin::BIprintf:
    if (getTarget().getTriple().isNVPTX())
      return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
    break;
  // All three canonicalize variants are emitted through the unary-builtin
  // helper with the canonicalize intrinsic.
  case Builtin::BI__builtin_canonicalize:
  case Builtin::BI__builtin_canonicalizef:
  case Builtin::BI__builtin_canonicalizel:
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));

  case Builtin::BI__builtin_thread_pointer: {
    // Report an error when the target has no TLS support.
    if (!getContext().getTargetInfo().isTLSSupported())
      CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
    // Nothing is emitted here: the builtin is already mapped to its intrinsic
    // by GCCBuiltin, so just leave the switch. (This is a `break`, not a
    // fallthrough into the next case.)
    break;
  }
2742   case Builtin::BI__builtin_os_log_format: {
2743     assert(E->getNumArgs() >= 2 &&
2744            "__builtin_os_log_format takes at least 2 arguments");
2745     analyze_os_log::OSLogBufferLayout Layout;
2746     analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
2747     Address BufAddr = EmitPointerWithAlignment(E->getArg(0));
2748     // Ignore argument 1, the format string. It is not currently used.
2749     CharUnits Offset;
2750     Builder.CreateStore(
2751         Builder.getInt8(Layout.getSummaryByte()),
2752         Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
2753     Builder.CreateStore(
2754         Builder.getInt8(Layout.getNumArgsByte()),
2755         Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
2756 
2757     llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2758     for (const auto &Item : Layout.Items) {
2759       Builder.CreateStore(
2760           Builder.getInt8(Item.getDescriptorByte()),
2761           Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
2762       Builder.CreateStore(
2763           Builder.getInt8(Item.getSizeByte()),
2764           Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
2765       Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset);
2766       if (const Expr *TheExpr = Item.getExpr()) {
2767         Addr = Builder.CreateElementBitCast(
2768             Addr, ConvertTypeForMem(TheExpr->getType()));
2769         // Check if this is a retainable type.
2770         if (TheExpr->getType()->isObjCRetainableType()) {
2771           assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2772                  "Only scalar can be a ObjC retainable type");
2773           llvm::Value *SV = EmitScalarExpr(TheExpr, /*Ignore*/ false);
2774           RValue RV = RValue::get(SV);
2775           LValue LV = MakeAddrLValue(Addr, TheExpr->getType());
2776           EmitStoreThroughLValue(RV, LV);
2777           // Check if the object is constant, if not, save it in
2778           // RetainableOperands.
2779           if (!isa<Constant>(SV))
2780             RetainableOperands.push_back(SV);
2781         } else {
2782           EmitAnyExprToMem(TheExpr, Addr, Qualifiers(), /*isInit*/ true);
2783         }
2784       } else {
2785         Addr = Builder.CreateElementBitCast(Addr, Int32Ty);
2786         Builder.CreateStore(
2787             Builder.getInt32(Item.getConstValue().getQuantity()), Addr);
2788       }
2789       Offset += Item.size();
2790     }
2791 
2792     // Push a clang.arc.use cleanup for each object in RetainableOperands. The
2793     // cleanup will cause the use to appear after the final log call, keeping
2794     // the object valid while it's held in the log buffer.  Note that if there's
2795     // a release cleanup on the object, it will already be active; since
2796     // cleanups are emitted in reverse order, the use will occur before the
2797     // object is released.
2798     if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
2799         CGM.getCodeGenOpts().OptimizationLevel != 0)
2800       for (llvm::Value *object : RetainableOperands)
2801         pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), object);
2802 
2803     return RValue::get(BufAddr.getPointer());
2804   }
2805 
2806   case Builtin::BI__builtin_os_log_format_buffer_size: {
2807     analyze_os_log::OSLogBufferLayout Layout;
2808     analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
2809     return RValue::get(ConstantInt::get(ConvertType(E->getType()),
2810                                         Layout.size().getQuantity()));
2811   }
2812 
2813   case Builtin::BI__xray_customevent: {
2814     if (!ShouldXRayInstrumentFunction())
2815       return RValue::getIgnored();
2816     if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) {
2817       if (XRayAttr->neverXRayInstrument())
2818         return RValue::getIgnored();
2819     }
2820     Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
2821     auto FTy = F->getFunctionType();
2822     auto Arg0 = E->getArg(0);
2823     auto Arg0Val = EmitScalarExpr(Arg0);
2824     auto Arg0Ty = Arg0->getType();
2825     auto PTy0 = FTy->getParamType(0);
2826     if (PTy0 != Arg0Val->getType()) {
2827       if (Arg0Ty->isArrayType())
2828         Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
2829       else
2830         Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
2831     }
2832     auto Arg1 = EmitScalarExpr(E->getArg(1));
2833     auto PTy1 = FTy->getParamType(1);
2834     if (PTy1 != Arg1->getType())
2835       Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
2836     return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
2837   }
2838 
2839   case Builtin::BI__builtin_ms_va_start:
2840   case Builtin::BI__builtin_ms_va_end:
2841     return RValue::get(
2842         EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
2843                        BuiltinID == Builtin::BI__builtin_ms_va_start));
2844 
2845   case Builtin::BI__builtin_ms_va_copy: {
2846     // Lower this manually. We can't reliably determine whether or not any
2847     // given va_copy() is for a Win64 va_list from the calling convention
2848     // alone, because it's legal to do this from a System V ABI function.
2849     // With opaque pointer types, we won't have enough information in LLVM
2850     // IR to determine this from the argument types, either. Best to do it
2851     // now, while we have enough information.
2852     Address DestAddr = EmitMSVAListRef(E->getArg(0));
2853     Address SrcAddr = EmitMSVAListRef(E->getArg(1));
2854 
2855     llvm::Type *BPP = Int8PtrPtrTy;
2856 
2857     DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
2858                        DestAddr.getAlignment());
2859     SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
2860                       SrcAddr.getAlignment());
2861 
2862     Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
2863     return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
2864   }
2865   }
2866 
2867   // If this is an alias for a lib function (e.g. __builtin_sin), emit
2868   // the call using the normal call path, but using the unmangled
2869   // version of the function name.
2870   if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
2871     return emitLibraryCall(*this, FD, E,
2872                            CGM.getBuiltinLibFunction(FD, BuiltinID));
2873 
2874   // If this is a predefined lib function (e.g. malloc), emit the call
2875   // using exactly the normal call path.
2876   if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
2877     return emitLibraryCall(*this, FD, E,
2878                       cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
2879 
2880   // Check that a call to a target specific builtin has the correct target
2881   // features.
2882   // This is down here to avoid non-target specific builtins, however, if
2883   // generic builtins start to require generic target features then we
2884   // can move this up to the beginning of the function.
2885   checkTargetFeatures(E, FD);
2886 
2887   // See if we have a target specific intrinsic.
2888   const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
2889   Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
2890   StringRef Prefix =
2891       llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
2892   if (!Prefix.empty()) {
2893     IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name);
2894     // NOTE we dont need to perform a compatibility flag check here since the
2895     // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the
2896     // MS builtins via ALL_MS_LANGUAGES and are filtered earlier.
2897     if (IntrinsicID == Intrinsic::not_intrinsic)
2898       IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
2899   }
2900 
2901   if (IntrinsicID != Intrinsic::not_intrinsic) {
2902     SmallVector<Value*, 16> Args;
2903 
2904     // Find out if any arguments are required to be integer constant
2905     // expressions.
2906     unsigned ICEArguments = 0;
2907     ASTContext::GetBuiltinTypeError Error;
2908     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
2909     assert(Error == ASTContext::GE_None && "Should not codegen an error");
2910 
2911     Function *F = CGM.getIntrinsic(IntrinsicID);
2912     llvm::FunctionType *FTy = F->getFunctionType();
2913 
2914     for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
2915       Value *ArgValue;
2916       // If this is a normal argument, just emit it as a scalar.
2917       if ((ICEArguments & (1 << i)) == 0) {
2918         ArgValue = EmitScalarExpr(E->getArg(i));
2919       } else {
2920         // If this is required to be a constant, constant fold it so that we
2921         // know that the generated intrinsic gets a ConstantInt.
2922         llvm::APSInt Result;
2923         bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
2924         assert(IsConst && "Constant arg isn't actually constant?");
2925         (void)IsConst;
2926         ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
2927       }
2928 
2929       // If the intrinsic arg type is different from the builtin arg type
2930       // we need to do a bit cast.
2931       llvm::Type *PTy = FTy->getParamType(i);
2932       if (PTy != ArgValue->getType()) {
2933         assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
2934                "Must be able to losslessly bit cast to param");
2935         ArgValue = Builder.CreateBitCast(ArgValue, PTy);
2936       }
2937 
2938       Args.push_back(ArgValue);
2939     }
2940 
2941     Value *V = Builder.CreateCall(F, Args);
2942     QualType BuiltinRetType = E->getType();
2943 
2944     llvm::Type *RetTy = VoidTy;
2945     if (!BuiltinRetType->isVoidType())
2946       RetTy = ConvertType(BuiltinRetType);
2947 
2948     if (RetTy != V->getType()) {
2949       assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
2950              "Must be able to losslessly bit cast result type");
2951       V = Builder.CreateBitCast(V, RetTy);
2952     }
2953 
2954     return RValue::get(V);
2955   }
2956 
2957   // See if we have a target specific builtin that needs to be lowered.
2958   if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
2959     return RValue::get(V);
2960 
2961   ErrorUnsupported(E, "builtin function");
2962 
2963   // Unknown builtin, for now just dump it out and return undef.
2964   return GetUndefRValue(E->getType());
2965 }
2966 
2967 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
2968                                         unsigned BuiltinID, const CallExpr *E,
2969                                         llvm::Triple::ArchType Arch) {
2970   switch (Arch) {
2971   case llvm::Triple::arm:
2972   case llvm::Triple::armeb:
2973   case llvm::Triple::thumb:
2974   case llvm::Triple::thumbeb:
2975     return CGF->EmitARMBuiltinExpr(BuiltinID, E);
2976   case llvm::Triple::aarch64:
2977   case llvm::Triple::aarch64_be:
2978     return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
2979   case llvm::Triple::x86:
2980   case llvm::Triple::x86_64:
2981     return CGF->EmitX86BuiltinExpr(BuiltinID, E);
2982   case llvm::Triple::ppc:
2983   case llvm::Triple::ppc64:
2984   case llvm::Triple::ppc64le:
2985     return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
2986   case llvm::Triple::r600:
2987   case llvm::Triple::amdgcn:
2988     return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
2989   case llvm::Triple::systemz:
2990     return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
2991   case llvm::Triple::nvptx:
2992   case llvm::Triple::nvptx64:
2993     return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
2994   case llvm::Triple::wasm32:
2995   case llvm::Triple::wasm64:
2996     return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
2997   default:
2998     return nullptr;
2999   }
3000 }
3001 
3002 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
3003                                               const CallExpr *E) {
3004   if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
3005     assert(getContext().getAuxTargetInfo() && "Missing aux target info");
3006     return EmitTargetArchBuiltinExpr(
3007         this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
3008         getContext().getAuxTargetInfo()->getTriple().getArch());
3009   }
3010 
3011   return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
3012                                    getTarget().getTriple().getArch());
3013 }
3014 
3015 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
3016                                      NeonTypeFlags TypeFlags,
3017                                      bool V1Ty=false) {
3018   int IsQuad = TypeFlags.isQuad();
3019   switch (TypeFlags.getEltType()) {
3020   case NeonTypeFlags::Int8:
3021   case NeonTypeFlags::Poly8:
3022     return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
3023   case NeonTypeFlags::Int16:
3024   case NeonTypeFlags::Poly16:
3025   case NeonTypeFlags::Float16:
3026     return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
3027   case NeonTypeFlags::Int32:
3028     return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
3029   case NeonTypeFlags::Int64:
3030   case NeonTypeFlags::Poly64:
3031     return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
3032   case NeonTypeFlags::Poly128:
3033     // FIXME: i128 and f128 doesn't get fully support in Clang and llvm.
3034     // There is a lot of i128 and f128 API missing.
3035     // so we use v16i8 to represent poly128 and get pattern matched.
3036     return llvm::VectorType::get(CGF->Int8Ty, 16);
3037   case NeonTypeFlags::Float32:
3038     return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
3039   case NeonTypeFlags::Float64:
3040     return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
3041   }
3042   llvm_unreachable("Unknown vector element type!");
3043 }
3044 
3045 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
3046                                           NeonTypeFlags IntTypeFlags) {
3047   int IsQuad = IntTypeFlags.isQuad();
3048   switch (IntTypeFlags.getEltType()) {
3049   case NeonTypeFlags::Int32:
3050     return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
3051   case NeonTypeFlags::Int64:
3052     return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
3053   default:
3054     llvm_unreachable("Type can't be converted to floating-point!");
3055   }
3056 }
3057 
3058 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
3059   unsigned nElts = V->getType()->getVectorNumElements();
3060   Value* SV = llvm::ConstantVector::getSplat(nElts, C);
3061   return Builder.CreateShuffleVector(V, V, SV, "lane");
3062 }
3063 
3064 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
3065                                      const char *name,
3066                                      unsigned shift, bool rightshift) {
3067   unsigned j = 0;
3068   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3069        ai != ae; ++ai, ++j)
3070     if (shift > 0 && shift == j)
3071       Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
3072     else
3073       Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
3074 
3075   return Builder.CreateCall(F, Ops, name);
3076 }
3077 
3078 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
3079                                             bool neg) {
3080   int SV = cast<ConstantInt>(V)->getSExtValue();
3081   return ConstantInt::get(Ty, neg ? -SV : SV);
3082 }
3083 
3084 // \brief Right-shift a vector by a constant.
3085 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
3086                                           llvm::Type *Ty, bool usgn,
3087                                           const char *name) {
3088   llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
3089 
3090   int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
3091   int EltSize = VTy->getScalarSizeInBits();
3092 
3093   Vec = Builder.CreateBitCast(Vec, Ty);
3094 
3095   // lshr/ashr are undefined when the shift amount is equal to the vector
3096   // element size.
3097   if (ShiftAmt == EltSize) {
3098     if (usgn) {
3099       // Right-shifting an unsigned value by its size yields 0.
3100       return llvm::ConstantAggregateZero::get(VTy);
3101     } else {
3102       // Right-shifting a signed value by its size is equivalent
3103       // to a shift of size-1.
3104       --ShiftAmt;
3105       Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
3106     }
3107   }
3108 
3109   Shift = EmitNeonShiftVector(Shift, Ty, false);
3110   if (usgn)
3111     return Builder.CreateLShr(Vec, Shift, name);
3112   else
3113     return Builder.CreateAShr(Vec, Shift, name);
3114 }
3115 
// Bit flags stored in NeonIntrinsicInfo::TypeModifier, plus a few named
// combinations of them.
// NOTE(review): the code that interprets each flag is outside this part of
// the file; confirm the exact meaning of a flag there before relying on its
// name.
enum {
  // Individual bits.
  AddRetType = 0x001,
  Add1ArgType = 0x002,
  Add2ArgTypes = 0x004,

  VectorizeRetType = 0x008,
  VectorizeArgTypes = 0x010,

  InventFloatType = 0x020,
  UnsignedAlts = 0x040,

  Use64BitVectors = 0x080,
  Use128BitVectors = 0x100,

  // Named combinations of the bits above.
  Vectorize1ArgType = VectorizeArgTypes | Add1ArgType,
  VectorRet = VectorizeRetType | AddRetType,
  VectorRetGetArgs01 =
      VectorizeRetType | VectorizeArgTypes | AddRetType | Add2ArgTypes,
  FpCmpzModifiers =
      InventFloatType | VectorizeRetType | AddRetType | Add1ArgType
};
3137 
namespace {
/// One row of a NEON builtin table: a builtin ID together with the data the
/// NEONMAP* macros attach to it.
struct NeonIntrinsicInfo {
  const char *NameHint;      // Stringified builtin base name.
  unsigned BuiltinID;        // Key used by both comparison operators.
  unsigned LLVMIntrinsic;    // Primary intrinsic ID (0 in NEONMAP0 rows).
  unsigned AltLLVMIntrinsic; // Alternate intrinsic ID (0 unless NEONMAP2).
  unsigned TypeModifier;     // Bitwise OR of the type-modifier flags.

  /// Order two rows by their builtin ID.
  bool operator<(const NeonIntrinsicInfo &Other) const {
    return BuiltinID < Other.BuiltinID;
  }

  /// Order a row against a bare builtin ID.
  bool operator<(unsigned ID) const {
    return BuiltinID < ID;
  }
};
} // end anonymous namespace
3154 
// Row builders for the NeonIntrinsicInfo tables. Each expands to a braced
// initializer { name string, builtin ID, intrinsic, alt intrinsic, modifier }.

// Row with no intrinsic and no modifier (all three trailing fields are 0).
#define NEONMAP0(Base)                                                         \
  { #Base, NEON::BI__builtin_neon_##Base, 0, 0, 0 }

// Row with a single intrinsic; the alternate intrinsic field is 0.
#define NEONMAP1(Base, MainIntr, Modifier)                                     \
  { #Base, NEON::BI__builtin_neon_##Base, Intrinsic::MainIntr, 0, Modifier }

// Row with a primary and an alternate intrinsic.
#define NEONMAP2(Base, MainIntr, AltIntr, Modifier)                            \
  { #Base, NEON::BI__builtin_neon_##Base, Intrinsic::MainIntr,                 \
      Intrinsic::AltIntr, Modifier }
3166 
// Table of NEON builtins and the arm_neon_* (or generic, e.g. ctlz/ctpop)
// LLVM intrinsics associated with them. Rows are built with the NEONMAP*
// macros: NEONMAP0 rows carry no intrinsic, NEONMAP1 rows carry one, and
// NEONMAP2 rows carry a primary and an alternate intrinsic; the last macro
// argument is the row's TypeModifier flag set.
//
// Rows are listed in alphabetical order of the builtin name.
// NOTE(review): NeonIntrinsicInfo orders rows by BuiltinID, so this table
// presumably has to stay sorted by BuiltinID for lookup - confirm against the
// lookup code before inserting or reordering rows.
static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vabs_v, arm_neon_vabs, 0),
  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
  NEONMAP0(vaddhn_v),
  NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
  NEONMAP1(vaeseq_v, arm_neon_aese, 0),
  NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
  NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vcage_v, arm_neon_vacge, 0),
  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcale_v, arm_neon_vacge, 0),
  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvt_s32_v),
  NEONMAP0(vcvt_s64_v),
  NEONMAP0(vcvt_u32_v),
  NEONMAP0(vcvt_u64_v),
  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_s32_v),
  NEONMAP0(vcvtq_s64_v),
  NEONMAP0(vcvtq_u32_v),
  NEONMAP0(vcvtq_u64_v),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP0(vld1_dup_v),
  NEONMAP1(vld1_v, arm_neon_vld1, 0),
  NEONMAP0(vld1q_dup_v),
  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2_v, arm_neon_vld2, 0),
  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3_v, arm_neon_vld3, 0),
  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4_v, arm_neon_vld4, 0),
  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP0(vmull_v),
  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
  NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
  NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
  NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
  NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
  NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
  NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
  NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
  NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
  NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
  NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
  NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
  NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
  NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
  NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
  NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP1(vst1_v, arm_neon_vst1, 0),
  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2_v, arm_neon_vst2, 0),
  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3_v, arm_neon_vst3, 0),
  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4_v, arm_neon_vst4, 0),
  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtrn_v),
  NEONMAP0(vtrnq_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
  NEONMAP0(vuzp_v),
  NEONMAP0(vuzpq_v),
  NEONMAP0(vzip_v),
  NEONMAP0(vzipq_v)
};
3383 
// Lookup table describing AArch64 NEON builtins, one NeonIntrinsicInfo entry
// per builtin, written with the NEONMAP0/NEONMAP1/NEONMAP2 helper macros
// (defined earlier in this file and #undef'd after the tables).
//
// Reading the entries as they appear here:
//   NEONMAP0(name)                      - builtin name only, no intrinsic.
//   NEONMAP1(name, intrinsic, flags)    - one LLVM intrinsic plus type flags.
//   NEONMAP2(name, int1, int2, flags)   - two LLVM intrinsics plus type flags.
// NOTE(review): presumably NEONMAP0 builtins are lowered by dedicated code
// rather than through a table-provided intrinsic, and for NEONMAP2 entries
// carrying UnsignedAlts the first intrinsic is the unsigned form and the
// second the signed alternative -- confirm against the macro definitions and
// the consumers of this table.
//
// The entries must stay in sorted order: findNeonIntrinsicInMap() performs a
// binary search over tables of this type and, in builds with assertions
// enabled, asserts std::is_sorted on first use. Insert new entries at their
// sorted position instead of appending them.
static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
  NEONMAP0(vaddhn_v),
  NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
  NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
  NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
};
3498 
// Lookup table for the AArch64 "SISD" NEON builtins, one NeonIntrinsicInfo
// entry per builtin. Every entry here uses NEONMAP1: a builtin name, a single
// LLVM intrinsic and a set of type-modifier flags.
//
// The flags are interpreted by CodeGenFunction::LookupNeonLLVMIntrinsic() to
// build the intrinsic's overload type list, e.g. AddRetType / Add1ArgType add
// the return / argument type, and Use64BitVectors / Use128BitVectors pick the
// width used when a type is widened to a vector.
// NOTE(review): presumably entries from this table are what gets passed to
// EmitCommonNeonSISDBuiltinExpr() as its SISDInfo argument -- confirm at the
// call site, which is outside this part of the file.
//
// The entries must stay in sorted order: findNeonIntrinsicInMap() performs a
// binary search over tables of this type and, in builds with assertions
// enabled, asserts std::is_sorted on first use. Insert new entries at their
// sorted position instead of appending them.
static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
  NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
};
3693 
3694 #undef NEONMAP0
3695 #undef NEONMAP1
3696 #undef NEONMAP2
3697 
// One "already verified" flag per intrinsic table. findNeonIntrinsicInMap()
// takes such a flag by reference and, in builds with assertions enabled, sets
// it to true after checking once that the table is sorted, so the check is
// not repeated on every lookup.
// NOTE(review): presumably each flag is paired with the table of the matching
// name when calling findNeonIntrinsicInMap() -- confirm at the call sites.
static bool NEONSIMDIntrinsicsProvenSorted = false;

static bool AArch64SIMDIntrinsicsProvenSorted = false;
static bool AArch64SISDIntrinsicsProvenSorted = false;
3702 
3703 
3704 static const NeonIntrinsicInfo *
3705 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
3706                        unsigned BuiltinID, bool &MapProvenSorted) {
3707 
3708 #ifndef NDEBUG
3709   if (!MapProvenSorted) {
3710     assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
3711     MapProvenSorted = true;
3712   }
3713 #endif
3714 
3715   const NeonIntrinsicInfo *Builtin =
3716       std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
3717 
3718   if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
3719     return Builtin;
3720 
3721   return nullptr;
3722 }
3723 
3724 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
3725                                                    unsigned Modifier,
3726                                                    llvm::Type *ArgType,
3727                                                    const CallExpr *E) {
3728   int VectorSize = 0;
3729   if (Modifier & Use64BitVectors)
3730     VectorSize = 64;
3731   else if (Modifier & Use128BitVectors)
3732     VectorSize = 128;
3733 
3734   // Return type.
3735   SmallVector<llvm::Type *, 3> Tys;
3736   if (Modifier & AddRetType) {
3737     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
3738     if (Modifier & VectorizeRetType)
3739       Ty = llvm::VectorType::get(
3740           Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
3741 
3742     Tys.push_back(Ty);
3743   }
3744 
3745   // Arguments.
3746   if (Modifier & VectorizeArgTypes) {
3747     int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
3748     ArgType = llvm::VectorType::get(ArgType, Elts);
3749   }
3750 
3751   if (Modifier & (Add1ArgType | Add2ArgTypes))
3752     Tys.push_back(ArgType);
3753 
3754   if (Modifier & Add2ArgTypes)
3755     Tys.push_back(ArgType);
3756 
3757   if (Modifier & InventFloatType)
3758     Tys.push_back(FloatTy);
3759 
3760   return CGM.getIntrinsic(IntrinsicID, Tys);
3761 }
3762 
3763 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
3764                                             const NeonIntrinsicInfo &SISDInfo,
3765                                             SmallVectorImpl<Value *> &Ops,
3766                                             const CallExpr *E) {
3767   unsigned BuiltinID = SISDInfo.BuiltinID;
3768   unsigned int Int = SISDInfo.LLVMIntrinsic;
3769   unsigned Modifier = SISDInfo.TypeModifier;
3770   const char *s = SISDInfo.NameHint;
3771 
3772   switch (BuiltinID) {
3773   case NEON::BI__builtin_neon_vcled_s64:
3774   case NEON::BI__builtin_neon_vcled_u64:
3775   case NEON::BI__builtin_neon_vcles_f32:
3776   case NEON::BI__builtin_neon_vcled_f64:
3777   case NEON::BI__builtin_neon_vcltd_s64:
3778   case NEON::BI__builtin_neon_vcltd_u64:
3779   case NEON::BI__builtin_neon_vclts_f32:
3780   case NEON::BI__builtin_neon_vcltd_f64:
3781   case NEON::BI__builtin_neon_vcales_f32:
3782   case NEON::BI__builtin_neon_vcaled_f64:
3783   case NEON::BI__builtin_neon_vcalts_f32:
3784   case NEON::BI__builtin_neon_vcaltd_f64:
3785     // Only one direction of comparisons actually exist, cmle is actually a cmge
3786     // with swapped operands. The table gives us the right intrinsic but we
3787     // still need to do the swap.
3788     std::swap(Ops[0], Ops[1]);
3789     break;
3790   }
3791 
3792   assert(Int && "Generic code assumes a valid intrinsic");
3793 
3794   // Determine the type(s) of this overloaded AArch64 intrinsic.
3795   const Expr *Arg = E->getArg(0);
3796   llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
3797   Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
3798 
3799   int j = 0;
3800   ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
3801   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3802        ai != ae; ++ai, ++j) {
3803     llvm::Type *ArgTy = ai->getType();
3804     if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
3805              ArgTy->getPrimitiveSizeInBits())
3806       continue;
3807 
3808     assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
3809     // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
3810     // it before inserting.
3811     Ops[j] =
3812         CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
3813     Ops[j] =
3814         CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
3815   }
3816 
3817   Value *Result = CGF.EmitNeonCall(F, Ops, s);
3818   llvm::Type *ResultType = CGF.ConvertType(E->getType());
3819   if (ResultType->getPrimitiveSizeInBits() <
3820       Result->getType()->getPrimitiveSizeInBits())
3821     return CGF.Builder.CreateExtractElement(Result, C0);
3822 
3823   return CGF.Builder.CreateBitCast(Result, ResultType, s);
3824 }
3825 
3826 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
3827     unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
3828     const char *NameHint, unsigned Modifier, const CallExpr *E,
3829     SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
3830   // Get the last argument, which specifies the vector type.
3831   llvm::APSInt NeonTypeConst;
3832   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3833   if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
3834     return nullptr;
3835 
3836   // Determine the type of this overloaded NEON intrinsic.
3837   NeonTypeFlags Type(NeonTypeConst.getZExtValue());
3838   bool Usgn = Type.isUnsigned();
3839   bool Quad = Type.isQuad();
3840 
3841   llvm::VectorType *VTy = GetNeonType(this, Type);
3842   llvm::Type *Ty = VTy;
3843   if (!Ty)
3844     return nullptr;
3845 
3846   auto getAlignmentValue32 = [&](Address addr) -> Value* {
3847     return Builder.getInt32(addr.getAlignment().getQuantity());
3848   };
3849 
3850   unsigned Int = LLVMIntrinsic;
3851   if ((Modifier & UnsignedAlts) && !Usgn)
3852     Int = AltLLVMIntrinsic;
3853 
3854   switch (BuiltinID) {
3855   default: break;
3856   case NEON::BI__builtin_neon_vabs_v:
3857   case NEON::BI__builtin_neon_vabsq_v:
3858     if (VTy->getElementType()->isFloatingPointTy())
3859       return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
3860     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
3861   case NEON::BI__builtin_neon_vaddhn_v: {
3862     llvm::VectorType *SrcTy =
3863         llvm::VectorType::getExtendedElementVectorType(VTy);
3864 
3865     // %sum = add <4 x i32> %lhs, %rhs
3866     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3867     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
3868     Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
3869 
3870     // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
3871     Constant *ShiftAmt =
3872         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
3873     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
3874 
3875     // %res = trunc <4 x i32> %high to <4 x i16>
3876     return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
3877   }
3878   case NEON::BI__builtin_neon_vcale_v:
3879   case NEON::BI__builtin_neon_vcaleq_v:
3880   case NEON::BI__builtin_neon_vcalt_v:
3881   case NEON::BI__builtin_neon_vcaltq_v:
3882     std::swap(Ops[0], Ops[1]);
3883     LLVM_FALLTHROUGH;
3884   case NEON::BI__builtin_neon_vcage_v:
3885   case NEON::BI__builtin_neon_vcageq_v:
3886   case NEON::BI__builtin_neon_vcagt_v:
3887   case NEON::BI__builtin_neon_vcagtq_v: {
3888     llvm::Type *VecFlt = llvm::VectorType::get(
3889         VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
3890         VTy->getNumElements());
3891     llvm::Type *Tys[] = { VTy, VecFlt };
3892     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3893     return EmitNeonCall(F, Ops, NameHint);
3894   }
3895   case NEON::BI__builtin_neon_vclz_v:
3896   case NEON::BI__builtin_neon_vclzq_v:
3897     // We generate target-independent intrinsic, which needs a second argument
3898     // for whether or not clz of zero is undefined; on ARM it isn't.
3899     Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
3900     break;
3901   case NEON::BI__builtin_neon_vcvt_f32_v:
3902   case NEON::BI__builtin_neon_vcvtq_f32_v:
3903     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3904     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
3905     return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
3906                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
3907   case NEON::BI__builtin_neon_vcvt_n_f32_v:
3908   case NEON::BI__builtin_neon_vcvt_n_f64_v:
3909   case NEON::BI__builtin_neon_vcvtq_n_f32_v:
3910   case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
3911     llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
3912     Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
3913     Function *F = CGM.getIntrinsic(Int, Tys);
3914     return EmitNeonCall(F, Ops, "vcvt_n");
3915   }
3916   case NEON::BI__builtin_neon_vcvt_n_s32_v:
3917   case NEON::BI__builtin_neon_vcvt_n_u32_v:
3918   case NEON::BI__builtin_neon_vcvt_n_s64_v:
3919   case NEON::BI__builtin_neon_vcvt_n_u64_v:
3920   case NEON::BI__builtin_neon_vcvtq_n_s32_v:
3921   case NEON::BI__builtin_neon_vcvtq_n_u32_v:
3922   case NEON::BI__builtin_neon_vcvtq_n_s64_v:
3923   case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
3924     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3925     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3926     return EmitNeonCall(F, Ops, "vcvt_n");
3927   }
3928   case NEON::BI__builtin_neon_vcvt_s32_v:
3929   case NEON::BI__builtin_neon_vcvt_u32_v:
3930   case NEON::BI__builtin_neon_vcvt_s64_v:
3931   case NEON::BI__builtin_neon_vcvt_u64_v:
3932   case NEON::BI__builtin_neon_vcvtq_s32_v:
3933   case NEON::BI__builtin_neon_vcvtq_u32_v:
3934   case NEON::BI__builtin_neon_vcvtq_s64_v:
3935   case NEON::BI__builtin_neon_vcvtq_u64_v: {
3936     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
3937     return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
3938                 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
3939   }
3940   case NEON::BI__builtin_neon_vcvta_s32_v:
3941   case NEON::BI__builtin_neon_vcvta_s64_v:
3942   case NEON::BI__builtin_neon_vcvta_u32_v:
3943   case NEON::BI__builtin_neon_vcvta_u64_v:
3944   case NEON::BI__builtin_neon_vcvtaq_s32_v:
3945   case NEON::BI__builtin_neon_vcvtaq_s64_v:
3946   case NEON::BI__builtin_neon_vcvtaq_u32_v:
3947   case NEON::BI__builtin_neon_vcvtaq_u64_v:
3948   case NEON::BI__builtin_neon_vcvtn_s32_v:
3949   case NEON::BI__builtin_neon_vcvtn_s64_v:
3950   case NEON::BI__builtin_neon_vcvtn_u32_v:
3951   case NEON::BI__builtin_neon_vcvtn_u64_v:
3952   case NEON::BI__builtin_neon_vcvtnq_s32_v:
3953   case NEON::BI__builtin_neon_vcvtnq_s64_v:
3954   case NEON::BI__builtin_neon_vcvtnq_u32_v:
3955   case NEON::BI__builtin_neon_vcvtnq_u64_v:
3956   case NEON::BI__builtin_neon_vcvtp_s32_v:
3957   case NEON::BI__builtin_neon_vcvtp_s64_v:
3958   case NEON::BI__builtin_neon_vcvtp_u32_v:
3959   case NEON::BI__builtin_neon_vcvtp_u64_v:
3960   case NEON::BI__builtin_neon_vcvtpq_s32_v:
3961   case NEON::BI__builtin_neon_vcvtpq_s64_v:
3962   case NEON::BI__builtin_neon_vcvtpq_u32_v:
3963   case NEON::BI__builtin_neon_vcvtpq_u64_v:
3964   case NEON::BI__builtin_neon_vcvtm_s32_v:
3965   case NEON::BI__builtin_neon_vcvtm_s64_v:
3966   case NEON::BI__builtin_neon_vcvtm_u32_v:
3967   case NEON::BI__builtin_neon_vcvtm_u64_v:
3968   case NEON::BI__builtin_neon_vcvtmq_s32_v:
3969   case NEON::BI__builtin_neon_vcvtmq_s64_v:
3970   case NEON::BI__builtin_neon_vcvtmq_u32_v:
3971   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
3972     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3973     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
3974   }
3975   case NEON::BI__builtin_neon_vext_v:
3976   case NEON::BI__builtin_neon_vextq_v: {
3977     int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
3978     SmallVector<uint32_t, 16> Indices;
3979     for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
3980       Indices.push_back(i+CV);
3981 
3982     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3983     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3984     return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
3985   }
3986   case NEON::BI__builtin_neon_vfma_v:
3987   case NEON::BI__builtin_neon_vfmaq_v: {
3988     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
3989     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3990     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3991     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3992 
3993     // NEON intrinsic puts accumulator first, unlike the LLVM fma.
3994     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
3995   }
3996   case NEON::BI__builtin_neon_vld1_v:
3997   case NEON::BI__builtin_neon_vld1q_v: {
3998     llvm::Type *Tys[] = {Ty, Int8PtrTy};
3999     Ops.push_back(getAlignmentValue32(PtrOp0));
4000     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
4001   }
4002   case NEON::BI__builtin_neon_vld2_v:
4003   case NEON::BI__builtin_neon_vld2q_v:
4004   case NEON::BI__builtin_neon_vld3_v:
4005   case NEON::BI__builtin_neon_vld3q_v:
4006   case NEON::BI__builtin_neon_vld4_v:
4007   case NEON::BI__builtin_neon_vld4q_v: {
4008     llvm::Type *Tys[] = {Ty, Int8PtrTy};
4009     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4010     Value *Align = getAlignmentValue32(PtrOp1);
4011     Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
4012     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4013     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4014     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4015   }
4016   case NEON::BI__builtin_neon_vld1_dup_v:
4017   case NEON::BI__builtin_neon_vld1q_dup_v: {
4018     Value *V = UndefValue::get(Ty);
4019     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
4020     PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
4021     LoadInst *Ld = Builder.CreateLoad(PtrOp0);
4022     llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
4023     Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
4024     return EmitNeonSplat(Ops[0], CI);
4025   }
4026   case NEON::BI__builtin_neon_vld2_lane_v:
4027   case NEON::BI__builtin_neon_vld2q_lane_v:
4028   case NEON::BI__builtin_neon_vld3_lane_v:
4029   case NEON::BI__builtin_neon_vld3q_lane_v:
4030   case NEON::BI__builtin_neon_vld4_lane_v:
4031   case NEON::BI__builtin_neon_vld4q_lane_v: {
4032     llvm::Type *Tys[] = {Ty, Int8PtrTy};
4033     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4034     for (unsigned I = 2; I < Ops.size() - 1; ++I)
4035       Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
4036     Ops.push_back(getAlignmentValue32(PtrOp1));
4037     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
4038     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4039     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4040     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4041   }
4042   case NEON::BI__builtin_neon_vmovl_v: {
4043     llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
4044     Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
4045     if (Usgn)
4046       return Builder.CreateZExt(Ops[0], Ty, "vmovl");
4047     return Builder.CreateSExt(Ops[0], Ty, "vmovl");
4048   }
4049   case NEON::BI__builtin_neon_vmovn_v: {
4050     llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
4051     Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
4052     return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
4053   }
4054   case NEON::BI__builtin_neon_vmull_v:
4055     // FIXME: the integer vmull operations could be emitted in terms of pure
4056     // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
4057     // hoisting the exts outside loops. Until global ISel comes along that can
4058     // see through such movement this leads to bad CodeGen. So we need an
4059     // intrinsic for now.
4060     Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
4061     Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
4062     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
4063   case NEON::BI__builtin_neon_vpadal_v:
4064   case NEON::BI__builtin_neon_vpadalq_v: {
4065     // The source operand type has twice as many elements of half the size.
4066     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
4067     llvm::Type *EltTy =
4068       llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4069     llvm::Type *NarrowTy =
4070       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4071     llvm::Type *Tys[2] = { Ty, NarrowTy };
4072     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
4073   }
4074   case NEON::BI__builtin_neon_vpaddl_v:
4075   case NEON::BI__builtin_neon_vpaddlq_v: {
4076     // The source operand type has twice as many elements of half the size.
4077     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
4078     llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4079     llvm::Type *NarrowTy =
4080       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4081     llvm::Type *Tys[2] = { Ty, NarrowTy };
4082     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
4083   }
4084   case NEON::BI__builtin_neon_vqdmlal_v:
4085   case NEON::BI__builtin_neon_vqdmlsl_v: {
4086     SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
4087     Ops[1] =
4088         EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
4089     Ops.resize(2);
4090     return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
4091   }
4092   case NEON::BI__builtin_neon_vqshl_n_v:
4093   case NEON::BI__builtin_neon_vqshlq_n_v:
4094     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
4095                         1, false);
4096   case NEON::BI__builtin_neon_vqshlu_n_v:
4097   case NEON::BI__builtin_neon_vqshluq_n_v:
4098     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
4099                         1, false);
4100   case NEON::BI__builtin_neon_vrecpe_v:
4101   case NEON::BI__builtin_neon_vrecpeq_v:
4102   case NEON::BI__builtin_neon_vrsqrte_v:
4103   case NEON::BI__builtin_neon_vrsqrteq_v:
4104     Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
4105     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
4106 
4107   case NEON::BI__builtin_neon_vrshr_n_v:
4108   case NEON::BI__builtin_neon_vrshrq_n_v:
4109     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
4110                         1, true);
4111   case NEON::BI__builtin_neon_vshl_n_v:
4112   case NEON::BI__builtin_neon_vshlq_n_v:
4113     Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
4114     return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
4115                              "vshl_n");
4116   case NEON::BI__builtin_neon_vshll_n_v: {
4117     llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
4118     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4119     if (Usgn)
4120       Ops[0] = Builder.CreateZExt(Ops[0], VTy);
4121     else
4122       Ops[0] = Builder.CreateSExt(Ops[0], VTy);
4123     Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
4124     return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
4125   }
4126   case NEON::BI__builtin_neon_vshrn_n_v: {
4127     llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
4128     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4129     Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
4130     if (Usgn)
4131       Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
4132     else
4133       Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
4134     return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
4135   }
4136   case NEON::BI__builtin_neon_vshr_n_v:
4137   case NEON::BI__builtin_neon_vshrq_n_v:
4138     return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
4139   case NEON::BI__builtin_neon_vst1_v:
4140   case NEON::BI__builtin_neon_vst1q_v:
4141   case NEON::BI__builtin_neon_vst2_v:
4142   case NEON::BI__builtin_neon_vst2q_v:
4143   case NEON::BI__builtin_neon_vst3_v:
4144   case NEON::BI__builtin_neon_vst3q_v:
4145   case NEON::BI__builtin_neon_vst4_v:
4146   case NEON::BI__builtin_neon_vst4q_v:
4147   case NEON::BI__builtin_neon_vst2_lane_v:
4148   case NEON::BI__builtin_neon_vst2q_lane_v:
4149   case NEON::BI__builtin_neon_vst3_lane_v:
4150   case NEON::BI__builtin_neon_vst3q_lane_v:
4151   case NEON::BI__builtin_neon_vst4_lane_v:
4152   case NEON::BI__builtin_neon_vst4q_lane_v: {
4153     llvm::Type *Tys[] = {Int8PtrTy, Ty};
4154     Ops.push_back(getAlignmentValue32(PtrOp0));
4155     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
4156   }
4157   case NEON::BI__builtin_neon_vsubhn_v: {
4158     llvm::VectorType *SrcTy =
4159         llvm::VectorType::getExtendedElementVectorType(VTy);
4160 
4161     // %sum = add <4 x i32> %lhs, %rhs
4162     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4163     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
4164     Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
4165 
4166     // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
4167     Constant *ShiftAmt =
4168         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
4169     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
4170 
4171     // %res = trunc <4 x i32> %high to <4 x i16>
4172     return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
4173   }
4174   case NEON::BI__builtin_neon_vtrn_v:
4175   case NEON::BI__builtin_neon_vtrnq_v: {
4176     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4177     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4178     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4179     Value *SV = nullptr;
4180 
4181     for (unsigned vi = 0; vi != 2; ++vi) {
4182       SmallVector<uint32_t, 16> Indices;
4183       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4184         Indices.push_back(i+vi);
4185         Indices.push_back(i+e+vi);
4186       }
4187       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4188       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
4189       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4190     }
4191     return SV;
4192   }
4193   case NEON::BI__builtin_neon_vtst_v:
4194   case NEON::BI__builtin_neon_vtstq_v: {
4195     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4196     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4197     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
4198     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
4199                                 ConstantAggregateZero::get(Ty));
4200     return Builder.CreateSExt(Ops[0], Ty, "vtst");
4201   }
4202   case NEON::BI__builtin_neon_vuzp_v:
4203   case NEON::BI__builtin_neon_vuzpq_v: {
4204     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4205     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4206     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4207     Value *SV = nullptr;
4208 
4209     for (unsigned vi = 0; vi != 2; ++vi) {
4210       SmallVector<uint32_t, 16> Indices;
4211       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
4212         Indices.push_back(2*i+vi);
4213 
4214       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4215       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
4216       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4217     }
4218     return SV;
4219   }
4220   case NEON::BI__builtin_neon_vzip_v:
4221   case NEON::BI__builtin_neon_vzipq_v: {
4222     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4223     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4224     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4225     Value *SV = nullptr;
4226 
4227     for (unsigned vi = 0; vi != 2; ++vi) {
4228       SmallVector<uint32_t, 16> Indices;
4229       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4230         Indices.push_back((i + vi*e) >> 1);
4231         Indices.push_back(((i + vi*e) >> 1)+e);
4232       }
4233       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4234       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
4235       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4236     }
4237     return SV;
4238   }
4239   }
4240 
4241   assert(Int && "Expected valid intrinsic number");
4242 
4243   // Determine the type(s) of this overloaded AArch64 intrinsic.
4244   Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
4245 
4246   Value *Result = EmitNeonCall(F, Ops, NameHint);
4247   llvm::Type *ResultType = ConvertType(E->getType());
4248   // AArch64 intrinsic one-element vector type cast to
4249   // scalar type expected by the builtin
4250   return Builder.CreateBitCast(Result, ResultType, NameHint);
4251 }
4252 
4253 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
4254     Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
4255     const CmpInst::Predicate Ip, const Twine &Name) {
4256   llvm::Type *OTy = Op->getType();
4257 
4258   // FIXME: this is utterly horrific. We should not be looking at previous
4259   // codegen context to find out what needs doing. Unfortunately TableGen
4260   // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
4261   // (etc).
4262   if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
4263     OTy = BI->getOperand(0)->getType();
4264 
4265   Op = Builder.CreateBitCast(Op, OTy);
4266   if (OTy->getScalarType()->isFloatingPointTy()) {
4267     Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
4268   } else {
4269     Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
4270   }
4271   return Builder.CreateSExt(Op, Ty, Name);
4272 }
4273 
4274 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
4275                                  Value *ExtOp, Value *IndexOp,
4276                                  llvm::Type *ResTy, unsigned IntID,
4277                                  const char *Name) {
4278   SmallVector<Value *, 2> TblOps;
4279   if (ExtOp)
4280     TblOps.push_back(ExtOp);
4281 
4282   // Build a vector containing sequential number like (0, 1, 2, ..., 15)
4283   SmallVector<uint32_t, 16> Indices;
4284   llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
4285   for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
4286     Indices.push_back(2*i);
4287     Indices.push_back(2*i+1);
4288   }
4289 
4290   int PairPos = 0, End = Ops.size() - 1;
4291   while (PairPos < End) {
4292     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4293                                                      Ops[PairPos+1], Indices,
4294                                                      Name));
4295     PairPos += 2;
4296   }
4297 
4298   // If there's an odd number of 64-bit lookup table, fill the high 64-bit
4299   // of the 128-bit lookup table with zero.
4300   if (PairPos == End) {
4301     Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
4302     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4303                                                      ZeroTbl, Indices, Name));
4304   }
4305 
4306   Function *TblF;
4307   TblOps.push_back(IndexOp);
4308   TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
4309 
4310   return CGF.EmitNeonCall(TblF, TblOps, Name);
4311 }
4312 
4313 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
4314   unsigned Value;
4315   switch (BuiltinID) {
4316   default:
4317     return nullptr;
4318   case ARM::BI__builtin_arm_nop:
4319     Value = 0;
4320     break;
4321   case ARM::BI__builtin_arm_yield:
4322   case ARM::BI__yield:
4323     Value = 1;
4324     break;
4325   case ARM::BI__builtin_arm_wfe:
4326   case ARM::BI__wfe:
4327     Value = 2;
4328     break;
4329   case ARM::BI__builtin_arm_wfi:
4330   case ARM::BI__wfi:
4331     Value = 3;
4332     break;
4333   case ARM::BI__builtin_arm_sev:
4334   case ARM::BI__sev:
4335     Value = 4;
4336     break;
4337   case ARM::BI__builtin_arm_sevl:
4338   case ARM::BI__sevl:
4339     Value = 5;
4340     break;
4341   }
4342 
4343   return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
4344                             llvm::ConstantInt::get(Int32Ty, Value));
4345 }
4346 
4347 // Generates the IR for the read/write special register builtin,
4348 // ValueType is the type of the value that is to be written or read,
4349 // RegisterType is the type of the register being written to or read from.
4350 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
4351                                          const CallExpr *E,
4352                                          llvm::Type *RegisterType,
4353                                          llvm::Type *ValueType,
4354                                          bool IsRead,
4355                                          StringRef SysReg = "") {
4356   // write and register intrinsics only support 32 and 64 bit operations.
4357   assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
4358           && "Unsupported size for register.");
4359 
4360   CodeGen::CGBuilderTy &Builder = CGF.Builder;
4361   CodeGen::CodeGenModule &CGM = CGF.CGM;
4362   LLVMContext &Context = CGM.getLLVMContext();
4363 
4364   if (SysReg.empty()) {
4365     const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
4366     SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
4367   }
4368 
4369   llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
4370   llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4371   llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4372 
4373   llvm::Type *Types[] = { RegisterType };
4374 
4375   bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
4376   assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
4377             && "Can't fit 64-bit value in 32-bit register");
4378 
4379   if (IsRead) {
4380     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
4381     llvm::Value *Call = Builder.CreateCall(F, Metadata);
4382 
4383     if (MixedTypes)
4384       // Read into 64 bit register and then truncate result to 32 bit.
4385       return Builder.CreateTrunc(Call, ValueType);
4386 
4387     if (ValueType->isPointerTy())
4388       // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
4389       return Builder.CreateIntToPtr(Call, ValueType);
4390 
4391     return Call;
4392   }
4393 
4394   llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
4395   llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
4396   if (MixedTypes) {
4397     // Extend 32 bit write value to 64 bit to pass to write.
4398     ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
4399     return Builder.CreateCall(F, { Metadata, ArgValue });
4400   }
4401 
4402   if (ValueType->isPointerTy()) {
4403     // Have VoidPtrTy ArgValue but want to return an i32/i64.
4404     ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
4405     return Builder.CreateCall(F, { Metadata, ArgValue });
4406   }
4407 
4408   return Builder.CreateCall(F, { Metadata, ArgValue });
4409 }
4410 
4411 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
4412 /// argument that specifies the vector type.
4413 static bool HasExtraNeonArgument(unsigned BuiltinID) {
4414   switch (BuiltinID) {
4415   default: break;
4416   case NEON::BI__builtin_neon_vget_lane_i8:
4417   case NEON::BI__builtin_neon_vget_lane_i16:
4418   case NEON::BI__builtin_neon_vget_lane_i32:
4419   case NEON::BI__builtin_neon_vget_lane_i64:
4420   case NEON::BI__builtin_neon_vget_lane_f32:
4421   case NEON::BI__builtin_neon_vgetq_lane_i8:
4422   case NEON::BI__builtin_neon_vgetq_lane_i16:
4423   case NEON::BI__builtin_neon_vgetq_lane_i32:
4424   case NEON::BI__builtin_neon_vgetq_lane_i64:
4425   case NEON::BI__builtin_neon_vgetq_lane_f32:
4426   case NEON::BI__builtin_neon_vset_lane_i8:
4427   case NEON::BI__builtin_neon_vset_lane_i16:
4428   case NEON::BI__builtin_neon_vset_lane_i32:
4429   case NEON::BI__builtin_neon_vset_lane_i64:
4430   case NEON::BI__builtin_neon_vset_lane_f32:
4431   case NEON::BI__builtin_neon_vsetq_lane_i8:
4432   case NEON::BI__builtin_neon_vsetq_lane_i16:
4433   case NEON::BI__builtin_neon_vsetq_lane_i32:
4434   case NEON::BI__builtin_neon_vsetq_lane_i64:
4435   case NEON::BI__builtin_neon_vsetq_lane_f32:
4436   case NEON::BI__builtin_neon_vsha1h_u32:
4437   case NEON::BI__builtin_neon_vsha1cq_u32:
4438   case NEON::BI__builtin_neon_vsha1pq_u32:
4439   case NEON::BI__builtin_neon_vsha1mq_u32:
4440   case ARM::BI_MoveToCoprocessor:
4441   case ARM::BI_MoveToCoprocessor2:
4442     return false;
4443   }
4444   return true;
4445 }
4446 
4447 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
4448                                            const CallExpr *E) {
4449   if (auto Hint = GetValueForARMHint(BuiltinID))
4450     return Hint;
4451 
4452   if (BuiltinID == ARM::BI__emit) {
4453     bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
4454     llvm::FunctionType *FTy =
4455         llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
4456 
4457     APSInt Value;
4458     if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
4459       llvm_unreachable("Sema will ensure that the parameter is constant");
4460 
4461     uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
4462 
4463     llvm::InlineAsm *Emit =
4464         IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
4465                                  /*SideEffects=*/true)
4466                 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
4467                                  /*SideEffects=*/true);
4468 
4469     return Builder.CreateCall(Emit);
4470   }
4471 
4472   if (BuiltinID == ARM::BI__builtin_arm_dbg) {
4473     Value *Option = EmitScalarExpr(E->getArg(0));
4474     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
4475   }
4476 
4477   if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
4478     Value *Address = EmitScalarExpr(E->getArg(0));
4479     Value *RW      = EmitScalarExpr(E->getArg(1));
4480     Value *IsData  = EmitScalarExpr(E->getArg(2));
4481 
4482     // Locality is not supported on ARM target
4483     Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
4484 
4485     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
4486     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
4487   }
4488 
4489   if (BuiltinID == ARM::BI__builtin_arm_rbit) {
4490     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4491     return Builder.CreateCall(
4492         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
4493   }
4494 
4495   if (BuiltinID == ARM::BI__clear_cache) {
4496     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4497     const FunctionDecl *FD = E->getDirectCallee();
4498     Value *Ops[2];
4499     for (unsigned i = 0; i < 2; i++)
4500       Ops[i] = EmitScalarExpr(E->getArg(i));
4501     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4502     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4503     StringRef Name = FD->getName();
4504     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4505   }
4506 
4507   if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
4508       BuiltinID == ARM::BI__builtin_arm_mcrr2) {
4509     Function *F;
4510 
4511     switch (BuiltinID) {
4512     default: llvm_unreachable("unexpected builtin");
4513     case ARM::BI__builtin_arm_mcrr:
4514       F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
4515       break;
4516     case ARM::BI__builtin_arm_mcrr2:
4517       F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
4518       break;
4519     }
4520 
4521     // MCRR{2} instruction has 5 operands but
4522     // the intrinsic has 4 because Rt and Rt2
4523     // are represented as a single unsigned 64
4524     // bit integer in the intrinsic definition
4525     // but internally it's represented as 2 32
4526     // bit integers.
4527 
4528     Value *Coproc = EmitScalarExpr(E->getArg(0));
4529     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4530     Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
4531     Value *CRm = EmitScalarExpr(E->getArg(3));
4532 
4533     Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4534     Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
4535     Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
4536     Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
4537 
4538     return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
4539   }
4540 
4541   if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
4542       BuiltinID == ARM::BI__builtin_arm_mrrc2) {
4543     Function *F;
4544 
4545     switch (BuiltinID) {
4546     default: llvm_unreachable("unexpected builtin");
4547     case ARM::BI__builtin_arm_mrrc:
4548       F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
4549       break;
4550     case ARM::BI__builtin_arm_mrrc2:
4551       F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
4552       break;
4553     }
4554 
4555     Value *Coproc = EmitScalarExpr(E->getArg(0));
4556     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4557     Value *CRm  = EmitScalarExpr(E->getArg(2));
4558     Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
4559 
4560     // Returns an unsigned 64 bit integer, represented
4561     // as two 32 bit integers.
4562 
4563     Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
4564     Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
4565     Rt = Builder.CreateZExt(Rt, Int64Ty);
4566     Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
4567 
4568     Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
4569     RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
4570     RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
4571 
4572     return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
4573   }
4574 
4575   if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
4576       ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
4577         BuiltinID == ARM::BI__builtin_arm_ldaex) &&
4578        getContext().getTypeSize(E->getType()) == 64) ||
4579       BuiltinID == ARM::BI__ldrexd) {
4580     Function *F;
4581 
4582     switch (BuiltinID) {
4583     default: llvm_unreachable("unexpected builtin");
4584     case ARM::BI__builtin_arm_ldaex:
4585       F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
4586       break;
4587     case ARM::BI__builtin_arm_ldrexd:
4588     case ARM::BI__builtin_arm_ldrex:
4589     case ARM::BI__ldrexd:
4590       F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
4591       break;
4592     }
4593 
4594     Value *LdPtr = EmitScalarExpr(E->getArg(0));
4595     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4596                                     "ldrexd");
4597 
4598     Value *Val0 = Builder.CreateExtractValue(Val, 1);
4599     Value *Val1 = Builder.CreateExtractValue(Val, 0);
4600     Val0 = Builder.CreateZExt(Val0, Int64Ty);
4601     Val1 = Builder.CreateZExt(Val1, Int64Ty);
4602 
4603     Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
4604     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4605     Val = Builder.CreateOr(Val, Val1);
4606     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4607   }
4608 
4609   if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
4610       BuiltinID == ARM::BI__builtin_arm_ldaex) {
4611     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4612 
4613     QualType Ty = E->getType();
4614     llvm::Type *RealResTy = ConvertType(Ty);
4615     llvm::Type *PtrTy = llvm::IntegerType::get(
4616         getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
4617     LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
4618 
4619     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
4620                                        ? Intrinsic::arm_ldaex
4621                                        : Intrinsic::arm_ldrex,
4622                                    PtrTy);
4623     Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
4624 
4625     if (RealResTy->isPointerTy())
4626       return Builder.CreateIntToPtr(Val, RealResTy);
4627     else {
4628       llvm::Type *IntResTy = llvm::IntegerType::get(
4629           getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
4630       Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4631       return Builder.CreateBitCast(Val, RealResTy);
4632     }
4633   }
4634 
4635   if (BuiltinID == ARM::BI__builtin_arm_strexd ||
4636       ((BuiltinID == ARM::BI__builtin_arm_stlex ||
4637         BuiltinID == ARM::BI__builtin_arm_strex) &&
4638        getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
4639     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4640                                        ? Intrinsic::arm_stlexd
4641                                        : Intrinsic::arm_strexd);
4642     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
4643 
4644     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4645     Value *Val = EmitScalarExpr(E->getArg(0));
4646     Builder.CreateStore(Val, Tmp);
4647 
4648     Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
4649     Val = Builder.CreateLoad(LdPtr);
4650 
4651     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4652     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4653     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
4654     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
4655   }
4656 
4657   if (BuiltinID == ARM::BI__builtin_arm_strex ||
4658       BuiltinID == ARM::BI__builtin_arm_stlex) {
4659     Value *StoreVal = EmitScalarExpr(E->getArg(0));
4660     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4661 
4662     QualType Ty = E->getArg(0)->getType();
4663     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4664                                                  getContext().getTypeSize(Ty));
4665     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4666 
4667     if (StoreVal->getType()->isPointerTy())
4668       StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
4669     else {
4670       llvm::Type *IntTy = llvm::IntegerType::get(
4671           getLLVMContext(),
4672           CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
4673       StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
4674       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
4675     }
4676 
4677     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4678                                        ? Intrinsic::arm_stlex
4679                                        : Intrinsic::arm_strex,
4680                                    StoreAddr->getType());
4681     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
4682   }
4683 
4684   switch (BuiltinID) {
4685   case ARM::BI__iso_volatile_load8:
4686   case ARM::BI__iso_volatile_load16:
4687   case ARM::BI__iso_volatile_load32:
4688   case ARM::BI__iso_volatile_load64: {
4689     Value *Ptr = EmitScalarExpr(E->getArg(0));
4690     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4691     CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
4692     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4693                                              LoadSize.getQuantity() * 8);
4694     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4695     llvm::LoadInst *Load =
4696       Builder.CreateAlignedLoad(Ptr, LoadSize);
4697     Load->setVolatile(true);
4698     return Load;
4699   }
4700   case ARM::BI__iso_volatile_store8:
4701   case ARM::BI__iso_volatile_store16:
4702   case ARM::BI__iso_volatile_store32:
4703   case ARM::BI__iso_volatile_store64: {
4704     Value *Ptr = EmitScalarExpr(E->getArg(0));
4705     Value *Value = EmitScalarExpr(E->getArg(1));
4706     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4707     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
4708     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4709                                              StoreSize.getQuantity() * 8);
4710     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4711     llvm::StoreInst *Store =
4712       Builder.CreateAlignedStore(Value, Ptr,
4713                                  StoreSize);
4714     Store->setVolatile(true);
4715     return Store;
4716   }
4717   }
4718 
4719   if (BuiltinID == ARM::BI__builtin_arm_clrex) {
4720     Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
4721     return Builder.CreateCall(F);
4722   }
4723 
4724   // CRC32
4725   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4726   switch (BuiltinID) {
4727   case ARM::BI__builtin_arm_crc32b:
4728     CRCIntrinsicID = Intrinsic::arm_crc32b; break;
4729   case ARM::BI__builtin_arm_crc32cb:
4730     CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
4731   case ARM::BI__builtin_arm_crc32h:
4732     CRCIntrinsicID = Intrinsic::arm_crc32h; break;
4733   case ARM::BI__builtin_arm_crc32ch:
4734     CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
4735   case ARM::BI__builtin_arm_crc32w:
4736   case ARM::BI__builtin_arm_crc32d:
4737     CRCIntrinsicID = Intrinsic::arm_crc32w; break;
4738   case ARM::BI__builtin_arm_crc32cw:
4739   case ARM::BI__builtin_arm_crc32cd:
4740     CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
4741   }
4742 
4743   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4744     Value *Arg0 = EmitScalarExpr(E->getArg(0));
4745     Value *Arg1 = EmitScalarExpr(E->getArg(1));
4746 
4747     // crc32{c,}d intrinsics are implemnted as two calls to crc32{c,}w
4748     // intrinsics, hence we need different codegen for these cases.
4749     if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
4750         BuiltinID == ARM::BI__builtin_arm_crc32cd) {
4751       Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4752       Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
4753       Value *Arg1b = Builder.CreateLShr(Arg1, C1);
4754       Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
4755 
4756       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4757       Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
4758       return Builder.CreateCall(F, {Res, Arg1b});
4759     } else {
4760       Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
4761 
4762       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4763       return Builder.CreateCall(F, {Arg0, Arg1});
4764     }
4765   }
4766 
4767   if (BuiltinID == ARM::BI__builtin_arm_rsr ||
4768       BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4769       BuiltinID == ARM::BI__builtin_arm_rsrp ||
4770       BuiltinID == ARM::BI__builtin_arm_wsr ||
4771       BuiltinID == ARM::BI__builtin_arm_wsr64 ||
4772       BuiltinID == ARM::BI__builtin_arm_wsrp) {
4773 
4774     bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
4775                   BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4776                   BuiltinID == ARM::BI__builtin_arm_rsrp;
4777 
4778     bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
4779                             BuiltinID == ARM::BI__builtin_arm_wsrp;
4780 
4781     bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4782                    BuiltinID == ARM::BI__builtin_arm_wsr64;
4783 
4784     llvm::Type *ValueType;
4785     llvm::Type *RegisterType;
4786     if (IsPointerBuiltin) {
4787       ValueType = VoidPtrTy;
4788       RegisterType = Int32Ty;
4789     } else if (Is64Bit) {
4790       ValueType = RegisterType = Int64Ty;
4791     } else {
4792       ValueType = RegisterType = Int32Ty;
4793     }
4794 
4795     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
4796   }
4797 
4798   // Find out if any arguments are required to be integer constant
4799   // expressions.
4800   unsigned ICEArguments = 0;
4801   ASTContext::GetBuiltinTypeError Error;
4802   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4803   assert(Error == ASTContext::GE_None && "Should not codegen an error");
4804 
4805   auto getAlignmentValue32 = [&](Address addr) -> Value* {
4806     return Builder.getInt32(addr.getAlignment().getQuantity());
4807   };
4808 
4809   Address PtrOp0 = Address::invalid();
4810   Address PtrOp1 = Address::invalid();
4811   SmallVector<Value*, 4> Ops;
4812   bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
4813   unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
4814   for (unsigned i = 0, e = NumArgs; i != e; i++) {
4815     if (i == 0) {
4816       switch (BuiltinID) {
4817       case NEON::BI__builtin_neon_vld1_v:
4818       case NEON::BI__builtin_neon_vld1q_v:
4819       case NEON::BI__builtin_neon_vld1q_lane_v:
4820       case NEON::BI__builtin_neon_vld1_lane_v:
4821       case NEON::BI__builtin_neon_vld1_dup_v:
4822       case NEON::BI__builtin_neon_vld1q_dup_v:
4823       case NEON::BI__builtin_neon_vst1_v:
4824       case NEON::BI__builtin_neon_vst1q_v:
4825       case NEON::BI__builtin_neon_vst1q_lane_v:
4826       case NEON::BI__builtin_neon_vst1_lane_v:
4827       case NEON::BI__builtin_neon_vst2_v:
4828       case NEON::BI__builtin_neon_vst2q_v:
4829       case NEON::BI__builtin_neon_vst2_lane_v:
4830       case NEON::BI__builtin_neon_vst2q_lane_v:
4831       case NEON::BI__builtin_neon_vst3_v:
4832       case NEON::BI__builtin_neon_vst3q_v:
4833       case NEON::BI__builtin_neon_vst3_lane_v:
4834       case NEON::BI__builtin_neon_vst3q_lane_v:
4835       case NEON::BI__builtin_neon_vst4_v:
4836       case NEON::BI__builtin_neon_vst4q_v:
4837       case NEON::BI__builtin_neon_vst4_lane_v:
4838       case NEON::BI__builtin_neon_vst4q_lane_v:
4839         // Get the alignment for the argument in addition to the value;
4840         // we'll use it later.
4841         PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
4842         Ops.push_back(PtrOp0.getPointer());
4843         continue;
4844       }
4845     }
4846     if (i == 1) {
4847       switch (BuiltinID) {
4848       case NEON::BI__builtin_neon_vld2_v:
4849       case NEON::BI__builtin_neon_vld2q_v:
4850       case NEON::BI__builtin_neon_vld3_v:
4851       case NEON::BI__builtin_neon_vld3q_v:
4852       case NEON::BI__builtin_neon_vld4_v:
4853       case NEON::BI__builtin_neon_vld4q_v:
4854       case NEON::BI__builtin_neon_vld2_lane_v:
4855       case NEON::BI__builtin_neon_vld2q_lane_v:
4856       case NEON::BI__builtin_neon_vld3_lane_v:
4857       case NEON::BI__builtin_neon_vld3q_lane_v:
4858       case NEON::BI__builtin_neon_vld4_lane_v:
4859       case NEON::BI__builtin_neon_vld4q_lane_v:
4860       case NEON::BI__builtin_neon_vld2_dup_v:
4861       case NEON::BI__builtin_neon_vld3_dup_v:
4862       case NEON::BI__builtin_neon_vld4_dup_v:
4863         // Get the alignment for the argument in addition to the value;
4864         // we'll use it later.
4865         PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
4866         Ops.push_back(PtrOp1.getPointer());
4867         continue;
4868       }
4869     }
4870 
4871     if ((ICEArguments & (1 << i)) == 0) {
4872       Ops.push_back(EmitScalarExpr(E->getArg(i)));
4873     } else {
4874       // If this is required to be a constant, constant fold it so that we know
4875       // that the generated intrinsic gets a ConstantInt.
4876       llvm::APSInt Result;
4877       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
4878       assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
4879       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
4880     }
4881   }
4882 
4883   switch (BuiltinID) {
4884   default: break;
4885 
4886   case NEON::BI__builtin_neon_vget_lane_i8:
4887   case NEON::BI__builtin_neon_vget_lane_i16:
4888   case NEON::BI__builtin_neon_vget_lane_i32:
4889   case NEON::BI__builtin_neon_vget_lane_i64:
4890   case NEON::BI__builtin_neon_vget_lane_f32:
4891   case NEON::BI__builtin_neon_vgetq_lane_i8:
4892   case NEON::BI__builtin_neon_vgetq_lane_i16:
4893   case NEON::BI__builtin_neon_vgetq_lane_i32:
4894   case NEON::BI__builtin_neon_vgetq_lane_i64:
4895   case NEON::BI__builtin_neon_vgetq_lane_f32:
4896     return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
4897 
4898   case NEON::BI__builtin_neon_vset_lane_i8:
4899   case NEON::BI__builtin_neon_vset_lane_i16:
4900   case NEON::BI__builtin_neon_vset_lane_i32:
4901   case NEON::BI__builtin_neon_vset_lane_i64:
4902   case NEON::BI__builtin_neon_vset_lane_f32:
4903   case NEON::BI__builtin_neon_vsetq_lane_i8:
4904   case NEON::BI__builtin_neon_vsetq_lane_i16:
4905   case NEON::BI__builtin_neon_vsetq_lane_i32:
4906   case NEON::BI__builtin_neon_vsetq_lane_i64:
4907   case NEON::BI__builtin_neon_vsetq_lane_f32:
4908     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4909 
4910   case NEON::BI__builtin_neon_vsha1h_u32:
4911     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
4912                         "vsha1h");
4913   case NEON::BI__builtin_neon_vsha1cq_u32:
4914     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
4915                         "vsha1h");
4916   case NEON::BI__builtin_neon_vsha1pq_u32:
4917     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
4918                         "vsha1h");
4919   case NEON::BI__builtin_neon_vsha1mq_u32:
4920     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
4921                         "vsha1h");
4922 
4923   // The ARM _MoveToCoprocessor builtins put the input register value as
4924   // the first argument, but the LLVM intrinsic expects it as the third one.
4925   case ARM::BI_MoveToCoprocessor:
4926   case ARM::BI_MoveToCoprocessor2: {
4927     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
4928                                    Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
4929     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
4930                                   Ops[3], Ops[4], Ops[5]});
4931   }
4932   case ARM::BI_BitScanForward:
4933   case ARM::BI_BitScanForward64:
4934     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
4935   case ARM::BI_BitScanReverse:
4936   case ARM::BI_BitScanReverse64:
4937     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
4938 
4939   case ARM::BI_InterlockedAnd64:
4940     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
4941   case ARM::BI_InterlockedExchange64:
4942     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
4943   case ARM::BI_InterlockedExchangeAdd64:
4944     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
4945   case ARM::BI_InterlockedExchangeSub64:
4946     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
4947   case ARM::BI_InterlockedOr64:
4948     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
4949   case ARM::BI_InterlockedXor64:
4950     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
4951   case ARM::BI_InterlockedDecrement64:
4952     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
4953   case ARM::BI_InterlockedIncrement64:
4954     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
4955   }
4956 
4957   // Get the last argument, which specifies the vector type.
4958   assert(HasExtraArg);
4959   llvm::APSInt Result;
4960   const Expr *Arg = E->getArg(E->getNumArgs()-1);
4961   if (!Arg->isIntegerConstantExpr(Result, getContext()))
4962     return nullptr;
4963 
4964   if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
4965       BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
4966     // Determine the overloaded type of this builtin.
4967     llvm::Type *Ty;
4968     if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
4969       Ty = FloatTy;
4970     else
4971       Ty = DoubleTy;
4972 
4973     // Determine whether this is an unsigned conversion or not.
4974     bool usgn = Result.getZExtValue() == 1;
4975     unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
4976 
4977     // Call the appropriate intrinsic.
4978     Function *F = CGM.getIntrinsic(Int, Ty);
4979     return Builder.CreateCall(F, Ops, "vcvtr");
4980   }
4981 
4982   // Determine the type of this overloaded NEON intrinsic.
4983   NeonTypeFlags Type(Result.getZExtValue());
4984   bool usgn = Type.isUnsigned();
4985   bool rightShift = false;
4986 
4987   llvm::VectorType *VTy = GetNeonType(this, Type);
4988   llvm::Type *Ty = VTy;
4989   if (!Ty)
4990     return nullptr;
4991 
4992   // Many NEON builtins have identical semantics and uses in ARM and
4993   // AArch64. Emit these in a single function.
4994   auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
4995   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
4996       IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
4997   if (Builtin)
4998     return EmitCommonNeonBuiltinExpr(
4999         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
5000         Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);
5001 
5002   unsigned Int;
5003   switch (BuiltinID) {
5004   default: return nullptr;
5005   case NEON::BI__builtin_neon_vld1q_lane_v:
5006     // Handle 64-bit integer elements as a special case.  Use shuffles of
5007     // one-element vectors to avoid poor code for i64 in the backend.
5008     if (VTy->getElementType()->isIntegerTy(64)) {
5009       // Extract the other lane.
5010       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5011       uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
5012       Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
5013       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
5014       // Load the value as a one-element vector.
5015       Ty = llvm::VectorType::get(VTy->getElementType(), 1);
5016       llvm::Type *Tys[] = {Ty, Int8PtrTy};
5017       Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
5018       Value *Align = getAlignmentValue32(PtrOp0);
5019       Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
5020       // Combine them.
5021       uint32_t Indices[] = {1 - Lane, Lane};
5022       SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
5023       return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
5024     }
5025     // fall through
5026   case NEON::BI__builtin_neon_vld1_lane_v: {
5027     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5028     PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
5029     Value *Ld = Builder.CreateLoad(PtrOp0);
5030     return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
5031   }
5032   case NEON::BI__builtin_neon_vld2_dup_v:
5033   case NEON::BI__builtin_neon_vld3_dup_v:
5034   case NEON::BI__builtin_neon_vld4_dup_v: {
5035     // Handle 64-bit elements as a special-case.  There is no "dup" needed.
5036     if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
5037       switch (BuiltinID) {
5038       case NEON::BI__builtin_neon_vld2_dup_v:
5039         Int = Intrinsic::arm_neon_vld2;
5040         break;
5041       case NEON::BI__builtin_neon_vld3_dup_v:
5042         Int = Intrinsic::arm_neon_vld3;
5043         break;
5044       case NEON::BI__builtin_neon_vld4_dup_v:
5045         Int = Intrinsic::arm_neon_vld4;
5046         break;
5047       default: llvm_unreachable("unknown vld_dup intrinsic?");
5048       }
5049       llvm::Type *Tys[] = {Ty, Int8PtrTy};
5050       Function *F = CGM.getIntrinsic(Int, Tys);
5051       llvm::Value *Align = getAlignmentValue32(PtrOp1);
5052       Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
5053       Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5054       Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5055       return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5056     }
5057     switch (BuiltinID) {
5058     case NEON::BI__builtin_neon_vld2_dup_v:
5059       Int = Intrinsic::arm_neon_vld2lane;
5060       break;
5061     case NEON::BI__builtin_neon_vld3_dup_v:
5062       Int = Intrinsic::arm_neon_vld3lane;
5063       break;
5064     case NEON::BI__builtin_neon_vld4_dup_v:
5065       Int = Intrinsic::arm_neon_vld4lane;
5066       break;
5067     default: llvm_unreachable("unknown vld_dup intrinsic?");
5068     }
5069     llvm::Type *Tys[] = {Ty, Int8PtrTy};
5070     Function *F = CGM.getIntrinsic(Int, Tys);
5071     llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
5072 
5073     SmallVector<Value*, 6> Args;
5074     Args.push_back(Ops[1]);
5075     Args.append(STy->getNumElements(), UndefValue::get(Ty));
5076 
5077     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
5078     Args.push_back(CI);
5079     Args.push_back(getAlignmentValue32(PtrOp1));
5080 
5081     Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
5082     // splat lane 0 to all elts in each vector of the result.
5083     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
5084       Value *Val = Builder.CreateExtractValue(Ops[1], i);
5085       Value *Elt = Builder.CreateBitCast(Val, Ty);
5086       Elt = EmitNeonSplat(Elt, CI);
5087       Elt = Builder.CreateBitCast(Elt, Val->getType());
5088       Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
5089     }
5090     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5091     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5092     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5093   }
5094   case NEON::BI__builtin_neon_vqrshrn_n_v:
5095     Int =
5096       usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
5097     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
5098                         1, true);
5099   case NEON::BI__builtin_neon_vqrshrun_n_v:
5100     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
5101                         Ops, "vqrshrun_n", 1, true);
5102   case NEON::BI__builtin_neon_vqshrn_n_v:
5103     Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
5104     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
5105                         1, true);
5106   case NEON::BI__builtin_neon_vqshrun_n_v:
5107     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
5108                         Ops, "vqshrun_n", 1, true);
5109   case NEON::BI__builtin_neon_vrecpe_v:
5110   case NEON::BI__builtin_neon_vrecpeq_v:
5111     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
5112                         Ops, "vrecpe");
5113   case NEON::BI__builtin_neon_vrshrn_n_v:
5114     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
5115                         Ops, "vrshrn_n", 1, true);
5116   case NEON::BI__builtin_neon_vrsra_n_v:
5117   case NEON::BI__builtin_neon_vrsraq_n_v:
5118     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5119     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5120     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
5121     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
5122     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
5123     return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
5124   case NEON::BI__builtin_neon_vsri_n_v:
5125   case NEON::BI__builtin_neon_vsriq_n_v:
5126     rightShift = true;
5127     LLVM_FALLTHROUGH;
5128   case NEON::BI__builtin_neon_vsli_n_v:
5129   case NEON::BI__builtin_neon_vsliq_n_v:
5130     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
5131     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
5132                         Ops, "vsli_n");
5133   case NEON::BI__builtin_neon_vsra_n_v:
5134   case NEON::BI__builtin_neon_vsraq_n_v:
5135     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5136     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
5137     return Builder.CreateAdd(Ops[0], Ops[1]);
5138   case NEON::BI__builtin_neon_vst1q_lane_v:
5139     // Handle 64-bit integer elements as a special case.  Use a shuffle to get
5140     // a one-element vector and avoid poor code for i64 in the backend.
5141     if (VTy->getElementType()->isIntegerTy(64)) {
5142       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5143       Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
5144       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
5145       Ops[2] = getAlignmentValue32(PtrOp0);
5146       llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
5147       return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
5148                                                  Tys), Ops);
5149     }
5150     // fall through
5151   case NEON::BI__builtin_neon_vst1_lane_v: {
5152     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5153     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
5154     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5155     auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
5156     return St;
5157   }
5158   case NEON::BI__builtin_neon_vtbl1_v:
5159     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
5160                         Ops, "vtbl1");
5161   case NEON::BI__builtin_neon_vtbl2_v:
5162     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
5163                         Ops, "vtbl2");
5164   case NEON::BI__builtin_neon_vtbl3_v:
5165     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
5166                         Ops, "vtbl3");
5167   case NEON::BI__builtin_neon_vtbl4_v:
5168     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
5169                         Ops, "vtbl4");
5170   case NEON::BI__builtin_neon_vtbx1_v:
5171     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
5172                         Ops, "vtbx1");
5173   case NEON::BI__builtin_neon_vtbx2_v:
5174     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
5175                         Ops, "vtbx2");
5176   case NEON::BI__builtin_neon_vtbx3_v:
5177     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
5178                         Ops, "vtbx3");
5179   case NEON::BI__builtin_neon_vtbx4_v:
5180     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
5181                         Ops, "vtbx4");
5182   }
5183 }
5184 
5185 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
5186                                       const CallExpr *E,
5187                                       SmallVectorImpl<Value *> &Ops) {
5188   unsigned int Int = 0;
5189   const char *s = nullptr;
5190 
5191   switch (BuiltinID) {
5192   default:
5193     return nullptr;
5194   case NEON::BI__builtin_neon_vtbl1_v:
5195   case NEON::BI__builtin_neon_vqtbl1_v:
5196   case NEON::BI__builtin_neon_vqtbl1q_v:
5197   case NEON::BI__builtin_neon_vtbl2_v:
5198   case NEON::BI__builtin_neon_vqtbl2_v:
5199   case NEON::BI__builtin_neon_vqtbl2q_v:
5200   case NEON::BI__builtin_neon_vtbl3_v:
5201   case NEON::BI__builtin_neon_vqtbl3_v:
5202   case NEON::BI__builtin_neon_vqtbl3q_v:
5203   case NEON::BI__builtin_neon_vtbl4_v:
5204   case NEON::BI__builtin_neon_vqtbl4_v:
5205   case NEON::BI__builtin_neon_vqtbl4q_v:
5206     break;
5207   case NEON::BI__builtin_neon_vtbx1_v:
5208   case NEON::BI__builtin_neon_vqtbx1_v:
5209   case NEON::BI__builtin_neon_vqtbx1q_v:
5210   case NEON::BI__builtin_neon_vtbx2_v:
5211   case NEON::BI__builtin_neon_vqtbx2_v:
5212   case NEON::BI__builtin_neon_vqtbx2q_v:
5213   case NEON::BI__builtin_neon_vtbx3_v:
5214   case NEON::BI__builtin_neon_vqtbx3_v:
5215   case NEON::BI__builtin_neon_vqtbx3q_v:
5216   case NEON::BI__builtin_neon_vtbx4_v:
5217   case NEON::BI__builtin_neon_vqtbx4_v:
5218   case NEON::BI__builtin_neon_vqtbx4q_v:
5219     break;
5220   }
5221 
5222   assert(E->getNumArgs() >= 3);
5223 
5224   // Get the last argument, which specifies the vector type.
5225   llvm::APSInt Result;
5226   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
5227   if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
5228     return nullptr;
5229 
5230   // Determine the type of this overloaded NEON intrinsic.
5231   NeonTypeFlags Type(Result.getZExtValue());
5232   llvm::VectorType *Ty = GetNeonType(&CGF, Type);
5233   if (!Ty)
5234     return nullptr;
5235 
5236   CodeGen::CGBuilderTy &Builder = CGF.Builder;
5237 
5238   // AArch64 scalar builtins are not overloaded, they do not have an extra
5239   // argument that specifies the vector type, need to handle each case.
5240   switch (BuiltinID) {
5241   case NEON::BI__builtin_neon_vtbl1_v: {
5242     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
5243                               Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
5244                               "vtbl1");
5245   }
5246   case NEON::BI__builtin_neon_vtbl2_v: {
5247     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
5248                               Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
5249                               "vtbl1");
5250   }
5251   case NEON::BI__builtin_neon_vtbl3_v: {
5252     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
5253                               Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
5254                               "vtbl2");
5255   }
5256   case NEON::BI__builtin_neon_vtbl4_v: {
5257     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
5258                               Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
5259                               "vtbl2");
5260   }
5261   case NEON::BI__builtin_neon_vtbx1_v: {
5262     Value *TblRes =
5263         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
5264                            Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
5265 
5266     llvm::Constant *EightV = ConstantInt::get(Ty, 8);
5267     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
5268     CmpRes = Builder.CreateSExt(CmpRes, Ty);
5269 
5270     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5271     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5272     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5273   }
5274   case NEON::BI__builtin_neon_vtbx2_v: {
5275     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
5276                               Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
5277                               "vtbx1");
5278   }
5279   case NEON::BI__builtin_neon_vtbx3_v: {
5280     Value *TblRes =
5281         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
5282                            Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
5283 
5284     llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
5285     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
5286                                            TwentyFourV);
5287     CmpRes = Builder.CreateSExt(CmpRes, Ty);
5288 
5289     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5290     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5291     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5292   }
5293   case NEON::BI__builtin_neon_vtbx4_v: {
5294     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
5295                               Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
5296                               "vtbx2");
5297   }
5298   case NEON::BI__builtin_neon_vqtbl1_v:
5299   case NEON::BI__builtin_neon_vqtbl1q_v:
5300     Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
5301   case NEON::BI__builtin_neon_vqtbl2_v:
5302   case NEON::BI__builtin_neon_vqtbl2q_v: {
5303     Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
5304   case NEON::BI__builtin_neon_vqtbl3_v:
5305   case NEON::BI__builtin_neon_vqtbl3q_v:
5306     Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
5307   case NEON::BI__builtin_neon_vqtbl4_v:
5308   case NEON::BI__builtin_neon_vqtbl4q_v:
5309     Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
5310   case NEON::BI__builtin_neon_vqtbx1_v:
5311   case NEON::BI__builtin_neon_vqtbx1q_v:
5312     Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
5313   case NEON::BI__builtin_neon_vqtbx2_v:
5314   case NEON::BI__builtin_neon_vqtbx2q_v:
5315     Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
5316   case NEON::BI__builtin_neon_vqtbx3_v:
5317   case NEON::BI__builtin_neon_vqtbx3q_v:
5318     Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
5319   case NEON::BI__builtin_neon_vqtbx4_v:
5320   case NEON::BI__builtin_neon_vqtbx4q_v:
5321     Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
5322   }
5323   }
5324 
5325   if (!Int)
5326     return nullptr;
5327 
5328   Function *F = CGF.CGM.getIntrinsic(Int, Ty);
5329   return CGF.EmitNeonCall(F, Ops, s);
5330 }
5331 
5332 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
5333   llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
5334   Op = Builder.CreateBitCast(Op, Int16Ty);
5335   Value *V = UndefValue::get(VTy);
5336   llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
5337   Op = Builder.CreateInsertElement(V, Op, CI);
5338   return Op;
5339 }
5340 
5341 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
5342                                                const CallExpr *E) {
5343   unsigned HintID = static_cast<unsigned>(-1);
5344   switch (BuiltinID) {
5345   default: break;
5346   case AArch64::BI__builtin_arm_nop:
5347     HintID = 0;
5348     break;
5349   case AArch64::BI__builtin_arm_yield:
5350     HintID = 1;
5351     break;
5352   case AArch64::BI__builtin_arm_wfe:
5353     HintID = 2;
5354     break;
5355   case AArch64::BI__builtin_arm_wfi:
5356     HintID = 3;
5357     break;
5358   case AArch64::BI__builtin_arm_sev:
5359     HintID = 4;
5360     break;
5361   case AArch64::BI__builtin_arm_sevl:
5362     HintID = 5;
5363     break;
5364   }
5365 
5366   if (HintID != static_cast<unsigned>(-1)) {
5367     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
5368     return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
5369   }
5370 
5371   if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
5372     Value *Address         = EmitScalarExpr(E->getArg(0));
5373     Value *RW              = EmitScalarExpr(E->getArg(1));
5374     Value *CacheLevel      = EmitScalarExpr(E->getArg(2));
5375     Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
5376     Value *IsData          = EmitScalarExpr(E->getArg(4));
5377 
5378     Value *Locality = nullptr;
5379     if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
5380       // Temporal fetch, needs to convert cache level to locality.
5381       Locality = llvm::ConstantInt::get(Int32Ty,
5382         -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
5383     } else {
5384       // Streaming fetch.
5385       Locality = llvm::ConstantInt::get(Int32Ty, 0);
5386     }
5387 
5388     // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
5389     // PLDL3STRM or PLDL2STRM.
5390     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
5391     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
5392   }
5393 
5394   if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
5395     assert((getContext().getTypeSize(E->getType()) == 32) &&
5396            "rbit of unusual size!");
5397     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5398     return Builder.CreateCall(
5399         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5400   }
5401   if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
5402     assert((getContext().getTypeSize(E->getType()) == 64) &&
5403            "rbit of unusual size!");
5404     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5405     return Builder.CreateCall(
5406         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5407   }
5408 
5409   if (BuiltinID == AArch64::BI__clear_cache) {
5410     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
5411     const FunctionDecl *FD = E->getDirectCallee();
5412     Value *Ops[2];
5413     for (unsigned i = 0; i < 2; i++)
5414       Ops[i] = EmitScalarExpr(E->getArg(i));
5415     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
5416     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
5417     StringRef Name = FD->getName();
5418     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
5419   }
5420 
5421   if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5422       BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
5423       getContext().getTypeSize(E->getType()) == 128) {
5424     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5425                                        ? Intrinsic::aarch64_ldaxp
5426                                        : Intrinsic::aarch64_ldxp);
5427 
5428     Value *LdPtr = EmitScalarExpr(E->getArg(0));
5429     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
5430                                     "ldxp");
5431 
5432     Value *Val0 = Builder.CreateExtractValue(Val, 1);
5433     Value *Val1 = Builder.CreateExtractValue(Val, 0);
5434     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5435     Val0 = Builder.CreateZExt(Val0, Int128Ty);
5436     Val1 = Builder.CreateZExt(Val1, Int128Ty);
5437 
5438     Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
5439     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
5440     Val = Builder.CreateOr(Val, Val1);
5441     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
5442   } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5443              BuiltinID == AArch64::BI__builtin_arm_ldaex) {
5444     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
5445 
5446     QualType Ty = E->getType();
5447     llvm::Type *RealResTy = ConvertType(Ty);
5448     llvm::Type *PtrTy = llvm::IntegerType::get(
5449         getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
5450     LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
5451 
5452     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5453                                        ? Intrinsic::aarch64_ldaxr
5454                                        : Intrinsic::aarch64_ldxr,
5455                                    PtrTy);
5456     Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
5457 
5458     if (RealResTy->isPointerTy())
5459       return Builder.CreateIntToPtr(Val, RealResTy);
5460 
5461     llvm::Type *IntResTy = llvm::IntegerType::get(
5462         getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
5463     Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
5464     return Builder.CreateBitCast(Val, RealResTy);
5465   }
5466 
5467   if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
5468        BuiltinID == AArch64::BI__builtin_arm_stlex) &&
5469       getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
5470     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5471                                        ? Intrinsic::aarch64_stlxp
5472                                        : Intrinsic::aarch64_stxp);
5473     llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
5474 
5475     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
5476     EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
5477 
5478     Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
5479     llvm::Value *Val = Builder.CreateLoad(Tmp);
5480 
5481     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
5482     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
5483     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
5484                                          Int8PtrTy);
5485     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
5486   }
5487 
5488   if (BuiltinID == AArch64::BI__builtin_arm_strex ||
5489       BuiltinID == AArch64::BI__builtin_arm_stlex) {
5490     Value *StoreVal = EmitScalarExpr(E->getArg(0));
5491     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
5492 
5493     QualType Ty = E->getArg(0)->getType();
5494     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
5495                                                  getContext().getTypeSize(Ty));
5496     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
5497 
5498     if (StoreVal->getType()->isPointerTy())
5499       StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
5500     else {
5501       llvm::Type *IntTy = llvm::IntegerType::get(
5502           getLLVMContext(),
5503           CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
5504       StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
5505       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
5506     }
5507 
5508     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5509                                        ? Intrinsic::aarch64_stlxr
5510                                        : Intrinsic::aarch64_stxr,
5511                                    StoreAddr->getType());
5512     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
5513   }
5514 
5515   if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
5516     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
5517     return Builder.CreateCall(F);
5518   }
5519 
5520   // CRC32
5521   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5522   switch (BuiltinID) {
5523   case AArch64::BI__builtin_arm_crc32b:
5524     CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
5525   case AArch64::BI__builtin_arm_crc32cb:
5526     CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
5527   case AArch64::BI__builtin_arm_crc32h:
5528     CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
5529   case AArch64::BI__builtin_arm_crc32ch:
5530     CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
5531   case AArch64::BI__builtin_arm_crc32w:
5532     CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
5533   case AArch64::BI__builtin_arm_crc32cw:
5534     CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
5535   case AArch64::BI__builtin_arm_crc32d:
5536     CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
5537   case AArch64::BI__builtin_arm_crc32cd:
5538     CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
5539   }
5540 
5541   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5542     Value *Arg0 = EmitScalarExpr(E->getArg(0));
5543     Value *Arg1 = EmitScalarExpr(E->getArg(1));
5544     Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5545 
5546     llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
5547     Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
5548 
5549     return Builder.CreateCall(F, {Arg0, Arg1});
5550   }
5551 
5552   if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
5553       BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5554       BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5555       BuiltinID == AArch64::BI__builtin_arm_wsr ||
5556       BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
5557       BuiltinID == AArch64::BI__builtin_arm_wsrp) {
5558 
5559     bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
5560                   BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5561                   BuiltinID == AArch64::BI__builtin_arm_rsrp;
5562 
5563     bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5564                             BuiltinID == AArch64::BI__builtin_arm_wsrp;
5565 
5566     bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
5567                    BuiltinID != AArch64::BI__builtin_arm_wsr;
5568 
5569     llvm::Type *ValueType;
5570     llvm::Type *RegisterType = Int64Ty;
5571     if (IsPointerBuiltin) {
5572       ValueType = VoidPtrTy;
5573     } else if (Is64Bit) {
5574       ValueType = Int64Ty;
5575     } else {
5576       ValueType = Int32Ty;
5577     }
5578 
5579     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
5580   }
5581 
5582   // Find out if any arguments are required to be integer constant
5583   // expressions.
5584   unsigned ICEArguments = 0;
5585   ASTContext::GetBuiltinTypeError Error;
5586   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5587   assert(Error == ASTContext::GE_None && "Should not codegen an error");
5588 
5589   llvm::SmallVector<Value*, 4> Ops;
5590   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
5591     if ((ICEArguments & (1 << i)) == 0) {
5592       Ops.push_back(EmitScalarExpr(E->getArg(i)));
5593     } else {
5594       // If this is required to be a constant, constant fold it so that we know
5595       // that the generated intrinsic gets a ConstantInt.
5596       llvm::APSInt Result;
5597       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
5598       assert(IsConst && "Constant arg isn't actually constant?");
5599       (void)IsConst;
5600       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
5601     }
5602   }
5603 
5604   auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
5605   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
5606       SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5607 
5608   if (Builtin) {
5609     Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
5610     Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
5611     assert(Result && "SISD intrinsic should have been handled");
5612     return Result;
5613   }
5614 
5615   llvm::APSInt Result;
5616   const Expr *Arg = E->getArg(E->getNumArgs()-1);
5617   NeonTypeFlags Type(0);
5618   if (Arg->isIntegerConstantExpr(Result, getContext()))
5619     // Determine the type of this overloaded NEON intrinsic.
5620     Type = NeonTypeFlags(Result.getZExtValue());
5621 
5622   bool usgn = Type.isUnsigned();
5623   bool quad = Type.isQuad();
5624 
5625   // Handle non-overloaded intrinsics first.
5626   switch (BuiltinID) {
5627   default: break;
5628   case NEON::BI__builtin_neon_vldrq_p128: {
5629     llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5630     llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
5631     Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
5632     return Builder.CreateAlignedLoad(Int128Ty, Ptr,
5633                                      CharUnits::fromQuantity(16));
5634   }
5635   case NEON::BI__builtin_neon_vstrq_p128: {
5636     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
5637     Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
5638     return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
5639   }
5640   case NEON::BI__builtin_neon_vcvts_u32_f32:
5641   case NEON::BI__builtin_neon_vcvtd_u64_f64:
5642     usgn = true;
5643     // FALL THROUGH
5644   case NEON::BI__builtin_neon_vcvts_s32_f32:
5645   case NEON::BI__builtin_neon_vcvtd_s64_f64: {
5646     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5647     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5648     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5649     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5650     Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
5651     if (usgn)
5652       return Builder.CreateFPToUI(Ops[0], InTy);
5653     return Builder.CreateFPToSI(Ops[0], InTy);
5654   }
5655   case NEON::BI__builtin_neon_vcvts_f32_u32:
5656   case NEON::BI__builtin_neon_vcvtd_f64_u64:
5657     usgn = true;
5658     // FALL THROUGH
5659   case NEON::BI__builtin_neon_vcvts_f32_s32:
5660   case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5661     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5662     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5663     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5664     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5665     Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5666     if (usgn)
5667       return Builder.CreateUIToFP(Ops[0], FTy);
5668     return Builder.CreateSIToFP(Ops[0], FTy);
5669   }
5670   case NEON::BI__builtin_neon_vpaddd_s64: {
5671     llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
5672     Value *Vec = EmitScalarExpr(E->getArg(0));
    // The vector is v2i64, so make sure it's bitcast to that.
5674     Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
5675     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5676     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5677     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5678     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
    // Pairwise addition of a v2i64 into a scalar i64.
5680     return Builder.CreateAdd(Op0, Op1, "vpaddd");
5681   }
5682   case NEON::BI__builtin_neon_vpaddd_f64: {
5683     llvm::Type *Ty =
5684       llvm::VectorType::get(DoubleTy, 2);
5685     Value *Vec = EmitScalarExpr(E->getArg(0));
5686     // The vector is v2f64, so make sure it's bitcast to that.
5687     Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
5688     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5689     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5690     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5691     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5692     // Pairwise addition of a v2f64 into a scalar f64.
5693     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5694   }
5695   case NEON::BI__builtin_neon_vpadds_f32: {
5696     llvm::Type *Ty =
5697       llvm::VectorType::get(FloatTy, 2);
5698     Value *Vec = EmitScalarExpr(E->getArg(0));
5699     // The vector is v2f32, so make sure it's bitcast to that.
5700     Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
5701     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5702     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5703     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5704     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5705     // Pairwise addition of a v2f32 into a scalar f32.
5706     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5707   }
5708   case NEON::BI__builtin_neon_vceqzd_s64:
5709   case NEON::BI__builtin_neon_vceqzd_f64:
5710   case NEON::BI__builtin_neon_vceqzs_f32:
5711     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5712     return EmitAArch64CompareBuiltinExpr(
5713         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5714         ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
5715   case NEON::BI__builtin_neon_vcgezd_s64:
5716   case NEON::BI__builtin_neon_vcgezd_f64:
5717   case NEON::BI__builtin_neon_vcgezs_f32:
5718     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5719     return EmitAArch64CompareBuiltinExpr(
5720         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5721         ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
5722   case NEON::BI__builtin_neon_vclezd_s64:
5723   case NEON::BI__builtin_neon_vclezd_f64:
5724   case NEON::BI__builtin_neon_vclezs_f32:
5725     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5726     return EmitAArch64CompareBuiltinExpr(
5727         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5728         ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
5729   case NEON::BI__builtin_neon_vcgtzd_s64:
5730   case NEON::BI__builtin_neon_vcgtzd_f64:
5731   case NEON::BI__builtin_neon_vcgtzs_f32:
5732     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5733     return EmitAArch64CompareBuiltinExpr(
5734         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5735         ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
5736   case NEON::BI__builtin_neon_vcltzd_s64:
5737   case NEON::BI__builtin_neon_vcltzd_f64:
5738   case NEON::BI__builtin_neon_vcltzs_f32:
5739     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5740     return EmitAArch64CompareBuiltinExpr(
5741         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5742         ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
5743 
5744   case NEON::BI__builtin_neon_vceqzd_u64: {
5745     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5746     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5747     Ops[0] =
5748         Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
5749     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
5750   }
5751   case NEON::BI__builtin_neon_vceqd_f64:
5752   case NEON::BI__builtin_neon_vcled_f64:
5753   case NEON::BI__builtin_neon_vcltd_f64:
5754   case NEON::BI__builtin_neon_vcged_f64:
5755   case NEON::BI__builtin_neon_vcgtd_f64: {
5756     llvm::CmpInst::Predicate P;
5757     switch (BuiltinID) {
5758     default: llvm_unreachable("missing builtin ID in switch!");
5759     case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
5760     case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
5761     case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
5762     case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
5763     case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
5764     }
5765     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5766     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5767     Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5768     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5769     return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
5770   }
5771   case NEON::BI__builtin_neon_vceqs_f32:
5772   case NEON::BI__builtin_neon_vcles_f32:
5773   case NEON::BI__builtin_neon_vclts_f32:
5774   case NEON::BI__builtin_neon_vcges_f32:
5775   case NEON::BI__builtin_neon_vcgts_f32: {
5776     llvm::CmpInst::Predicate P;
5777     switch (BuiltinID) {
5778     default: llvm_unreachable("missing builtin ID in switch!");
5779     case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
5780     case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
5781     case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
5782     case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
5783     case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
5784     }
5785     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5786     Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
5787     Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
5788     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5789     return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
5790   }
5791   case NEON::BI__builtin_neon_vceqd_s64:
5792   case NEON::BI__builtin_neon_vceqd_u64:
5793   case NEON::BI__builtin_neon_vcgtd_s64:
5794   case NEON::BI__builtin_neon_vcgtd_u64:
5795   case NEON::BI__builtin_neon_vcltd_s64:
5796   case NEON::BI__builtin_neon_vcltd_u64:
5797   case NEON::BI__builtin_neon_vcged_u64:
5798   case NEON::BI__builtin_neon_vcged_s64:
5799   case NEON::BI__builtin_neon_vcled_u64:
5800   case NEON::BI__builtin_neon_vcled_s64: {
5801     llvm::CmpInst::Predicate P;
5802     switch (BuiltinID) {
5803     default: llvm_unreachable("missing builtin ID in switch!");
5804     case NEON::BI__builtin_neon_vceqd_s64:
5805     case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
5806     case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
5807     case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
5808     case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
5809     case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
5810     case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
5811     case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
5812     case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
5813     case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
5814     }
5815     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5816     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5817     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5818     Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
5819     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
5820   }
5821   case NEON::BI__builtin_neon_vtstd_s64:
5822   case NEON::BI__builtin_neon_vtstd_u64: {
5823     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5824     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5825     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5826     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
5827     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
5828                                 llvm::Constant::getNullValue(Int64Ty));
5829     return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
5830   }
5831   case NEON::BI__builtin_neon_vset_lane_i8:
5832   case NEON::BI__builtin_neon_vset_lane_i16:
5833   case NEON::BI__builtin_neon_vset_lane_i32:
5834   case NEON::BI__builtin_neon_vset_lane_i64:
5835   case NEON::BI__builtin_neon_vset_lane_f32:
5836   case NEON::BI__builtin_neon_vsetq_lane_i8:
5837   case NEON::BI__builtin_neon_vsetq_lane_i16:
5838   case NEON::BI__builtin_neon_vsetq_lane_i32:
5839   case NEON::BI__builtin_neon_vsetq_lane_i64:
5840   case NEON::BI__builtin_neon_vsetq_lane_f32:
5841     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5842     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5843   case NEON::BI__builtin_neon_vset_lane_f64:
5844     // The vector type needs a cast for the v1f64 variant.
5845     Ops[1] = Builder.CreateBitCast(Ops[1],
5846                                    llvm::VectorType::get(DoubleTy, 1));
5847     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5848     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5849   case NEON::BI__builtin_neon_vsetq_lane_f64:
5850     // The vector type needs a cast for the v2f64 variant.
5851     Ops[1] = Builder.CreateBitCast(Ops[1],
5852         llvm::VectorType::get(DoubleTy, 2));
5853     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5854     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5855 
5856   case NEON::BI__builtin_neon_vget_lane_i8:
5857   case NEON::BI__builtin_neon_vdupb_lane_i8:
5858     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
5859     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5860                                         "vget_lane");
5861   case NEON::BI__builtin_neon_vgetq_lane_i8:
5862   case NEON::BI__builtin_neon_vdupb_laneq_i8:
5863     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
5864     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5865                                         "vgetq_lane");
5866   case NEON::BI__builtin_neon_vget_lane_i16:
5867   case NEON::BI__builtin_neon_vduph_lane_i16:
5868     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
5869     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5870                                         "vget_lane");
5871   case NEON::BI__builtin_neon_vgetq_lane_i16:
5872   case NEON::BI__builtin_neon_vduph_laneq_i16:
5873     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
5874     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5875                                         "vgetq_lane");
5876   case NEON::BI__builtin_neon_vget_lane_i32:
5877   case NEON::BI__builtin_neon_vdups_lane_i32:
5878     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
5879     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5880                                         "vget_lane");
5881   case NEON::BI__builtin_neon_vdups_lane_f32:
5882     Ops[0] = Builder.CreateBitCast(Ops[0],
5883         llvm::VectorType::get(FloatTy, 2));
5884     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5885                                         "vdups_lane");
5886   case NEON::BI__builtin_neon_vgetq_lane_i32:
5887   case NEON::BI__builtin_neon_vdups_laneq_i32:
5888     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
5889     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5890                                         "vgetq_lane");
5891   case NEON::BI__builtin_neon_vget_lane_i64:
5892   case NEON::BI__builtin_neon_vdupd_lane_i64:
5893     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
5894     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5895                                         "vget_lane");
5896   case NEON::BI__builtin_neon_vdupd_lane_f64:
5897     Ops[0] = Builder.CreateBitCast(Ops[0],
5898         llvm::VectorType::get(DoubleTy, 1));
5899     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5900                                         "vdupd_lane");
5901   case NEON::BI__builtin_neon_vgetq_lane_i64:
5902   case NEON::BI__builtin_neon_vdupd_laneq_i64:
5903     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
5904     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5905                                         "vgetq_lane");
5906   case NEON::BI__builtin_neon_vget_lane_f32:
5907     Ops[0] = Builder.CreateBitCast(Ops[0],
5908         llvm::VectorType::get(FloatTy, 2));
5909     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5910                                         "vget_lane");
5911   case NEON::BI__builtin_neon_vget_lane_f64:
5912     Ops[0] = Builder.CreateBitCast(Ops[0],
5913         llvm::VectorType::get(DoubleTy, 1));
5914     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5915                                         "vget_lane");
5916   case NEON::BI__builtin_neon_vgetq_lane_f32:
5917   case NEON::BI__builtin_neon_vdups_laneq_f32:
5918     Ops[0] = Builder.CreateBitCast(Ops[0],
5919         llvm::VectorType::get(FloatTy, 4));
5920     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5921                                         "vgetq_lane");
5922   case NEON::BI__builtin_neon_vgetq_lane_f64:
5923   case NEON::BI__builtin_neon_vdupd_laneq_f64:
5924     Ops[0] = Builder.CreateBitCast(Ops[0],
5925         llvm::VectorType::get(DoubleTy, 2));
5926     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5927                                         "vgetq_lane");
5928   case NEON::BI__builtin_neon_vaddd_s64:
5929   case NEON::BI__builtin_neon_vaddd_u64:
5930     return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
5931   case NEON::BI__builtin_neon_vsubd_s64:
5932   case NEON::BI__builtin_neon_vsubd_u64:
5933     return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
5934   case NEON::BI__builtin_neon_vqdmlalh_s16:
5935   case NEON::BI__builtin_neon_vqdmlslh_s16: {
5936     SmallVector<Value *, 2> ProductOps;
5937     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5938     ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
5939     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5940     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5941                           ProductOps, "vqdmlXl");
5942     Constant *CI = ConstantInt::get(SizeTy, 0);
5943     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5944 
5945     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
5946                                         ? Intrinsic::aarch64_neon_sqadd
5947                                         : Intrinsic::aarch64_neon_sqsub;
5948     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
5949   }
5950   case NEON::BI__builtin_neon_vqshlud_n_s64: {
5951     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5952     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5953     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
5954                         Ops, "vqshlu_n");
5955   }
5956   case NEON::BI__builtin_neon_vqshld_n_u64:
5957   case NEON::BI__builtin_neon_vqshld_n_s64: {
5958     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
5959                                    ? Intrinsic::aarch64_neon_uqshl
5960                                    : Intrinsic::aarch64_neon_sqshl;
5961     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5962     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5963     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
5964   }
5965   case NEON::BI__builtin_neon_vrshrd_n_u64:
5966   case NEON::BI__builtin_neon_vrshrd_n_s64: {
5967     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
5968                                    ? Intrinsic::aarch64_neon_urshl
5969                                    : Intrinsic::aarch64_neon_srshl;
5970     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5971     int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
5972     Ops[1] = ConstantInt::get(Int64Ty, -SV);
5973     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
5974   }
5975   case NEON::BI__builtin_neon_vrsrad_n_u64:
5976   case NEON::BI__builtin_neon_vrsrad_n_s64: {
5977     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
5978                                    ? Intrinsic::aarch64_neon_urshl
5979                                    : Intrinsic::aarch64_neon_srshl;
5980     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5981     Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
5982     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
5983                                 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
5984     return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
5985   }
5986   case NEON::BI__builtin_neon_vshld_n_s64:
5987   case NEON::BI__builtin_neon_vshld_n_u64: {
5988     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5989     return Builder.CreateShl(
5990         Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
5991   }
5992   case NEON::BI__builtin_neon_vshrd_n_s64: {
5993     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5994     return Builder.CreateAShr(
5995         Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5996                                                    Amt->getZExtValue())),
5997         "shrd_n");
5998   }
5999   case NEON::BI__builtin_neon_vshrd_n_u64: {
6000     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6001     uint64_t ShiftAmt = Amt->getZExtValue();
6002     // Right-shifting an unsigned value by its size yields 0.
6003     if (ShiftAmt == 64)
6004       return ConstantInt::get(Int64Ty, 0);
6005     return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
6006                               "shrd_n");
6007   }
6008   case NEON::BI__builtin_neon_vsrad_n_s64: {
6009     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
6010     Ops[1] = Builder.CreateAShr(
6011         Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
6012                                                    Amt->getZExtValue())),
6013         "shrd_n");
6014     return Builder.CreateAdd(Ops[0], Ops[1]);
6015   }
6016   case NEON::BI__builtin_neon_vsrad_n_u64: {
6017     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
6018     uint64_t ShiftAmt = Amt->getZExtValue();
6019     // Right-shifting an unsigned value by its size yields 0.
6020     // As Op + 0 = Op, return Ops[0] directly.
6021     if (ShiftAmt == 64)
6022       return Ops[0];
6023     Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
6024                                 "shrd_n");
6025     return Builder.CreateAdd(Ops[0], Ops[1]);
6026   }
6027   case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
6028   case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
6029   case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
6030   case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
6031     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
6032                                           "lane");
6033     SmallVector<Value *, 2> ProductOps;
6034     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
6035     ProductOps.push_back(vectorWrapScalar16(Ops[2]));
6036     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
6037     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6038                           ProductOps, "vqdmlXl");
6039     Constant *CI = ConstantInt::get(SizeTy, 0);
6040     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6041     Ops.pop_back();
6042 
6043     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
6044                        BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
6045                           ? Intrinsic::aarch64_neon_sqadd
6046                           : Intrinsic::aarch64_neon_sqsub;
6047     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
6048   }
6049   case NEON::BI__builtin_neon_vqdmlals_s32:
6050   case NEON::BI__builtin_neon_vqdmlsls_s32: {
6051     SmallVector<Value *, 2> ProductOps;
6052     ProductOps.push_back(Ops[1]);
6053     ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
6054     Ops[1] =
6055         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6056                      ProductOps, "vqdmlXl");
6057 
6058     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
6059                                         ? Intrinsic::aarch64_neon_sqadd
6060                                         : Intrinsic::aarch64_neon_sqsub;
6061     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
6062   }
6063   case NEON::BI__builtin_neon_vqdmlals_lane_s32:
6064   case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
6065   case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
6066   case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
6067     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
6068                                           "lane");
6069     SmallVector<Value *, 2> ProductOps;
6070     ProductOps.push_back(Ops[1]);
6071     ProductOps.push_back(Ops[2]);
6072     Ops[1] =
6073         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6074                      ProductOps, "vqdmlXl");
6075     Ops.pop_back();
6076 
6077     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
6078                        BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
6079                           ? Intrinsic::aarch64_neon_sqadd
6080                           : Intrinsic::aarch64_neon_sqsub;
6081     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
6082   }
6083   }
6084 
6085   llvm::VectorType *VTy = GetNeonType(this, Type);
6086   llvm::Type *Ty = VTy;
6087   if (!Ty)
6088     return nullptr;
6089 
6090   // Not all intrinsics handled by the common case work for AArch64 yet, so only
6091   // defer to common code if it's been added to our special map.
6092   Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
6093                                    AArch64SIMDIntrinsicsProvenSorted);
6094 
6095   if (Builtin)
6096     return EmitCommonNeonBuiltinExpr(
6097         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
6098         Builtin->NameHint, Builtin->TypeModifier, E, Ops,
6099         /*never use addresses*/ Address::invalid(), Address::invalid());
6100 
6101   if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops))
6102     return V;
6103 
6104   unsigned Int;
6105   switch (BuiltinID) {
6106   default: return nullptr;
6107   case NEON::BI__builtin_neon_vbsl_v:
6108   case NEON::BI__builtin_neon_vbslq_v: {
6109     llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6110     Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
6111     Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
6112     Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
6113 
6114     Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
6115     Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
6116     Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
6117     return Builder.CreateBitCast(Ops[0], Ty);
6118   }
6119   case NEON::BI__builtin_neon_vfma_lane_v:
6120   case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
6121     // The ARM builtins (and instructions) have the addend as the first
6122     // operand, but the 'fma' intrinsics have it last. Swap it around here.
6123     Value *Addend = Ops[0];
6124     Value *Multiplicand = Ops[1];
6125     Value *LaneSource = Ops[2];
6126     Ops[0] = Multiplicand;
6127     Ops[1] = LaneSource;
6128     Ops[2] = Addend;
6129 
6130     // Now adjust things to handle the lane access.
6131     llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
6132       llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
6133       VTy;
6134     llvm::Constant *cst = cast<Constant>(Ops[3]);
6135     Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
6136     Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
6137     Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
6138 
6139     Ops.pop_back();
6140     Int = Intrinsic::fma;
6141     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
6142   }
6143   case NEON::BI__builtin_neon_vfma_laneq_v: {
6144     llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6145     // v1f64 fma should be mapped to Neon scalar f64 fma
6146     if (VTy && VTy->getElementType() == DoubleTy) {
6147       Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6148       Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6149       llvm::Type *VTy = GetNeonType(this,
6150         NeonTypeFlags(NeonTypeFlags::Float64, false, true));
6151       Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
6152       Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6153       Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
6154       Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
6155       return Builder.CreateBitCast(Result, Ty);
6156     }
6157     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6158     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6159     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6160 
6161     llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
6162                                             VTy->getNumElements() * 2);
6163     Ops[2] = Builder.CreateBitCast(Ops[2], STy);
6164     Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
6165                                                cast<ConstantInt>(Ops[3]));
6166     Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
6167 
6168     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
6169   }
6170   case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6171     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6172     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6173     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6174 
6175     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6176     Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
6177     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
6178   }
6179   case NEON::BI__builtin_neon_vfmas_lane_f32:
6180   case NEON::BI__builtin_neon_vfmas_laneq_f32:
6181   case NEON::BI__builtin_neon_vfmad_lane_f64:
6182   case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6183     Ops.push_back(EmitScalarExpr(E->getArg(3)));
6184     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
6185     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6186     Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6187     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
6188   }
6189   case NEON::BI__builtin_neon_vmull_v:
6190     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6191     Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6192     if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6193     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
6194   case NEON::BI__builtin_neon_vmax_v:
6195   case NEON::BI__builtin_neon_vmaxq_v:
6196     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6197     Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6198     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6199     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
6200   case NEON::BI__builtin_neon_vmin_v:
6201   case NEON::BI__builtin_neon_vminq_v:
6202     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6203     Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6204     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6205     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
6206   case NEON::BI__builtin_neon_vabd_v:
6207   case NEON::BI__builtin_neon_vabdq_v:
6208     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6209     Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6210     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6211     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
6212   case NEON::BI__builtin_neon_vpadal_v:
6213   case NEON::BI__builtin_neon_vpadalq_v: {
6214     unsigned ArgElts = VTy->getNumElements();
6215     llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
6216     unsigned BitWidth = EltTy->getBitWidth();
6217     llvm::Type *ArgTy = llvm::VectorType::get(
6218         llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
6219     llvm::Type* Tys[2] = { VTy, ArgTy };
6220     Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6221     SmallVector<llvm::Value*, 1> TmpOps;
6222     TmpOps.push_back(Ops[1]);
6223     Function *F = CGM.getIntrinsic(Int, Tys);
6224     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
6225     llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
6226     return Builder.CreateAdd(tmp, addend);
6227   }
6228   case NEON::BI__builtin_neon_vpmin_v:
6229   case NEON::BI__builtin_neon_vpminq_v:
6230     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6231     Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6232     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6233     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
6234   case NEON::BI__builtin_neon_vpmax_v:
6235   case NEON::BI__builtin_neon_vpmaxq_v:
6236     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6237     Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6238     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6239     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
6240   case NEON::BI__builtin_neon_vminnm_v:
6241   case NEON::BI__builtin_neon_vminnmq_v:
6242     Int = Intrinsic::aarch64_neon_fminnm;
6243     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
6244   case NEON::BI__builtin_neon_vmaxnm_v:
6245   case NEON::BI__builtin_neon_vmaxnmq_v:
6246     Int = Intrinsic::aarch64_neon_fmaxnm;
6247     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
6248   case NEON::BI__builtin_neon_vrecpss_f32: {
6249     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6250     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
6251                         Ops, "vrecps");
6252   }
6253   case NEON::BI__builtin_neon_vrecpsd_f64: {
6254     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6255     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
6256                         Ops, "vrecps");
6257   }
6258   case NEON::BI__builtin_neon_vqshrun_n_v:
6259     Int = Intrinsic::aarch64_neon_sqshrun;
6260     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
6261   case NEON::BI__builtin_neon_vqrshrun_n_v:
6262     Int = Intrinsic::aarch64_neon_sqrshrun;
6263     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
6264   case NEON::BI__builtin_neon_vqshrn_n_v:
6265     Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6266     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
6267   case NEON::BI__builtin_neon_vrshrn_n_v:
6268     Int = Intrinsic::aarch64_neon_rshrn;
6269     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
6270   case NEON::BI__builtin_neon_vqrshrn_n_v:
6271     Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6272     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
6273   case NEON::BI__builtin_neon_vrnda_v:
6274   case NEON::BI__builtin_neon_vrndaq_v: {
6275     Int = Intrinsic::round;
6276     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
6277   }
6278   case NEON::BI__builtin_neon_vrndi_v:
6279   case NEON::BI__builtin_neon_vrndiq_v: {
6280     Int = Intrinsic::nearbyint;
6281     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
6282   }
6283   case NEON::BI__builtin_neon_vrndm_v:
6284   case NEON::BI__builtin_neon_vrndmq_v: {
6285     Int = Intrinsic::floor;
6286     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
6287   }
6288   case NEON::BI__builtin_neon_vrndn_v:
6289   case NEON::BI__builtin_neon_vrndnq_v: {
6290     Int = Intrinsic::aarch64_neon_frintn;
6291     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
6292   }
6293   case NEON::BI__builtin_neon_vrndp_v:
6294   case NEON::BI__builtin_neon_vrndpq_v: {
6295     Int = Intrinsic::ceil;
6296     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
6297   }
6298   case NEON::BI__builtin_neon_vrndx_v:
6299   case NEON::BI__builtin_neon_vrndxq_v: {
6300     Int = Intrinsic::rint;
6301     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
6302   }
6303   case NEON::BI__builtin_neon_vrnd_v:
6304   case NEON::BI__builtin_neon_vrndq_v: {
6305     Int = Intrinsic::trunc;
6306     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
6307   }
6308   case NEON::BI__builtin_neon_vceqz_v:
6309   case NEON::BI__builtin_neon_vceqzq_v:
6310     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
6311                                          ICmpInst::ICMP_EQ, "vceqz");
6312   case NEON::BI__builtin_neon_vcgez_v:
6313   case NEON::BI__builtin_neon_vcgezq_v:
6314     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
6315                                          ICmpInst::ICMP_SGE, "vcgez");
6316   case NEON::BI__builtin_neon_vclez_v:
6317   case NEON::BI__builtin_neon_vclezq_v:
6318     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
6319                                          ICmpInst::ICMP_SLE, "vclez");
6320   case NEON::BI__builtin_neon_vcgtz_v:
6321   case NEON::BI__builtin_neon_vcgtzq_v:
6322     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
6323                                          ICmpInst::ICMP_SGT, "vcgtz");
6324   case NEON::BI__builtin_neon_vcltz_v:
6325   case NEON::BI__builtin_neon_vcltzq_v:
6326     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
6327                                          ICmpInst::ICMP_SLT, "vcltz");
6328   case NEON::BI__builtin_neon_vcvt_f64_v:
6329   case NEON::BI__builtin_neon_vcvtq_f64_v:
6330     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6331     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
6332     return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
6333                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
6334   case NEON::BI__builtin_neon_vcvt_f64_f32: {
6335     assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
6336            "unexpected vcvt_f64_f32 builtin");
6337     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
6338     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6339 
6340     return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
6341   }
6342   case NEON::BI__builtin_neon_vcvt_f32_f64: {
6343     assert(Type.getEltType() == NeonTypeFlags::Float32 &&
6344            "unexpected vcvt_f32_f64 builtin");
6345     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
6346     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6347 
6348     return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
6349   }
6350   case NEON::BI__builtin_neon_vcvt_s32_v:
6351   case NEON::BI__builtin_neon_vcvt_u32_v:
6352   case NEON::BI__builtin_neon_vcvt_s64_v:
6353   case NEON::BI__builtin_neon_vcvt_u64_v:
6354   case NEON::BI__builtin_neon_vcvtq_s32_v:
6355   case NEON::BI__builtin_neon_vcvtq_u32_v:
6356   case NEON::BI__builtin_neon_vcvtq_s64_v:
6357   case NEON::BI__builtin_neon_vcvtq_u64_v: {
6358     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
6359     if (usgn)
6360       return Builder.CreateFPToUI(Ops[0], Ty);
6361     return Builder.CreateFPToSI(Ops[0], Ty);
6362   }
6363   case NEON::BI__builtin_neon_vcvta_s32_v:
6364   case NEON::BI__builtin_neon_vcvtaq_s32_v:
6365   case NEON::BI__builtin_neon_vcvta_u32_v:
6366   case NEON::BI__builtin_neon_vcvtaq_u32_v:
6367   case NEON::BI__builtin_neon_vcvta_s64_v:
6368   case NEON::BI__builtin_neon_vcvtaq_s64_v:
6369   case NEON::BI__builtin_neon_vcvta_u64_v:
6370   case NEON::BI__builtin_neon_vcvtaq_u64_v: {
6371     Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6372     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6373     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
6374   }
6375   case NEON::BI__builtin_neon_vcvtm_s32_v:
6376   case NEON::BI__builtin_neon_vcvtmq_s32_v:
6377   case NEON::BI__builtin_neon_vcvtm_u32_v:
6378   case NEON::BI__builtin_neon_vcvtmq_u32_v:
6379   case NEON::BI__builtin_neon_vcvtm_s64_v:
6380   case NEON::BI__builtin_neon_vcvtmq_s64_v:
6381   case NEON::BI__builtin_neon_vcvtm_u64_v:
6382   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6383     Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6384     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6385     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
6386   }
6387   case NEON::BI__builtin_neon_vcvtn_s32_v:
6388   case NEON::BI__builtin_neon_vcvtnq_s32_v:
6389   case NEON::BI__builtin_neon_vcvtn_u32_v:
6390   case NEON::BI__builtin_neon_vcvtnq_u32_v:
6391   case NEON::BI__builtin_neon_vcvtn_s64_v:
6392   case NEON::BI__builtin_neon_vcvtnq_s64_v:
6393   case NEON::BI__builtin_neon_vcvtn_u64_v:
6394   case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6395     Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6396     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6397     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
6398   }
6399   case NEON::BI__builtin_neon_vcvtp_s32_v:
6400   case NEON::BI__builtin_neon_vcvtpq_s32_v:
6401   case NEON::BI__builtin_neon_vcvtp_u32_v:
6402   case NEON::BI__builtin_neon_vcvtpq_u32_v:
6403   case NEON::BI__builtin_neon_vcvtp_s64_v:
6404   case NEON::BI__builtin_neon_vcvtpq_s64_v:
6405   case NEON::BI__builtin_neon_vcvtp_u64_v:
6406   case NEON::BI__builtin_neon_vcvtpq_u64_v: {
6407     Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
6408     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6409     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
6410   }
6411   case NEON::BI__builtin_neon_vmulx_v:
6412   case NEON::BI__builtin_neon_vmulxq_v: {
6413     Int = Intrinsic::aarch64_neon_fmulx;
6414     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
6415   }
6416   case NEON::BI__builtin_neon_vmul_lane_v:
6417   case NEON::BI__builtin_neon_vmul_laneq_v: {
6418     // v1f64 vmul_lane should be mapped to Neon scalar mul lane
6419     bool Quad = false;
6420     if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
6421       Quad = true;
6422     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6423     llvm::Type *VTy = GetNeonType(this,
6424       NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
6425     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6426     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
6427     Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
6428     return Builder.CreateBitCast(Result, Ty);
6429   }
6430   case NEON::BI__builtin_neon_vnegd_s64:
6431     return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
6432   case NEON::BI__builtin_neon_vpmaxnm_v:
6433   case NEON::BI__builtin_neon_vpmaxnmq_v: {
6434     Int = Intrinsic::aarch64_neon_fmaxnmp;
6435     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
6436   }
6437   case NEON::BI__builtin_neon_vpminnm_v:
6438   case NEON::BI__builtin_neon_vpminnmq_v: {
6439     Int = Intrinsic::aarch64_neon_fminnmp;
6440     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
6441   }
6442   case NEON::BI__builtin_neon_vsqrt_v:
6443   case NEON::BI__builtin_neon_vsqrtq_v: {
6444     Int = Intrinsic::sqrt;
6445     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6446     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
6447   }
6448   case NEON::BI__builtin_neon_vrbit_v:
6449   case NEON::BI__builtin_neon_vrbitq_v: {
6450     Int = Intrinsic::aarch64_neon_rbit;
6451     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
6452   }
6453   case NEON::BI__builtin_neon_vaddv_u8:
6454     // FIXME: These are handled by the AArch64 scalar code.
6455     usgn = true;
6456     // FALLTHROUGH
6457   case NEON::BI__builtin_neon_vaddv_s8: {
6458     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6459     Ty = Int32Ty;
6460     VTy = llvm::VectorType::get(Int8Ty, 8);
6461     llvm::Type *Tys[2] = { Ty, VTy };
6462     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6463     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6464     return Builder.CreateTrunc(Ops[0], Int8Ty);
6465   }
6466   case NEON::BI__builtin_neon_vaddv_u16:
6467     usgn = true;
6468     // FALLTHROUGH
6469   case NEON::BI__builtin_neon_vaddv_s16: {
6470     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6471     Ty = Int32Ty;
6472     VTy = llvm::VectorType::get(Int16Ty, 4);
6473     llvm::Type *Tys[2] = { Ty, VTy };
6474     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6475     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6476     return Builder.CreateTrunc(Ops[0], Int16Ty);
6477   }
6478   case NEON::BI__builtin_neon_vaddvq_u8:
6479     usgn = true;
6480     // FALLTHROUGH
6481   case NEON::BI__builtin_neon_vaddvq_s8: {
6482     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6483     Ty = Int32Ty;
6484     VTy = llvm::VectorType::get(Int8Ty, 16);
6485     llvm::Type *Tys[2] = { Ty, VTy };
6486     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6487     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6488     return Builder.CreateTrunc(Ops[0], Int8Ty);
6489   }
6490   case NEON::BI__builtin_neon_vaddvq_u16:
6491     usgn = true;
6492     // FALLTHROUGH
6493   case NEON::BI__builtin_neon_vaddvq_s16: {
6494     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6495     Ty = Int32Ty;
6496     VTy = llvm::VectorType::get(Int16Ty, 8);
6497     llvm::Type *Tys[2] = { Ty, VTy };
6498     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6499     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6500     return Builder.CreateTrunc(Ops[0], Int16Ty);
6501   }
6502   case NEON::BI__builtin_neon_vmaxv_u8: {
6503     Int = Intrinsic::aarch64_neon_umaxv;
6504     Ty = Int32Ty;
6505     VTy = llvm::VectorType::get(Int8Ty, 8);
6506     llvm::Type *Tys[2] = { Ty, VTy };
6507     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6508     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6509     return Builder.CreateTrunc(Ops[0], Int8Ty);
6510   }
6511   case NEON::BI__builtin_neon_vmaxv_u16: {
6512     Int = Intrinsic::aarch64_neon_umaxv;
6513     Ty = Int32Ty;
6514     VTy = llvm::VectorType::get(Int16Ty, 4);
6515     llvm::Type *Tys[2] = { Ty, VTy };
6516     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6517     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6518     return Builder.CreateTrunc(Ops[0], Int16Ty);
6519   }
6520   case NEON::BI__builtin_neon_vmaxvq_u8: {
6521     Int = Intrinsic::aarch64_neon_umaxv;
6522     Ty = Int32Ty;
6523     VTy = llvm::VectorType::get(Int8Ty, 16);
6524     llvm::Type *Tys[2] = { Ty, VTy };
6525     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6526     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6527     return Builder.CreateTrunc(Ops[0], Int8Ty);
6528   }
6529   case NEON::BI__builtin_neon_vmaxvq_u16: {
6530     Int = Intrinsic::aarch64_neon_umaxv;
6531     Ty = Int32Ty;
6532     VTy = llvm::VectorType::get(Int16Ty, 8);
6533     llvm::Type *Tys[2] = { Ty, VTy };
6534     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6535     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6536     return Builder.CreateTrunc(Ops[0], Int16Ty);
6537   }
6538   case NEON::BI__builtin_neon_vmaxv_s8: {
6539     Int = Intrinsic::aarch64_neon_smaxv;
6540     Ty = Int32Ty;
6541     VTy = llvm::VectorType::get(Int8Ty, 8);
6542     llvm::Type *Tys[2] = { Ty, VTy };
6543     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6544     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6545     return Builder.CreateTrunc(Ops[0], Int8Ty);
6546   }
6547   case NEON::BI__builtin_neon_vmaxv_s16: {
6548     Int = Intrinsic::aarch64_neon_smaxv;
6549     Ty = Int32Ty;
6550     VTy = llvm::VectorType::get(Int16Ty, 4);
6551     llvm::Type *Tys[2] = { Ty, VTy };
6552     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6553     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6554     return Builder.CreateTrunc(Ops[0], Int16Ty);
6555   }
6556   case NEON::BI__builtin_neon_vmaxvq_s8: {
6557     Int = Intrinsic::aarch64_neon_smaxv;
6558     Ty = Int32Ty;
6559     VTy = llvm::VectorType::get(Int8Ty, 16);
6560     llvm::Type *Tys[2] = { Ty, VTy };
6561     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6562     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6563     return Builder.CreateTrunc(Ops[0], Int8Ty);
6564   }
6565   case NEON::BI__builtin_neon_vmaxvq_s16: {
6566     Int = Intrinsic::aarch64_neon_smaxv;
6567     Ty = Int32Ty;
6568     VTy = llvm::VectorType::get(Int16Ty, 8);
6569     llvm::Type *Tys[2] = { Ty, VTy };
6570     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6571     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6572     return Builder.CreateTrunc(Ops[0], Int16Ty);
6573   }
6574   case NEON::BI__builtin_neon_vminv_u8: {
6575     Int = Intrinsic::aarch64_neon_uminv;
6576     Ty = Int32Ty;
6577     VTy = llvm::VectorType::get(Int8Ty, 8);
6578     llvm::Type *Tys[2] = { Ty, VTy };
6579     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6580     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6581     return Builder.CreateTrunc(Ops[0], Int8Ty);
6582   }
6583   case NEON::BI__builtin_neon_vminv_u16: {
6584     Int = Intrinsic::aarch64_neon_uminv;
6585     Ty = Int32Ty;
6586     VTy = llvm::VectorType::get(Int16Ty, 4);
6587     llvm::Type *Tys[2] = { Ty, VTy };
6588     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6589     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6590     return Builder.CreateTrunc(Ops[0], Int16Ty);
6591   }
6592   case NEON::BI__builtin_neon_vminvq_u8: {
6593     Int = Intrinsic::aarch64_neon_uminv;
6594     Ty = Int32Ty;
6595     VTy = llvm::VectorType::get(Int8Ty, 16);
6596     llvm::Type *Tys[2] = { Ty, VTy };
6597     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6598     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6599     return Builder.CreateTrunc(Ops[0], Int8Ty);
6600   }
6601   case NEON::BI__builtin_neon_vminvq_u16: {
6602     Int = Intrinsic::aarch64_neon_uminv;
6603     Ty = Int32Ty;
6604     VTy = llvm::VectorType::get(Int16Ty, 8);
6605     llvm::Type *Tys[2] = { Ty, VTy };
6606     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6607     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6608     return Builder.CreateTrunc(Ops[0], Int16Ty);
6609   }
6610   case NEON::BI__builtin_neon_vminv_s8: {
6611     Int = Intrinsic::aarch64_neon_sminv;
6612     Ty = Int32Ty;
6613     VTy = llvm::VectorType::get(Int8Ty, 8);
6614     llvm::Type *Tys[2] = { Ty, VTy };
6615     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6616     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6617     return Builder.CreateTrunc(Ops[0], Int8Ty);
6618   }
6619   case NEON::BI__builtin_neon_vminv_s16: {
6620     Int = Intrinsic::aarch64_neon_sminv;
6621     Ty = Int32Ty;
6622     VTy = llvm::VectorType::get(Int16Ty, 4);
6623     llvm::Type *Tys[2] = { Ty, VTy };
6624     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6625     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6626     return Builder.CreateTrunc(Ops[0], Int16Ty);
6627   }
6628   case NEON::BI__builtin_neon_vminvq_s8: {
6629     Int = Intrinsic::aarch64_neon_sminv;
6630     Ty = Int32Ty;
6631     VTy = llvm::VectorType::get(Int8Ty, 16);
6632     llvm::Type *Tys[2] = { Ty, VTy };
6633     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6634     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6635     return Builder.CreateTrunc(Ops[0], Int8Ty);
6636   }
6637   case NEON::BI__builtin_neon_vminvq_s16: {
6638     Int = Intrinsic::aarch64_neon_sminv;
6639     Ty = Int32Ty;
6640     VTy = llvm::VectorType::get(Int16Ty, 8);
6641     llvm::Type *Tys[2] = { Ty, VTy };
6642     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6643     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6644     return Builder.CreateTrunc(Ops[0], Int16Ty);
6645   }
6646   case NEON::BI__builtin_neon_vmul_n_f64: {
6647     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6648     Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
6649     return Builder.CreateFMul(Ops[0], RHS);
6650   }
6651   case NEON::BI__builtin_neon_vaddlv_u8: {
6652     Int = Intrinsic::aarch64_neon_uaddlv;
6653     Ty = Int32Ty;
6654     VTy = llvm::VectorType::get(Int8Ty, 8);
6655     llvm::Type *Tys[2] = { Ty, VTy };
6656     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6657     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6658     return Builder.CreateTrunc(Ops[0], Int16Ty);
6659   }
6660   case NEON::BI__builtin_neon_vaddlv_u16: {
6661     Int = Intrinsic::aarch64_neon_uaddlv;
6662     Ty = Int32Ty;
6663     VTy = llvm::VectorType::get(Int16Ty, 4);
6664     llvm::Type *Tys[2] = { Ty, VTy };
6665     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6666     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6667   }
6668   case NEON::BI__builtin_neon_vaddlvq_u8: {
6669     Int = Intrinsic::aarch64_neon_uaddlv;
6670     Ty = Int32Ty;
6671     VTy = llvm::VectorType::get(Int8Ty, 16);
6672     llvm::Type *Tys[2] = { Ty, VTy };
6673     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6674     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6675     return Builder.CreateTrunc(Ops[0], Int16Ty);
6676   }
6677   case NEON::BI__builtin_neon_vaddlvq_u16: {
6678     Int = Intrinsic::aarch64_neon_uaddlv;
6679     Ty = Int32Ty;
6680     VTy = llvm::VectorType::get(Int16Ty, 8);
6681     llvm::Type *Tys[2] = { Ty, VTy };
6682     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6683     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6684   }
6685   case NEON::BI__builtin_neon_vaddlv_s8: {
6686     Int = Intrinsic::aarch64_neon_saddlv;
6687     Ty = Int32Ty;
6688     VTy = llvm::VectorType::get(Int8Ty, 8);
6689     llvm::Type *Tys[2] = { Ty, VTy };
6690     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6691     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6692     return Builder.CreateTrunc(Ops[0], Int16Ty);
6693   }
6694   case NEON::BI__builtin_neon_vaddlv_s16: {
6695     Int = Intrinsic::aarch64_neon_saddlv;
6696     Ty = Int32Ty;
6697     VTy = llvm::VectorType::get(Int16Ty, 4);
6698     llvm::Type *Tys[2] = { Ty, VTy };
6699     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6700     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6701   }
6702   case NEON::BI__builtin_neon_vaddlvq_s8: {
6703     Int = Intrinsic::aarch64_neon_saddlv;
6704     Ty = Int32Ty;
6705     VTy = llvm::VectorType::get(Int8Ty, 16);
6706     llvm::Type *Tys[2] = { Ty, VTy };
6707     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6708     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6709     return Builder.CreateTrunc(Ops[0], Int16Ty);
6710   }
6711   case NEON::BI__builtin_neon_vaddlvq_s16: {
6712     Int = Intrinsic::aarch64_neon_saddlv;
6713     Ty = Int32Ty;
6714     VTy = llvm::VectorType::get(Int16Ty, 8);
6715     llvm::Type *Tys[2] = { Ty, VTy };
6716     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6717     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6718   }
6719   case NEON::BI__builtin_neon_vsri_n_v:
6720   case NEON::BI__builtin_neon_vsriq_n_v: {
6721     Int = Intrinsic::aarch64_neon_vsri;
6722     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6723     return EmitNeonCall(Intrin, Ops, "vsri_n");
6724   }
6725   case NEON::BI__builtin_neon_vsli_n_v:
6726   case NEON::BI__builtin_neon_vsliq_n_v: {
6727     Int = Intrinsic::aarch64_neon_vsli;
6728     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6729     return EmitNeonCall(Intrin, Ops, "vsli_n");
6730   }
6731   case NEON::BI__builtin_neon_vsra_n_v:
6732   case NEON::BI__builtin_neon_vsraq_n_v:
6733     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6734     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
6735     return Builder.CreateAdd(Ops[0], Ops[1]);
6736   case NEON::BI__builtin_neon_vrsra_n_v:
6737   case NEON::BI__builtin_neon_vrsraq_n_v: {
6738     Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
6739     SmallVector<llvm::Value*,2> TmpOps;
6740     TmpOps.push_back(Ops[1]);
6741     TmpOps.push_back(Ops[2]);
6742     Function* F = CGM.getIntrinsic(Int, Ty);
6743     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
6744     Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
6745     return Builder.CreateAdd(Ops[0], tmp);
6746   }
6747     // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
6748     // of an Align parameter here.
6749   case NEON::BI__builtin_neon_vld1_x2_v:
6750   case NEON::BI__builtin_neon_vld1q_x2_v:
6751   case NEON::BI__builtin_neon_vld1_x3_v:
6752   case NEON::BI__builtin_neon_vld1q_x3_v:
6753   case NEON::BI__builtin_neon_vld1_x4_v:
6754   case NEON::BI__builtin_neon_vld1q_x4_v: {
6755     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6756     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6757     llvm::Type *Tys[2] = { VTy, PTy };
6758     unsigned Int;
6759     switch (BuiltinID) {
6760     case NEON::BI__builtin_neon_vld1_x2_v:
6761     case NEON::BI__builtin_neon_vld1q_x2_v:
6762       Int = Intrinsic::aarch64_neon_ld1x2;
6763       break;
6764     case NEON::BI__builtin_neon_vld1_x3_v:
6765     case NEON::BI__builtin_neon_vld1q_x3_v:
6766       Int = Intrinsic::aarch64_neon_ld1x3;
6767       break;
6768     case NEON::BI__builtin_neon_vld1_x4_v:
6769     case NEON::BI__builtin_neon_vld1q_x4_v:
6770       Int = Intrinsic::aarch64_neon_ld1x4;
6771       break;
6772     }
6773     Function *F = CGM.getIntrinsic(Int, Tys);
6774     Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
6775     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6776     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6777     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6778   }
6779   case NEON::BI__builtin_neon_vst1_x2_v:
6780   case NEON::BI__builtin_neon_vst1q_x2_v:
6781   case NEON::BI__builtin_neon_vst1_x3_v:
6782   case NEON::BI__builtin_neon_vst1q_x3_v:
6783   case NEON::BI__builtin_neon_vst1_x4_v:
6784   case NEON::BI__builtin_neon_vst1q_x4_v: {
6785     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6786     llvm::Type *Tys[2] = { VTy, PTy };
6787     unsigned Int;
6788     switch (BuiltinID) {
6789     case NEON::BI__builtin_neon_vst1_x2_v:
6790     case NEON::BI__builtin_neon_vst1q_x2_v:
6791       Int = Intrinsic::aarch64_neon_st1x2;
6792       break;
6793     case NEON::BI__builtin_neon_vst1_x3_v:
6794     case NEON::BI__builtin_neon_vst1q_x3_v:
6795       Int = Intrinsic::aarch64_neon_st1x3;
6796       break;
6797     case NEON::BI__builtin_neon_vst1_x4_v:
6798     case NEON::BI__builtin_neon_vst1q_x4_v:
6799       Int = Intrinsic::aarch64_neon_st1x4;
6800       break;
6801     }
6802     std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6803     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
6804   }
6805   case NEON::BI__builtin_neon_vld1_v:
6806   case NEON::BI__builtin_neon_vld1q_v: {
6807     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6808     auto Alignment = CharUnits::fromQuantity(
6809         BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16);
6810     return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment);
6811   }
6812   case NEON::BI__builtin_neon_vst1_v:
6813   case NEON::BI__builtin_neon_vst1q_v:
6814     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6815     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6816     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6817   case NEON::BI__builtin_neon_vld1_lane_v:
6818   case NEON::BI__builtin_neon_vld1q_lane_v: {
6819     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6820     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6821     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6822     auto Alignment = CharUnits::fromQuantity(
6823         BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16);
6824     Ops[0] =
6825         Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
6826     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
6827   }
6828   case NEON::BI__builtin_neon_vld1_dup_v:
6829   case NEON::BI__builtin_neon_vld1q_dup_v: {
6830     Value *V = UndefValue::get(Ty);
6831     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6832     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6833     auto Alignment = CharUnits::fromQuantity(
6834         BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 8 : 16);
6835     Ops[0] =
6836         Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
6837     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
6838     Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
6839     return EmitNeonSplat(Ops[0], CI);
6840   }
6841   case NEON::BI__builtin_neon_vst1_lane_v:
6842   case NEON::BI__builtin_neon_vst1q_lane_v:
6843     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6844     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6845     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6846     return Builder.CreateDefaultAlignedStore(Ops[1],
6847                                              Builder.CreateBitCast(Ops[0], Ty));
6848   case NEON::BI__builtin_neon_vld2_v:
6849   case NEON::BI__builtin_neon_vld2q_v: {
6850     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6851     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6852     llvm::Type *Tys[2] = { VTy, PTy };
6853     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
6854     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6855     Ops[0] = Builder.CreateBitCast(Ops[0],
6856                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6857     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6858   }
6859   case NEON::BI__builtin_neon_vld3_v:
6860   case NEON::BI__builtin_neon_vld3q_v: {
6861     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6862     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6863     llvm::Type *Tys[2] = { VTy, PTy };
6864     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
6865     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6866     Ops[0] = Builder.CreateBitCast(Ops[0],
6867                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6868     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6869   }
6870   case NEON::BI__builtin_neon_vld4_v:
6871   case NEON::BI__builtin_neon_vld4q_v: {
6872     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6873     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6874     llvm::Type *Tys[2] = { VTy, PTy };
6875     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
6876     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6877     Ops[0] = Builder.CreateBitCast(Ops[0],
6878                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6879     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6880   }
6881   case NEON::BI__builtin_neon_vld2_dup_v:
6882   case NEON::BI__builtin_neon_vld2q_dup_v: {
6883     llvm::Type *PTy =
6884       llvm::PointerType::getUnqual(VTy->getElementType());
6885     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6886     llvm::Type *Tys[2] = { VTy, PTy };
6887     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
6888     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6889     Ops[0] = Builder.CreateBitCast(Ops[0],
6890                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6891     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6892   }
6893   case NEON::BI__builtin_neon_vld3_dup_v:
6894   case NEON::BI__builtin_neon_vld3q_dup_v: {
6895     llvm::Type *PTy =
6896       llvm::PointerType::getUnqual(VTy->getElementType());
6897     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6898     llvm::Type *Tys[2] = { VTy, PTy };
6899     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
6900     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6901     Ops[0] = Builder.CreateBitCast(Ops[0],
6902                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6903     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6904   }
6905   case NEON::BI__builtin_neon_vld4_dup_v:
6906   case NEON::BI__builtin_neon_vld4q_dup_v: {
6907     llvm::Type *PTy =
6908       llvm::PointerType::getUnqual(VTy->getElementType());
6909     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6910     llvm::Type *Tys[2] = { VTy, PTy };
6911     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
6912     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6913     Ops[0] = Builder.CreateBitCast(Ops[0],
6914                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6915     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6916   }
6917   case NEON::BI__builtin_neon_vld2_lane_v:
6918   case NEON::BI__builtin_neon_vld2q_lane_v: {
6919     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6920     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
6921     Ops.push_back(Ops[1]);
6922     Ops.erase(Ops.begin()+1);
6923     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6924     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6925     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6926     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
6927     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6928     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6929     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6930   }
6931   case NEON::BI__builtin_neon_vld3_lane_v:
6932   case NEON::BI__builtin_neon_vld3q_lane_v: {
6933     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6934     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
6935     Ops.push_back(Ops[1]);
6936     Ops.erase(Ops.begin()+1);
6937     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6938     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6939     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6940     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6941     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
6942     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6943     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6944     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6945   }
6946   case NEON::BI__builtin_neon_vld4_lane_v:
6947   case NEON::BI__builtin_neon_vld4q_lane_v: {
6948     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6949     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
6950     Ops.push_back(Ops[1]);
6951     Ops.erase(Ops.begin()+1);
6952     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6953     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6954     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6955     Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
6956     Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
6957     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
6958     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6959     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6960     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6961   }
6962   case NEON::BI__builtin_neon_vst2_v:
6963   case NEON::BI__builtin_neon_vst2q_v: {
6964     Ops.push_back(Ops[0]);
6965     Ops.erase(Ops.begin());
6966     llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
6967     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
6968                         Ops, "");
6969   }
6970   case NEON::BI__builtin_neon_vst2_lane_v:
6971   case NEON::BI__builtin_neon_vst2q_lane_v: {
6972     Ops.push_back(Ops[0]);
6973     Ops.erase(Ops.begin());
6974     Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
6975     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6976     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
6977                         Ops, "");
6978   }
6979   case NEON::BI__builtin_neon_vst3_v:
6980   case NEON::BI__builtin_neon_vst3q_v: {
6981     Ops.push_back(Ops[0]);
6982     Ops.erase(Ops.begin());
6983     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6984     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
6985                         Ops, "");
6986   }
6987   case NEON::BI__builtin_neon_vst3_lane_v:
6988   case NEON::BI__builtin_neon_vst3q_lane_v: {
6989     Ops.push_back(Ops[0]);
6990     Ops.erase(Ops.begin());
6991     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6992     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6993     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
6994                         Ops, "");
6995   }
6996   case NEON::BI__builtin_neon_vst4_v:
6997   case NEON::BI__builtin_neon_vst4q_v: {
6998     Ops.push_back(Ops[0]);
6999     Ops.erase(Ops.begin());
7000     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7001     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
7002                         Ops, "");
7003   }
7004   case NEON::BI__builtin_neon_vst4_lane_v:
7005   case NEON::BI__builtin_neon_vst4q_lane_v: {
7006     Ops.push_back(Ops[0]);
7007     Ops.erase(Ops.begin());
7008     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
7009     llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
7010     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
7011                         Ops, "");
7012   }
7013   case NEON::BI__builtin_neon_vtrn_v:
7014   case NEON::BI__builtin_neon_vtrnq_v: {
7015     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
7016     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7017     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7018     Value *SV = nullptr;
7019 
7020     for (unsigned vi = 0; vi != 2; ++vi) {
7021       SmallVector<uint32_t, 16> Indices;
7022       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7023         Indices.push_back(i+vi);
7024         Indices.push_back(i+e+vi);
7025       }
7026       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7027       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
7028       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7029     }
7030     return SV;
7031   }
7032   case NEON::BI__builtin_neon_vuzp_v:
7033   case NEON::BI__builtin_neon_vuzpq_v: {
7034     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
7035     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7036     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7037     Value *SV = nullptr;
7038 
7039     for (unsigned vi = 0; vi != 2; ++vi) {
7040       SmallVector<uint32_t, 16> Indices;
7041       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7042         Indices.push_back(2*i+vi);
7043 
7044       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7045       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
7046       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7047     }
7048     return SV;
7049   }
7050   case NEON::BI__builtin_neon_vzip_v:
7051   case NEON::BI__builtin_neon_vzipq_v: {
7052     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
7053     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7054     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7055     Value *SV = nullptr;
7056 
7057     for (unsigned vi = 0; vi != 2; ++vi) {
7058       SmallVector<uint32_t, 16> Indices;
7059       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7060         Indices.push_back((i + vi*e) >> 1);
7061         Indices.push_back(((i + vi*e) >> 1)+e);
7062       }
7063       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7064       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
7065       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7066     }
7067     return SV;
7068   }
7069   case NEON::BI__builtin_neon_vqtbl1q_v: {
7070     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
7071                         Ops, "vtbl1");
7072   }
7073   case NEON::BI__builtin_neon_vqtbl2q_v: {
7074     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
7075                         Ops, "vtbl2");
7076   }
7077   case NEON::BI__builtin_neon_vqtbl3q_v: {
7078     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
7079                         Ops, "vtbl3");
7080   }
7081   case NEON::BI__builtin_neon_vqtbl4q_v: {
7082     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
7083                         Ops, "vtbl4");
7084   }
7085   case NEON::BI__builtin_neon_vqtbx1q_v: {
7086     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
7087                         Ops, "vtbx1");
7088   }
7089   case NEON::BI__builtin_neon_vqtbx2q_v: {
7090     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
7091                         Ops, "vtbx2");
7092   }
7093   case NEON::BI__builtin_neon_vqtbx3q_v: {
7094     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
7095                         Ops, "vtbx3");
7096   }
7097   case NEON::BI__builtin_neon_vqtbx4q_v: {
7098     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
7099                         Ops, "vtbx4");
7100   }
7101   case NEON::BI__builtin_neon_vsqadd_v:
7102   case NEON::BI__builtin_neon_vsqaddq_v: {
7103     Int = Intrinsic::aarch64_neon_usqadd;
7104     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
7105   }
7106   case NEON::BI__builtin_neon_vuqadd_v:
7107   case NEON::BI__builtin_neon_vuqaddq_v: {
7108     Int = Intrinsic::aarch64_neon_suqadd;
7109     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
7110   }
7111   }
7112 }
7113 
7114 llvm::Value *CodeGenFunction::
7115 BuildVector(ArrayRef<llvm::Value*> Ops) {
7116   assert((Ops.size() & (Ops.size() - 1)) == 0 &&
7117          "Not a power-of-two sized vector!");
7118   bool AllConstants = true;
7119   for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
7120     AllConstants &= isa<Constant>(Ops[i]);
7121 
7122   // If this is a constant vector, create a ConstantVector.
7123   if (AllConstants) {
7124     SmallVector<llvm::Constant*, 16> CstOps;
7125     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7126       CstOps.push_back(cast<Constant>(Ops[i]));
7127     return llvm::ConstantVector::get(CstOps);
7128   }
7129 
7130   // Otherwise, insertelement the values to build the vector.
7131   Value *Result =
7132     llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
7133 
7134   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7135     Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
7136 
7137   return Result;
7138 }
7139 
7140 // Convert the mask from an integer type to a vector of i1.
7141 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
7142                               unsigned NumElts) {
7143 
7144   llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
7145                          cast<IntegerType>(Mask->getType())->getBitWidth());
7146   Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
7147 
7148   // If we have less than 8 elements, then the starting mask was an i8 and
7149   // we need to extract down to the right number of elements.
7150   if (NumElts < 8) {
7151     uint32_t Indices[4];
7152     for (unsigned i = 0; i != NumElts; ++i)
7153       Indices[i] = i;
7154     MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
7155                                              makeArrayRef(Indices, NumElts),
7156                                              "extract");
7157   }
7158   return MaskVec;
7159 }
7160 
7161 static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
7162                                  SmallVectorImpl<Value *> &Ops,
7163                                  unsigned Align) {
7164   // Cast the pointer to right type.
7165   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
7166                                llvm::PointerType::getUnqual(Ops[1]->getType()));
7167 
7168   // If the mask is all ones just emit a regular store.
7169   if (const auto *C = dyn_cast<Constant>(Ops[2]))
7170     if (C->isAllOnesValue())
7171       return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align);
7172 
7173   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
7174                                    Ops[1]->getType()->getVectorNumElements());
7175 
7176   return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec);
7177 }
7178 
7179 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
7180                                 SmallVectorImpl<Value *> &Ops, unsigned Align) {
7181   // Cast the pointer to right type.
7182   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
7183                                llvm::PointerType::getUnqual(Ops[1]->getType()));
7184 
7185   // If the mask is all ones just emit a regular store.
7186   if (const auto *C = dyn_cast<Constant>(Ops[2]))
7187     if (C->isAllOnesValue())
7188       return CGF.Builder.CreateAlignedLoad(Ops[0], Align);
7189 
7190   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
7191                                    Ops[1]->getType()->getVectorNumElements());
7192 
7193   return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
7194 }
7195 
7196 static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF,
7197                                         SmallVectorImpl<Value *> &Ops,
7198                                         llvm::Type *DstTy,
7199                                         unsigned SrcSizeInBits,
7200                                         unsigned Align) {
7201   // Load the subvector.
7202   Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align);
7203 
7204   // Create broadcast mask.
7205   unsigned NumDstElts = DstTy->getVectorNumElements();
7206   unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits();
7207 
7208   SmallVector<uint32_t, 8> Mask;
7209   for (unsigned i = 0; i != NumDstElts; i += NumSrcElts)
7210     for (unsigned j = 0; j != NumSrcElts; ++j)
7211       Mask.push_back(j);
7212 
7213   return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst");
7214 }
7215 
7216 static Value *EmitX86Select(CodeGenFunction &CGF,
7217                             Value *Mask, Value *Op0, Value *Op1) {
7218 
7219   // If the mask is all ones just return first argument.
7220   if (const auto *C = dyn_cast<Constant>(Mask))
7221     if (C->isAllOnesValue())
7222       return Op0;
7223 
7224   Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());
7225 
7226   return CGF.Builder.CreateSelect(Mask, Op0, Op1);
7227 }
7228 
7229 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
7230                                    bool Signed, SmallVectorImpl<Value *> &Ops) {
7231   unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7232   Value *Cmp;
7233 
7234   if (CC == 3) {
7235     Cmp = Constant::getNullValue(
7236                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
7237   } else if (CC == 7) {
7238     Cmp = Constant::getAllOnesValue(
7239                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
7240   } else {
7241     ICmpInst::Predicate Pred;
7242     switch (CC) {
7243     default: llvm_unreachable("Unknown condition code");
7244     case 0: Pred = ICmpInst::ICMP_EQ;  break;
7245     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
7246     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
7247     case 4: Pred = ICmpInst::ICMP_NE;  break;
7248     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
7249     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
7250     }
7251     Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
7252   }
7253 
7254   const auto *C = dyn_cast<Constant>(Ops.back());
7255   if (!C || !C->isAllOnesValue())
7256     Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts));
7257 
7258   if (NumElts < 8) {
7259     uint32_t Indices[8];
7260     for (unsigned i = 0; i != NumElts; ++i)
7261       Indices[i] = i;
7262     for (unsigned i = NumElts; i != 8; ++i)
7263       Indices[i] = i % NumElts + NumElts;
7264     Cmp = CGF.Builder.CreateShuffleVector(
7265         Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
7266   }
7267   return CGF.Builder.CreateBitCast(Cmp,
7268                                    IntegerType::get(CGF.getLLVMContext(),
7269                                                     std::max(NumElts, 8U)));
7270 }
7271 
7272 static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
7273                             ArrayRef<Value *> Ops) {
7274   Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
7275   Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7276 
7277   if (Ops.size() == 2)
7278     return Res;
7279 
7280   assert(Ops.size() == 4);
7281   return EmitX86Select(CGF, Ops[3], Res, Ops[2]);
7282 }
7283 
7284 static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
7285                               llvm::Type *DstTy) {
7286   unsigned NumberOfElements = DstTy->getVectorNumElements();
7287   Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
7288   return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
7289 }
7290 
7291 static Value *EmitX86CpuIs(CodeGenFunction &CGF, const CallExpr *E) {
7292   const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
7293   StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
7294 
7295   // This enum contains the vendor, type, and subtype enums from the
7296   // runtime library concatenated together. The _START labels mark
7297   // the start and are used to adjust the value into the correct
7298   // encoding space.
7299   enum X86CPUs {
7300     INTEL = 1,
7301     AMD,
7302     CPU_TYPE_START,
7303     INTEL_BONNELL,
7304     INTEL_CORE2,
7305     INTEL_COREI7,
7306     AMDFAM10H,
7307     AMDFAM15H,
7308     INTEL_SILVERMONT,
7309     INTEL_KNL,
7310     AMD_BTVER1,
7311     AMD_BTVER2,
7312     CPU_SUBTYPE_START,
7313     INTEL_COREI7_NEHALEM,
7314     INTEL_COREI7_WESTMERE,
7315     INTEL_COREI7_SANDYBRIDGE,
7316     AMDFAM10H_BARCELONA,
7317     AMDFAM10H_SHANGHAI,
7318     AMDFAM10H_ISTANBUL,
7319     AMDFAM15H_BDVER1,
7320     AMDFAM15H_BDVER2,
7321     AMDFAM15H_BDVER3,
7322     AMDFAM15H_BDVER4,
7323     AMDFAM17H_ZNVER1,
7324     INTEL_COREI7_IVYBRIDGE,
7325     INTEL_COREI7_HASWELL,
7326     INTEL_COREI7_BROADWELL,
7327     INTEL_COREI7_SKYLAKE,
7328     INTEL_COREI7_SKYLAKE_AVX512,
7329   };
7330 
7331   X86CPUs CPU =
7332     StringSwitch<X86CPUs>(CPUStr)
7333       .Case("amd", AMD)
7334       .Case("amdfam10h", AMDFAM10H)
7335       .Case("amdfam15h", AMDFAM15H)
7336       .Case("atom", INTEL_BONNELL)
7337       .Case("barcelona", AMDFAM10H_BARCELONA)
7338       .Case("bdver1", AMDFAM15H_BDVER1)
7339       .Case("bdver2", AMDFAM15H_BDVER2)
7340       .Case("bdver3", AMDFAM15H_BDVER3)
7341       .Case("bdver4", AMDFAM15H_BDVER4)
7342       .Case("bonnell", INTEL_BONNELL)
7343       .Case("broadwell", INTEL_COREI7_BROADWELL)
7344       .Case("btver1", AMD_BTVER1)
7345       .Case("btver2", AMD_BTVER2)
7346       .Case("core2", INTEL_CORE2)
7347       .Case("corei7", INTEL_COREI7)
7348       .Case("haswell", INTEL_COREI7_HASWELL)
7349       .Case("intel", INTEL)
7350       .Case("istanbul", AMDFAM10H_ISTANBUL)
7351       .Case("ivybridge", INTEL_COREI7_IVYBRIDGE)
7352       .Case("knl", INTEL_KNL)
7353       .Case("nehalem", INTEL_COREI7_NEHALEM)
7354       .Case("sandybridge", INTEL_COREI7_SANDYBRIDGE)
7355       .Case("shanghai", AMDFAM10H_SHANGHAI)
7356       .Case("silvermont", INTEL_SILVERMONT)
7357       .Case("skylake", INTEL_COREI7_SKYLAKE)
7358       .Case("skylake-avx512", INTEL_COREI7_SKYLAKE_AVX512)
7359       .Case("slm", INTEL_SILVERMONT)
7360       .Case("westmere", INTEL_COREI7_WESTMERE)
7361       .Case("znver1", AMDFAM17H_ZNVER1);
7362 
7363   llvm::Type *Int32Ty = CGF.Builder.getInt32Ty();
7364 
7365   // Matching the struct layout from the compiler-rt/libgcc structure that is
7366   // filled in:
7367   // unsigned int __cpu_vendor;
7368   // unsigned int __cpu_type;
7369   // unsigned int __cpu_subtype;
7370   // unsigned int __cpu_features[1];
7371   llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
7372                                           llvm::ArrayType::get(Int32Ty, 1));
7373 
7374   // Grab the global __cpu_model.
7375   llvm::Constant *CpuModel = CGF.CGM.CreateRuntimeVariable(STy, "__cpu_model");
7376 
7377   // Calculate the index needed to access the correct field based on the
7378   // range. Also adjust the expected value.
7379   unsigned Index;
7380   unsigned Value;
7381   if (CPU > CPU_SUBTYPE_START) {
7382     Index = 2;
7383     Value = CPU - CPU_SUBTYPE_START;
7384   } else if (CPU > CPU_TYPE_START) {
7385     Index = 1;
7386     Value = CPU - CPU_TYPE_START;
7387   } else {
7388     Index = 0;
7389     Value = CPU;
7390   }
7391 
7392   // Grab the appropriate field from __cpu_model.
7393   llvm::Value *Idxs[] = {
7394     ConstantInt::get(Int32Ty, 0),
7395     ConstantInt::get(Int32Ty, Index)
7396   };
7397   llvm::Value *CpuValue = CGF.Builder.CreateGEP(STy, CpuModel, Idxs);
7398   CpuValue = CGF.Builder.CreateAlignedLoad(CpuValue, CharUnits::fromQuantity(4));
7399 
7400   // Check the value of the field against the requested value.
7401   return CGF.Builder.CreateICmpEQ(CpuValue,
7402                                   llvm::ConstantInt::get(Int32Ty, Value));
7403 }
7404 
7405 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
7406                                            const CallExpr *E) {
7407   if (BuiltinID == X86::BI__builtin_cpu_is)
7408     return EmitX86CpuIs(*this, E);
7409 
7410   SmallVector<Value*, 4> Ops;
7411 
7412   // Find out if any arguments are required to be integer constant expressions.
7413   unsigned ICEArguments = 0;
7414   ASTContext::GetBuiltinTypeError Error;
7415   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
7416   assert(Error == ASTContext::GE_None && "Should not codegen an error");
7417 
7418   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
7419     // If this is a normal argument, just emit it as a scalar.
7420     if ((ICEArguments & (1 << i)) == 0) {
7421       Ops.push_back(EmitScalarExpr(E->getArg(i)));
7422       continue;
7423     }
7424 
7425     // If this is required to be a constant, constant fold it so that we know
7426     // that the generated intrinsic gets a ConstantInt.
7427     llvm::APSInt Result;
7428     bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
7429     assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
7430     Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
7431   }
7432 
7433   // These exist so that the builtin that takes an immediate can be bounds
7434   // checked by clang to avoid passing bad immediates to the backend. Since
7435   // AVX has a larger immediate than SSE we would need separate builtins to
7436   // do the different bounds checking. Rather than create a clang-specific
7437   // SSE-only builtin, this implements eight separate builtins to match gcc's
7438   // implementation.
7439   auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
7440     Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
7441     llvm::Function *F = CGM.getIntrinsic(ID);
7442     return Builder.CreateCall(F, Ops);
7443   };
7444 
7445   // For the vector forms of FP comparisons, translate the builtins directly to
7446   // IR.
7447   // TODO: The builtins could be removed if the SSE header files used vector
7448   // extension comparisons directly (vector ordered/unordered may need
7449   // additional support via __builtin_isnan()).
7450   auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
7451     Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
7452     llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
7453     llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
7454     Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
7455     return Builder.CreateBitCast(Sext, FPVecTy);
7456   };
7457 
7458   switch (BuiltinID) {
7459   default: return nullptr;
7460   case X86::BI__builtin_cpu_supports: {
7461     const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
7462     StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
7463 
7464     // TODO: When/if this becomes more than x86 specific then use a TargetInfo
7465     // based mapping.
7466     // Processor features and mapping to processor feature value.
7467     enum X86Features {
7468       CMOV = 0,
7469       MMX,
7470       POPCNT,
7471       SSE,
7472       SSE2,
7473       SSE3,
7474       SSSE3,
7475       SSE4_1,
7476       SSE4_2,
7477       AVX,
7478       AVX2,
7479       SSE4_A,
7480       FMA4,
7481       XOP,
7482       FMA,
7483       AVX512F,
7484       BMI,
7485       BMI2,
7486       AES,
7487       PCLMUL,
7488       AVX512VL,
7489       AVX512BW,
7490       AVX512DQ,
7491       AVX512CD,
7492       AVX512ER,
7493       AVX512PF,
7494       AVX512VBMI,
7495       AVX512IFMA,
7496       AVX5124VNNIW,
7497       AVX5124FMAPS,
7498       AVX512VPOPCNTDQ,
7499       MAX
7500     };
7501 
7502     X86Features Feature =
7503         StringSwitch<X86Features>(FeatureStr)
7504             .Case("cmov", X86Features::CMOV)
7505             .Case("mmx", X86Features::MMX)
7506             .Case("popcnt", X86Features::POPCNT)
7507             .Case("sse", X86Features::SSE)
7508             .Case("sse2", X86Features::SSE2)
7509             .Case("sse3", X86Features::SSE3)
7510             .Case("ssse3", X86Features::SSSE3)
7511             .Case("sse4.1", X86Features::SSE4_1)
7512             .Case("sse4.2", X86Features::SSE4_2)
7513             .Case("avx", X86Features::AVX)
7514             .Case("avx2", X86Features::AVX2)
7515             .Case("sse4a", X86Features::SSE4_A)
7516             .Case("fma4", X86Features::FMA4)
7517             .Case("xop", X86Features::XOP)
7518             .Case("fma", X86Features::FMA)
7519             .Case("avx512f", X86Features::AVX512F)
7520             .Case("bmi", X86Features::BMI)
7521             .Case("bmi2", X86Features::BMI2)
7522             .Case("aes", X86Features::AES)
7523             .Case("pclmul", X86Features::PCLMUL)
7524             .Case("avx512vl", X86Features::AVX512VL)
7525             .Case("avx512bw", X86Features::AVX512BW)
7526             .Case("avx512dq", X86Features::AVX512DQ)
7527             .Case("avx512cd", X86Features::AVX512CD)
7528             .Case("avx512er", X86Features::AVX512ER)
7529             .Case("avx512pf", X86Features::AVX512PF)
7530             .Case("avx512vbmi", X86Features::AVX512VBMI)
7531             .Case("avx512ifma", X86Features::AVX512IFMA)
7532             .Case("avx5124vnniw", X86Features::AVX5124VNNIW)
7533             .Case("avx5124fmaps", X86Features::AVX5124FMAPS)
7534             .Case("avx512vpopcntdq", X86Features::AVX512VPOPCNTDQ)
7535             .Default(X86Features::MAX);
7536     assert(Feature != X86Features::MAX && "Invalid feature!");
7537 
7538     // Matching the struct layout from the compiler-rt/libgcc structure that is
7539     // filled in:
7540     // unsigned int __cpu_vendor;
7541     // unsigned int __cpu_type;
7542     // unsigned int __cpu_subtype;
7543     // unsigned int __cpu_features[1];
7544     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
7545                                             llvm::ArrayType::get(Int32Ty, 1));
7546 
7547     // Grab the global __cpu_model.
7548     llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
7549 
7550     // Grab the first (0th) element from the field __cpu_features off of the
7551     // global in the struct STy.
7552     Value *Idxs[] = {
7553       ConstantInt::get(Int32Ty, 0),
7554       ConstantInt::get(Int32Ty, 3),
7555       ConstantInt::get(Int32Ty, 0)
7556     };
7557     Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
7558     Value *Features = Builder.CreateAlignedLoad(CpuFeatures,
7559                                                 CharUnits::fromQuantity(4));
7560 
7561     // Check the value of the bit corresponding to the feature requested.
7562     Value *Bitset = Builder.CreateAnd(
7563         Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature));
7564     return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
7565   }
7566   case X86::BI_mm_prefetch: {
7567     Value *Address = Ops[0];
7568     Value *RW = ConstantInt::get(Int32Ty, 0);
7569     Value *Locality = Ops[1];
7570     Value *Data = ConstantInt::get(Int32Ty, 1);
7571     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
7572     return Builder.CreateCall(F, {Address, RW, Locality, Data});
7573   }
7574   case X86::BI_mm_clflush: {
7575     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
7576                               Ops[0]);
7577   }
7578   case X86::BI_mm_lfence: {
7579     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
7580   }
7581   case X86::BI_mm_mfence: {
7582     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
7583   }
7584   case X86::BI_mm_sfence: {
7585     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
7586   }
7587   case X86::BI_mm_pause: {
7588     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
7589   }
7590   case X86::BI__rdtsc: {
7591     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
7592   }
7593   case X86::BI__builtin_ia32_undef128:
7594   case X86::BI__builtin_ia32_undef256:
7595   case X86::BI__builtin_ia32_undef512:
7596     // The x86 definition of "undef" is not the same as the LLVM definition
7597     // (PR32176). We leave optimizing away an unnecessary zero constant to the
7598     // IR optimizer and backend.
7599     // TODO: If we had a "freeze" IR instruction to generate a fixed undef
7600     // value, we should use that here instead of a zero.
7601     return llvm::Constant::getNullValue(ConvertType(E->getType()));
7602   case X86::BI__builtin_ia32_vec_init_v8qi:
7603   case X86::BI__builtin_ia32_vec_init_v4hi:
7604   case X86::BI__builtin_ia32_vec_init_v2si:
7605     return Builder.CreateBitCast(BuildVector(Ops),
7606                                  llvm::Type::getX86_MMXTy(getLLVMContext()));
7607   case X86::BI__builtin_ia32_vec_ext_v2si:
7608     return Builder.CreateExtractElement(Ops[0],
7609                                   llvm::ConstantInt::get(Ops[1]->getType(), 0));
7610   case X86::BI_mm_setcsr:
7611   case X86::BI__builtin_ia32_ldmxcsr: {
7612     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
7613     Builder.CreateStore(Ops[0], Tmp);
7614     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
7615                           Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
7616   }
7617   case X86::BI_mm_getcsr:
7618   case X86::BI__builtin_ia32_stmxcsr: {
7619     Address Tmp = CreateMemTemp(E->getType());
7620     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
7621                        Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
7622     return Builder.CreateLoad(Tmp, "stmxcsr");
7623   }
7624   case X86::BI__builtin_ia32_xsave:
7625   case X86::BI__builtin_ia32_xsave64:
7626   case X86::BI__builtin_ia32_xrstor:
7627   case X86::BI__builtin_ia32_xrstor64:
7628   case X86::BI__builtin_ia32_xsaveopt:
7629   case X86::BI__builtin_ia32_xsaveopt64:
7630   case X86::BI__builtin_ia32_xrstors:
7631   case X86::BI__builtin_ia32_xrstors64:
7632   case X86::BI__builtin_ia32_xsavec:
7633   case X86::BI__builtin_ia32_xsavec64:
7634   case X86::BI__builtin_ia32_xsaves:
7635   case X86::BI__builtin_ia32_xsaves64: {
7636     Intrinsic::ID ID;
7637 #define INTRINSIC_X86_XSAVE_ID(NAME) \
7638     case X86::BI__builtin_ia32_##NAME: \
7639       ID = Intrinsic::x86_##NAME; \
7640       break
7641     switch (BuiltinID) {
7642     default: llvm_unreachable("Unsupported intrinsic!");
7643     INTRINSIC_X86_XSAVE_ID(xsave);
7644     INTRINSIC_X86_XSAVE_ID(xsave64);
7645     INTRINSIC_X86_XSAVE_ID(xrstor);
7646     INTRINSIC_X86_XSAVE_ID(xrstor64);
7647     INTRINSIC_X86_XSAVE_ID(xsaveopt);
7648     INTRINSIC_X86_XSAVE_ID(xsaveopt64);
7649     INTRINSIC_X86_XSAVE_ID(xrstors);
7650     INTRINSIC_X86_XSAVE_ID(xrstors64);
7651     INTRINSIC_X86_XSAVE_ID(xsavec);
7652     INTRINSIC_X86_XSAVE_ID(xsavec64);
7653     INTRINSIC_X86_XSAVE_ID(xsaves);
7654     INTRINSIC_X86_XSAVE_ID(xsaves64);
7655     }
7656 #undef INTRINSIC_X86_XSAVE_ID
7657     Value *Mhi = Builder.CreateTrunc(
7658       Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
7659     Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
7660     Ops[1] = Mhi;
7661     Ops.push_back(Mlo);
7662     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
7663   }
7664   case X86::BI__builtin_ia32_storedqudi128_mask:
7665   case X86::BI__builtin_ia32_storedqusi128_mask:
7666   case X86::BI__builtin_ia32_storedquhi128_mask:
7667   case X86::BI__builtin_ia32_storedquqi128_mask:
7668   case X86::BI__builtin_ia32_storeupd128_mask:
7669   case X86::BI__builtin_ia32_storeups128_mask:
7670   case X86::BI__builtin_ia32_storedqudi256_mask:
7671   case X86::BI__builtin_ia32_storedqusi256_mask:
7672   case X86::BI__builtin_ia32_storedquhi256_mask:
7673   case X86::BI__builtin_ia32_storedquqi256_mask:
7674   case X86::BI__builtin_ia32_storeupd256_mask:
7675   case X86::BI__builtin_ia32_storeups256_mask:
7676   case X86::BI__builtin_ia32_storedqudi512_mask:
7677   case X86::BI__builtin_ia32_storedqusi512_mask:
7678   case X86::BI__builtin_ia32_storedquhi512_mask:
7679   case X86::BI__builtin_ia32_storedquqi512_mask:
7680   case X86::BI__builtin_ia32_storeupd512_mask:
7681   case X86::BI__builtin_ia32_storeups512_mask:
7682     return EmitX86MaskedStore(*this, Ops, 1);
7683 
7684   case X86::BI__builtin_ia32_storess128_mask:
7685   case X86::BI__builtin_ia32_storesd128_mask: {
7686     return EmitX86MaskedStore(*this, Ops, 16);
7687   }
7688   case X86::BI__builtin_ia32_vpopcntd_512:
7689   case X86::BI__builtin_ia32_vpopcntq_512: {
7690     llvm::Type *ResultType = ConvertType(E->getType());
7691     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
7692     return Builder.CreateCall(F, Ops);
7693   }
7694   case X86::BI__builtin_ia32_cvtmask2b128:
7695   case X86::BI__builtin_ia32_cvtmask2b256:
7696   case X86::BI__builtin_ia32_cvtmask2b512:
7697   case X86::BI__builtin_ia32_cvtmask2w128:
7698   case X86::BI__builtin_ia32_cvtmask2w256:
7699   case X86::BI__builtin_ia32_cvtmask2w512:
7700   case X86::BI__builtin_ia32_cvtmask2d128:
7701   case X86::BI__builtin_ia32_cvtmask2d256:
7702   case X86::BI__builtin_ia32_cvtmask2d512:
7703   case X86::BI__builtin_ia32_cvtmask2q128:
7704   case X86::BI__builtin_ia32_cvtmask2q256:
7705   case X86::BI__builtin_ia32_cvtmask2q512:
7706     return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
7707 
7708   case X86::BI__builtin_ia32_movdqa32store128_mask:
7709   case X86::BI__builtin_ia32_movdqa64store128_mask:
7710   case X86::BI__builtin_ia32_storeaps128_mask:
7711   case X86::BI__builtin_ia32_storeapd128_mask:
7712   case X86::BI__builtin_ia32_movdqa32store256_mask:
7713   case X86::BI__builtin_ia32_movdqa64store256_mask:
7714   case X86::BI__builtin_ia32_storeaps256_mask:
7715   case X86::BI__builtin_ia32_storeapd256_mask:
7716   case X86::BI__builtin_ia32_movdqa32store512_mask:
7717   case X86::BI__builtin_ia32_movdqa64store512_mask:
7718   case X86::BI__builtin_ia32_storeaps512_mask:
7719   case X86::BI__builtin_ia32_storeapd512_mask: {
7720     unsigned Align =
7721       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
7722     return EmitX86MaskedStore(*this, Ops, Align);
7723   }
7724   case X86::BI__builtin_ia32_loadups128_mask:
7725   case X86::BI__builtin_ia32_loadups256_mask:
7726   case X86::BI__builtin_ia32_loadups512_mask:
7727   case X86::BI__builtin_ia32_loadupd128_mask:
7728   case X86::BI__builtin_ia32_loadupd256_mask:
7729   case X86::BI__builtin_ia32_loadupd512_mask:
7730   case X86::BI__builtin_ia32_loaddquqi128_mask:
7731   case X86::BI__builtin_ia32_loaddquqi256_mask:
7732   case X86::BI__builtin_ia32_loaddquqi512_mask:
7733   case X86::BI__builtin_ia32_loaddquhi128_mask:
7734   case X86::BI__builtin_ia32_loaddquhi256_mask:
7735   case X86::BI__builtin_ia32_loaddquhi512_mask:
7736   case X86::BI__builtin_ia32_loaddqusi128_mask:
7737   case X86::BI__builtin_ia32_loaddqusi256_mask:
7738   case X86::BI__builtin_ia32_loaddqusi512_mask:
7739   case X86::BI__builtin_ia32_loaddqudi128_mask:
7740   case X86::BI__builtin_ia32_loaddqudi256_mask:
7741   case X86::BI__builtin_ia32_loaddqudi512_mask:
7742     return EmitX86MaskedLoad(*this, Ops, 1);
7743 
7744   case X86::BI__builtin_ia32_loadss128_mask:
7745   case X86::BI__builtin_ia32_loadsd128_mask:
7746     return EmitX86MaskedLoad(*this, Ops, 16);
7747 
7748   case X86::BI__builtin_ia32_loadaps128_mask:
7749   case X86::BI__builtin_ia32_loadaps256_mask:
7750   case X86::BI__builtin_ia32_loadaps512_mask:
7751   case X86::BI__builtin_ia32_loadapd128_mask:
7752   case X86::BI__builtin_ia32_loadapd256_mask:
7753   case X86::BI__builtin_ia32_loadapd512_mask:
7754   case X86::BI__builtin_ia32_movdqa32load128_mask:
7755   case X86::BI__builtin_ia32_movdqa32load256_mask:
7756   case X86::BI__builtin_ia32_movdqa32load512_mask:
7757   case X86::BI__builtin_ia32_movdqa64load128_mask:
7758   case X86::BI__builtin_ia32_movdqa64load256_mask:
7759   case X86::BI__builtin_ia32_movdqa64load512_mask: {
7760     unsigned Align =
7761       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
7762     return EmitX86MaskedLoad(*this, Ops, Align);
7763   }
7764 
7765   case X86::BI__builtin_ia32_vbroadcastf128_pd256:
7766   case X86::BI__builtin_ia32_vbroadcastf128_ps256: {
7767     llvm::Type *DstTy = ConvertType(E->getType());
7768     return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1);
7769   }
7770 
7771   case X86::BI__builtin_ia32_storehps:
7772   case X86::BI__builtin_ia32_storelps: {
7773     llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
7774     llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
7775 
7776     // cast val v2i64
7777     Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
7778 
7779     // extract (0, 1)
7780     unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
7781     llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
7782     Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
7783 
7784     // cast pointer to i64 & store
7785     Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
7786     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7787   }
7788   case X86::BI__builtin_ia32_palignr128:
7789   case X86::BI__builtin_ia32_palignr256:
7790   case X86::BI__builtin_ia32_palignr512_mask: {
7791     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7792 
7793     unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7794     assert(NumElts % 16 == 0);
7795 
7796     // If palignr is shifting the pair of vectors more than the size of two
7797     // lanes, emit zero.
7798     if (ShiftVal >= 32)
7799       return llvm::Constant::getNullValue(ConvertType(E->getType()));
7800 
7801     // If palignr is shifting the pair of input vectors more than one lane,
7802     // but less than two lanes, convert to shifting in zeroes.
7803     if (ShiftVal > 16) {
7804       ShiftVal -= 16;
7805       Ops[1] = Ops[0];
7806       Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
7807     }
7808 
7809     uint32_t Indices[64];
7810     // 256/512-bit palignr operates on 128-bit lanes, so shuffle lane by lane.
7811     for (unsigned l = 0; l != NumElts; l += 16) {
7812       for (unsigned i = 0; i != 16; ++i) {
7813         unsigned Idx = ShiftVal + i;
7814         if (Idx >= 16)
7815           Idx += NumElts - 16; // End of lane, switch operand.
7816         Indices[l + i] = Idx + l;
7817       }
7818     }
7819 
7820     Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
7821                                                makeArrayRef(Indices, NumElts),
7822                                                "palignr");
7823 
7824     // If this isn't a masked builtin, just return the align operation.
7825     if (Ops.size() == 3)
7826       return Align;
7827 
7828     return EmitX86Select(*this, Ops[4], Align, Ops[3]);
7829   }
7830 
7831   case X86::BI__builtin_ia32_movnti:
7832   case X86::BI__builtin_ia32_movnti64:
7833   case X86::BI__builtin_ia32_movntsd:
7834   case X86::BI__builtin_ia32_movntss: {
7835     llvm::MDNode *Node = llvm::MDNode::get(
7836         getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
7837 
7838     Value *Ptr = Ops[0];
7839     Value *Src = Ops[1];
7840 
7841     // Extract the 0'th element of the source vector.
7842     if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
7843         BuiltinID == X86::BI__builtin_ia32_movntss)
7844       Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
7845 
7846     // Convert the type of the pointer to a pointer to the stored type.
7847     Value *BC = Builder.CreateBitCast(
7848         Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");
7849 
7850     // Unaligned nontemporal store of the scalar value.
7851     StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
7852     SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
7853     SI->setAlignment(1);
7854     return SI;
7855   }
7856 
7857   case X86::BI__builtin_ia32_selectb_128:
7858   case X86::BI__builtin_ia32_selectb_256:
7859   case X86::BI__builtin_ia32_selectb_512:
7860   case X86::BI__builtin_ia32_selectw_128:
7861   case X86::BI__builtin_ia32_selectw_256:
7862   case X86::BI__builtin_ia32_selectw_512:
7863   case X86::BI__builtin_ia32_selectd_128:
7864   case X86::BI__builtin_ia32_selectd_256:
7865   case X86::BI__builtin_ia32_selectd_512:
7866   case X86::BI__builtin_ia32_selectq_128:
7867   case X86::BI__builtin_ia32_selectq_256:
7868   case X86::BI__builtin_ia32_selectq_512:
7869   case X86::BI__builtin_ia32_selectps_128:
7870   case X86::BI__builtin_ia32_selectps_256:
7871   case X86::BI__builtin_ia32_selectps_512:
7872   case X86::BI__builtin_ia32_selectpd_128:
7873   case X86::BI__builtin_ia32_selectpd_256:
7874   case X86::BI__builtin_ia32_selectpd_512:
7875     return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
7876   case X86::BI__builtin_ia32_pcmpeqb128_mask:
7877   case X86::BI__builtin_ia32_pcmpeqb256_mask:
7878   case X86::BI__builtin_ia32_pcmpeqb512_mask:
7879   case X86::BI__builtin_ia32_pcmpeqw128_mask:
7880   case X86::BI__builtin_ia32_pcmpeqw256_mask:
7881   case X86::BI__builtin_ia32_pcmpeqw512_mask:
7882   case X86::BI__builtin_ia32_pcmpeqd128_mask:
7883   case X86::BI__builtin_ia32_pcmpeqd256_mask:
7884   case X86::BI__builtin_ia32_pcmpeqd512_mask:
7885   case X86::BI__builtin_ia32_pcmpeqq128_mask:
7886   case X86::BI__builtin_ia32_pcmpeqq256_mask:
7887   case X86::BI__builtin_ia32_pcmpeqq512_mask:
7888     return EmitX86MaskedCompare(*this, 0, false, Ops);
7889   case X86::BI__builtin_ia32_pcmpgtb128_mask:
7890   case X86::BI__builtin_ia32_pcmpgtb256_mask:
7891   case X86::BI__builtin_ia32_pcmpgtb512_mask:
7892   case X86::BI__builtin_ia32_pcmpgtw128_mask:
7893   case X86::BI__builtin_ia32_pcmpgtw256_mask:
7894   case X86::BI__builtin_ia32_pcmpgtw512_mask:
7895   case X86::BI__builtin_ia32_pcmpgtd128_mask:
7896   case X86::BI__builtin_ia32_pcmpgtd256_mask:
7897   case X86::BI__builtin_ia32_pcmpgtd512_mask:
7898   case X86::BI__builtin_ia32_pcmpgtq128_mask:
7899   case X86::BI__builtin_ia32_pcmpgtq256_mask:
7900   case X86::BI__builtin_ia32_pcmpgtq512_mask:
7901     return EmitX86MaskedCompare(*this, 6, true, Ops);
7902   case X86::BI__builtin_ia32_cmpb128_mask:
7903   case X86::BI__builtin_ia32_cmpb256_mask:
7904   case X86::BI__builtin_ia32_cmpb512_mask:
7905   case X86::BI__builtin_ia32_cmpw128_mask:
7906   case X86::BI__builtin_ia32_cmpw256_mask:
7907   case X86::BI__builtin_ia32_cmpw512_mask:
7908   case X86::BI__builtin_ia32_cmpd128_mask:
7909   case X86::BI__builtin_ia32_cmpd256_mask:
7910   case X86::BI__builtin_ia32_cmpd512_mask:
7911   case X86::BI__builtin_ia32_cmpq128_mask:
7912   case X86::BI__builtin_ia32_cmpq256_mask:
7913   case X86::BI__builtin_ia32_cmpq512_mask: {
7914     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7915     return EmitX86MaskedCompare(*this, CC, true, Ops);
7916   }
7917   case X86::BI__builtin_ia32_ucmpb128_mask:
7918   case X86::BI__builtin_ia32_ucmpb256_mask:
7919   case X86::BI__builtin_ia32_ucmpb512_mask:
7920   case X86::BI__builtin_ia32_ucmpw128_mask:
7921   case X86::BI__builtin_ia32_ucmpw256_mask:
7922   case X86::BI__builtin_ia32_ucmpw512_mask:
7923   case X86::BI__builtin_ia32_ucmpd128_mask:
7924   case X86::BI__builtin_ia32_ucmpd256_mask:
7925   case X86::BI__builtin_ia32_ucmpd512_mask:
7926   case X86::BI__builtin_ia32_ucmpq128_mask:
7927   case X86::BI__builtin_ia32_ucmpq256_mask:
7928   case X86::BI__builtin_ia32_ucmpq512_mask: {
7929     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7930     return EmitX86MaskedCompare(*this, CC, false, Ops);
7931   }
7932 
7933   case X86::BI__builtin_ia32_vplzcntd_128_mask:
7934   case X86::BI__builtin_ia32_vplzcntd_256_mask:
7935   case X86::BI__builtin_ia32_vplzcntd_512_mask:
7936   case X86::BI__builtin_ia32_vplzcntq_128_mask:
7937   case X86::BI__builtin_ia32_vplzcntq_256_mask:
7938   case X86::BI__builtin_ia32_vplzcntq_512_mask: {
7939     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
7940     return EmitX86Select(*this, Ops[2],
7941                          Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}),
7942                          Ops[1]);
7943   }
7944 
7945   case X86::BI__builtin_ia32_pmaxsb128:
7946   case X86::BI__builtin_ia32_pmaxsw128:
7947   case X86::BI__builtin_ia32_pmaxsd128:
7948   case X86::BI__builtin_ia32_pmaxsq128_mask:
7949   case X86::BI__builtin_ia32_pmaxsb256:
7950   case X86::BI__builtin_ia32_pmaxsw256:
7951   case X86::BI__builtin_ia32_pmaxsd256:
7952   case X86::BI__builtin_ia32_pmaxsq256_mask:
7953   case X86::BI__builtin_ia32_pmaxsb512_mask:
7954   case X86::BI__builtin_ia32_pmaxsw512_mask:
7955   case X86::BI__builtin_ia32_pmaxsd512_mask:
7956   case X86::BI__builtin_ia32_pmaxsq512_mask:
7957     return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops);
7958   case X86::BI__builtin_ia32_pmaxub128:
7959   case X86::BI__builtin_ia32_pmaxuw128:
7960   case X86::BI__builtin_ia32_pmaxud128:
7961   case X86::BI__builtin_ia32_pmaxuq128_mask:
7962   case X86::BI__builtin_ia32_pmaxub256:
7963   case X86::BI__builtin_ia32_pmaxuw256:
7964   case X86::BI__builtin_ia32_pmaxud256:
7965   case X86::BI__builtin_ia32_pmaxuq256_mask:
7966   case X86::BI__builtin_ia32_pmaxub512_mask:
7967   case X86::BI__builtin_ia32_pmaxuw512_mask:
7968   case X86::BI__builtin_ia32_pmaxud512_mask:
7969   case X86::BI__builtin_ia32_pmaxuq512_mask:
7970     return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops);
7971   case X86::BI__builtin_ia32_pminsb128:
7972   case X86::BI__builtin_ia32_pminsw128:
7973   case X86::BI__builtin_ia32_pminsd128:
7974   case X86::BI__builtin_ia32_pminsq128_mask:
7975   case X86::BI__builtin_ia32_pminsb256:
7976   case X86::BI__builtin_ia32_pminsw256:
7977   case X86::BI__builtin_ia32_pminsd256:
7978   case X86::BI__builtin_ia32_pminsq256_mask:
7979   case X86::BI__builtin_ia32_pminsb512_mask:
7980   case X86::BI__builtin_ia32_pminsw512_mask:
7981   case X86::BI__builtin_ia32_pminsd512_mask:
7982   case X86::BI__builtin_ia32_pminsq512_mask:
7983     return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops);
7984   case X86::BI__builtin_ia32_pminub128:
7985   case X86::BI__builtin_ia32_pminuw128:
7986   case X86::BI__builtin_ia32_pminud128:
7987   case X86::BI__builtin_ia32_pminuq128_mask:
7988   case X86::BI__builtin_ia32_pminub256:
7989   case X86::BI__builtin_ia32_pminuw256:
7990   case X86::BI__builtin_ia32_pminud256:
7991   case X86::BI__builtin_ia32_pminuq256_mask:
7992   case X86::BI__builtin_ia32_pminub512_mask:
7993   case X86::BI__builtin_ia32_pminuw512_mask:
7994   case X86::BI__builtin_ia32_pminud512_mask:
7995   case X86::BI__builtin_ia32_pminuq512_mask:
7996     return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops);
7997 
7998   // 3DNow!
7999   case X86::BI__builtin_ia32_pswapdsf:
8000   case X86::BI__builtin_ia32_pswapdsi: {
8001     llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
8002     Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
8003     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
8004     return Builder.CreateCall(F, Ops, "pswapd");
8005   }
8006   case X86::BI__builtin_ia32_rdrand16_step:
8007   case X86::BI__builtin_ia32_rdrand32_step:
8008   case X86::BI__builtin_ia32_rdrand64_step:
8009   case X86::BI__builtin_ia32_rdseed16_step:
8010   case X86::BI__builtin_ia32_rdseed32_step:
8011   case X86::BI__builtin_ia32_rdseed64_step: {
8012     Intrinsic::ID ID;
8013     switch (BuiltinID) {
8014     default: llvm_unreachable("Unsupported intrinsic!");
8015     case X86::BI__builtin_ia32_rdrand16_step:
8016       ID = Intrinsic::x86_rdrand_16;
8017       break;
8018     case X86::BI__builtin_ia32_rdrand32_step:
8019       ID = Intrinsic::x86_rdrand_32;
8020       break;
8021     case X86::BI__builtin_ia32_rdrand64_step:
8022       ID = Intrinsic::x86_rdrand_64;
8023       break;
8024     case X86::BI__builtin_ia32_rdseed16_step:
8025       ID = Intrinsic::x86_rdseed_16;
8026       break;
8027     case X86::BI__builtin_ia32_rdseed32_step:
8028       ID = Intrinsic::x86_rdseed_32;
8029       break;
8030     case X86::BI__builtin_ia32_rdseed64_step:
8031       ID = Intrinsic::x86_rdseed_64;
8032       break;
8033     }
8034 
8035     Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
8036     Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
8037                                       Ops[0]);
8038     return Builder.CreateExtractValue(Call, 1);
8039   }
8040 
8041   // SSE packed comparison intrinsics
8042   case X86::BI__builtin_ia32_cmpeqps:
8043   case X86::BI__builtin_ia32_cmpeqpd:
8044     return getVectorFCmpIR(CmpInst::FCMP_OEQ);
8045   case X86::BI__builtin_ia32_cmpltps:
8046   case X86::BI__builtin_ia32_cmpltpd:
8047     return getVectorFCmpIR(CmpInst::FCMP_OLT);
8048   case X86::BI__builtin_ia32_cmpleps:
8049   case X86::BI__builtin_ia32_cmplepd:
8050     return getVectorFCmpIR(CmpInst::FCMP_OLE);
8051   case X86::BI__builtin_ia32_cmpunordps:
8052   case X86::BI__builtin_ia32_cmpunordpd:
8053     return getVectorFCmpIR(CmpInst::FCMP_UNO);
8054   case X86::BI__builtin_ia32_cmpneqps:
8055   case X86::BI__builtin_ia32_cmpneqpd:
8056     return getVectorFCmpIR(CmpInst::FCMP_UNE);
8057   case X86::BI__builtin_ia32_cmpnltps:
8058   case X86::BI__builtin_ia32_cmpnltpd:
8059     return getVectorFCmpIR(CmpInst::FCMP_UGE);
8060   case X86::BI__builtin_ia32_cmpnleps:
8061   case X86::BI__builtin_ia32_cmpnlepd:
8062     return getVectorFCmpIR(CmpInst::FCMP_UGT);
8063   case X86::BI__builtin_ia32_cmpordps:
8064   case X86::BI__builtin_ia32_cmpordpd:
8065     return getVectorFCmpIR(CmpInst::FCMP_ORD);
8066   case X86::BI__builtin_ia32_cmpps:
8067   case X86::BI__builtin_ia32_cmpps256:
8068   case X86::BI__builtin_ia32_cmppd:
8069   case X86::BI__builtin_ia32_cmppd256: {
8070     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
8071     // If this is one of the SSE immediates, we can use native IR.
8072     if (CC < 8) {
8073       FCmpInst::Predicate Pred;
8074       switch (CC) {
8075       case 0: Pred = FCmpInst::FCMP_OEQ; break;
8076       case 1: Pred = FCmpInst::FCMP_OLT; break;
8077       case 2: Pred = FCmpInst::FCMP_OLE; break;
8078       case 3: Pred = FCmpInst::FCMP_UNO; break;
8079       case 4: Pred = FCmpInst::FCMP_UNE; break;
8080       case 5: Pred = FCmpInst::FCMP_UGE; break;
8081       case 6: Pred = FCmpInst::FCMP_UGT; break;
8082       case 7: Pred = FCmpInst::FCMP_ORD; break;
8083       }
8084       return getVectorFCmpIR(Pred);
8085     }
8086 
8087     // We can't handle 8-31 immediates with native IR, use the intrinsic.
8088     // Except for predicates that create constants.
8089     Intrinsic::ID ID;
8090     switch (BuiltinID) {
8091     default: llvm_unreachable("Unsupported intrinsic!");
8092     case X86::BI__builtin_ia32_cmpps:
8093       ID = Intrinsic::x86_sse_cmp_ps;
8094       break;
8095     case X86::BI__builtin_ia32_cmpps256:
8096       // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
8097       // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
8098       if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
8099          Value *Constant = (CC == 0xf || CC == 0x1f) ?
8100                 llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) :
8101                 llvm::Constant::getNullValue(Builder.getInt32Ty());
8102          Value *Vec = Builder.CreateVectorSplat(
8103                         Ops[0]->getType()->getVectorNumElements(), Constant);
8104          return Builder.CreateBitCast(Vec, Ops[0]->getType());
8105       }
8106       ID = Intrinsic::x86_avx_cmp_ps_256;
8107       break;
8108     case X86::BI__builtin_ia32_cmppd:
8109       ID = Intrinsic::x86_sse2_cmp_pd;
8110       break;
8111     case X86::BI__builtin_ia32_cmppd256:
8112       // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
8113       // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
8114       if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
8115          Value *Constant = (CC == 0xf || CC == 0x1f) ?
8116                 llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) :
8117                 llvm::Constant::getNullValue(Builder.getInt64Ty());
8118          Value *Vec = Builder.CreateVectorSplat(
8119                         Ops[0]->getType()->getVectorNumElements(), Constant);
8120          return Builder.CreateBitCast(Vec, Ops[0]->getType());
8121       }
8122       ID = Intrinsic::x86_avx_cmp_pd_256;
8123       break;
8124     }
8125 
8126     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
8127   }
8128 
8129   // SSE scalar comparison intrinsics
8130   case X86::BI__builtin_ia32_cmpeqss:
8131     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
8132   case X86::BI__builtin_ia32_cmpltss:
8133     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
8134   case X86::BI__builtin_ia32_cmpless:
8135     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
8136   case X86::BI__builtin_ia32_cmpunordss:
8137     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
8138   case X86::BI__builtin_ia32_cmpneqss:
8139     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
8140   case X86::BI__builtin_ia32_cmpnltss:
8141     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
8142   case X86::BI__builtin_ia32_cmpnless:
8143     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
8144   case X86::BI__builtin_ia32_cmpordss:
8145     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
8146   case X86::BI__builtin_ia32_cmpeqsd:
8147     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
8148   case X86::BI__builtin_ia32_cmpltsd:
8149     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
8150   case X86::BI__builtin_ia32_cmplesd:
8151     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
8152   case X86::BI__builtin_ia32_cmpunordsd:
8153     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
8154   case X86::BI__builtin_ia32_cmpneqsd:
8155     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
8156   case X86::BI__builtin_ia32_cmpnltsd:
8157     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
8158   case X86::BI__builtin_ia32_cmpnlesd:
8159     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
8160   case X86::BI__builtin_ia32_cmpordsd:
8161     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
8162 
8163   case X86::BI__emul:
8164   case X86::BI__emulu: {
8165     llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
8166     bool isSigned = (BuiltinID == X86::BI__emul);
8167     Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
8168     Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
8169     return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
8170   }
8171   case X86::BI__mulh:
8172   case X86::BI__umulh:
8173   case X86::BI_mul128:
8174   case X86::BI_umul128: {
8175     llvm::Type *ResType = ConvertType(E->getType());
8176     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
8177 
8178     bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
8179     Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
8180     Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
8181 
8182     Value *MulResult, *HigherBits;
8183     if (IsSigned) {
8184       MulResult = Builder.CreateNSWMul(LHS, RHS);
8185       HigherBits = Builder.CreateAShr(MulResult, 64);
8186     } else {
8187       MulResult = Builder.CreateNUWMul(LHS, RHS);
8188       HigherBits = Builder.CreateLShr(MulResult, 64);
8189     }
8190     HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
8191 
8192     if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
8193       return HigherBits;
8194 
8195     Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
8196     Builder.CreateStore(HigherBits, HighBitsAddress);
8197     return Builder.CreateIntCast(MulResult, ResType, IsSigned);
8198   }
8199 
8200   case X86::BI__faststorefence: {
8201     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
8202                                llvm::SyncScope::System);
8203   }
8204   case X86::BI_ReadWriteBarrier:
8205   case X86::BI_ReadBarrier:
8206   case X86::BI_WriteBarrier: {
8207     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
8208                                llvm::SyncScope::SingleThread);
8209   }
8210   case X86::BI_BitScanForward:
8211   case X86::BI_BitScanForward64:
8212     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
8213   case X86::BI_BitScanReverse:
8214   case X86::BI_BitScanReverse64:
8215     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
8216 
8217   case X86::BI_InterlockedAnd64:
8218     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
8219   case X86::BI_InterlockedExchange64:
8220     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
8221   case X86::BI_InterlockedExchangeAdd64:
8222     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
8223   case X86::BI_InterlockedExchangeSub64:
8224     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
8225   case X86::BI_InterlockedOr64:
8226     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
8227   case X86::BI_InterlockedXor64:
8228     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
8229   case X86::BI_InterlockedDecrement64:
8230     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
8231   case X86::BI_InterlockedIncrement64:
8232     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
8233 
8234   case X86::BI_AddressOfReturnAddress: {
8235     Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
8236     return Builder.CreateCall(F);
8237   }
8238   case X86::BI__stosb: {
8239     // We treat __stosb as a volatile memset - it may not generate "rep stosb"
8240     // instruction, but it will create a memset that won't be optimized away.
8241     return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true);
8242   }
8243   case X86::BI__ud2:
8244     // llvm.trap makes a ud2a instruction on x86.
8245     return EmitTrapCall(Intrinsic::trap);
8246   case X86::BI__int2c: {
8247     // This syscall signals a driver assertion failure in x86 NT kernels.
8248     llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
8249     llvm::InlineAsm *IA =
8250         llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true);
8251     llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
8252         getLLVMContext(), llvm::AttributeList::FunctionIndex,
8253         llvm::Attribute::NoReturn);
8254     CallSite CS = Builder.CreateCall(IA);
8255     CS.setAttributes(NoReturnAttr);
8256     return CS.getInstruction();
8257   }
8258   case X86::BI__readfsbyte:
8259   case X86::BI__readfsword:
8260   case X86::BI__readfsdword:
8261   case X86::BI__readfsqword: {
8262     llvm::Type *IntTy = ConvertType(E->getType());
8263     Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
8264                                         llvm::PointerType::get(IntTy, 257));
8265     LoadInst *Load = Builder.CreateAlignedLoad(
8266         IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
8267     Load->setVolatile(true);
8268     return Load;
8269   }
8270   case X86::BI__readgsbyte:
8271   case X86::BI__readgsword:
8272   case X86::BI__readgsdword:
8273   case X86::BI__readgsqword: {
8274     llvm::Type *IntTy = ConvertType(E->getType());
8275     Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
8276                                         llvm::PointerType::get(IntTy, 256));
8277     LoadInst *Load = Builder.CreateAlignedLoad(
8278         IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
8279     Load->setVolatile(true);
8280     return Load;
8281   }
8282   }
8283 }
8284 
8285 
8286 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
8287                                            const CallExpr *E) {
8288   SmallVector<Value*, 4> Ops;
8289 
8290   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
8291     Ops.push_back(EmitScalarExpr(E->getArg(i)));
8292 
8293   Intrinsic::ID ID = Intrinsic::not_intrinsic;
8294 
8295   switch (BuiltinID) {
8296   default: return nullptr;
8297 
8298   // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
8299   // call __builtin_readcyclecounter.
8300   case PPC::BI__builtin_ppc_get_timebase:
8301     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
8302 
8303   // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
8304   case PPC::BI__builtin_altivec_lvx:
8305   case PPC::BI__builtin_altivec_lvxl:
8306   case PPC::BI__builtin_altivec_lvebx:
8307   case PPC::BI__builtin_altivec_lvehx:
8308   case PPC::BI__builtin_altivec_lvewx:
8309   case PPC::BI__builtin_altivec_lvsl:
8310   case PPC::BI__builtin_altivec_lvsr:
8311   case PPC::BI__builtin_vsx_lxvd2x:
8312   case PPC::BI__builtin_vsx_lxvw4x:
8313   case PPC::BI__builtin_vsx_lxvd2x_be:
8314   case PPC::BI__builtin_vsx_lxvw4x_be:
8315   case PPC::BI__builtin_vsx_lxvl:
8316   case PPC::BI__builtin_vsx_lxvll:
8317   {
8318     if(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
8319        BuiltinID == PPC::BI__builtin_vsx_lxvll){
8320       Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
8321     }else {
8322       Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
8323       Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
8324       Ops.pop_back();
8325     }
8326 
8327     switch (BuiltinID) {
8328     default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
8329     case PPC::BI__builtin_altivec_lvx:
8330       ID = Intrinsic::ppc_altivec_lvx;
8331       break;
8332     case PPC::BI__builtin_altivec_lvxl:
8333       ID = Intrinsic::ppc_altivec_lvxl;
8334       break;
8335     case PPC::BI__builtin_altivec_lvebx:
8336       ID = Intrinsic::ppc_altivec_lvebx;
8337       break;
8338     case PPC::BI__builtin_altivec_lvehx:
8339       ID = Intrinsic::ppc_altivec_lvehx;
8340       break;
8341     case PPC::BI__builtin_altivec_lvewx:
8342       ID = Intrinsic::ppc_altivec_lvewx;
8343       break;
8344     case PPC::BI__builtin_altivec_lvsl:
8345       ID = Intrinsic::ppc_altivec_lvsl;
8346       break;
8347     case PPC::BI__builtin_altivec_lvsr:
8348       ID = Intrinsic::ppc_altivec_lvsr;
8349       break;
8350     case PPC::BI__builtin_vsx_lxvd2x:
8351       ID = Intrinsic::ppc_vsx_lxvd2x;
8352       break;
8353     case PPC::BI__builtin_vsx_lxvw4x:
8354       ID = Intrinsic::ppc_vsx_lxvw4x;
8355       break;
8356     case PPC::BI__builtin_vsx_lxvd2x_be:
8357       ID = Intrinsic::ppc_vsx_lxvd2x_be;
8358       break;
8359     case PPC::BI__builtin_vsx_lxvw4x_be:
8360       ID = Intrinsic::ppc_vsx_lxvw4x_be;
8361       break;
8362     case PPC::BI__builtin_vsx_lxvl:
8363       ID = Intrinsic::ppc_vsx_lxvl;
8364       break;
8365     case PPC::BI__builtin_vsx_lxvll:
8366       ID = Intrinsic::ppc_vsx_lxvll;
8367       break;
8368     }
8369     llvm::Function *F = CGM.getIntrinsic(ID);
8370     return Builder.CreateCall(F, Ops, "");
8371   }
8372 
8373   // vec_st, vec_xst_be
8374   case PPC::BI__builtin_altivec_stvx:
8375   case PPC::BI__builtin_altivec_stvxl:
8376   case PPC::BI__builtin_altivec_stvebx:
8377   case PPC::BI__builtin_altivec_stvehx:
8378   case PPC::BI__builtin_altivec_stvewx:
8379   case PPC::BI__builtin_vsx_stxvd2x:
8380   case PPC::BI__builtin_vsx_stxvw4x:
8381   case PPC::BI__builtin_vsx_stxvd2x_be:
8382   case PPC::BI__builtin_vsx_stxvw4x_be:
8383   case PPC::BI__builtin_vsx_stxvl:
8384   case PPC::BI__builtin_vsx_stxvll:
8385   {
8386     if(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
8387       BuiltinID == PPC::BI__builtin_vsx_stxvll ){
8388       Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
8389     }else {
8390       Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
8391       Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
8392       Ops.pop_back();
8393     }
8394 
8395     switch (BuiltinID) {
8396     default: llvm_unreachable("Unsupported st intrinsic!");
8397     case PPC::BI__builtin_altivec_stvx:
8398       ID = Intrinsic::ppc_altivec_stvx;
8399       break;
8400     case PPC::BI__builtin_altivec_stvxl:
8401       ID = Intrinsic::ppc_altivec_stvxl;
8402       break;
8403     case PPC::BI__builtin_altivec_stvebx:
8404       ID = Intrinsic::ppc_altivec_stvebx;
8405       break;
8406     case PPC::BI__builtin_altivec_stvehx:
8407       ID = Intrinsic::ppc_altivec_stvehx;
8408       break;
8409     case PPC::BI__builtin_altivec_stvewx:
8410       ID = Intrinsic::ppc_altivec_stvewx;
8411       break;
8412     case PPC::BI__builtin_vsx_stxvd2x:
8413       ID = Intrinsic::ppc_vsx_stxvd2x;
8414       break;
8415     case PPC::BI__builtin_vsx_stxvw4x:
8416       ID = Intrinsic::ppc_vsx_stxvw4x;
8417       break;
8418     case PPC::BI__builtin_vsx_stxvd2x_be:
8419       ID = Intrinsic::ppc_vsx_stxvd2x_be;
8420       break;
8421     case PPC::BI__builtin_vsx_stxvw4x_be:
8422       ID = Intrinsic::ppc_vsx_stxvw4x_be;
8423       break;
8424     case PPC::BI__builtin_vsx_stxvl:
8425       ID = Intrinsic::ppc_vsx_stxvl;
8426       break;
8427     case PPC::BI__builtin_vsx_stxvll:
8428       ID = Intrinsic::ppc_vsx_stxvll;
8429       break;
8430     }
8431     llvm::Function *F = CGM.getIntrinsic(ID);
8432     return Builder.CreateCall(F, Ops, "");
8433   }
8434   // Square root
8435   case PPC::BI__builtin_vsx_xvsqrtsp:
8436   case PPC::BI__builtin_vsx_xvsqrtdp: {
8437     llvm::Type *ResultType = ConvertType(E->getType());
8438     Value *X = EmitScalarExpr(E->getArg(0));
8439     ID = Intrinsic::sqrt;
8440     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8441     return Builder.CreateCall(F, X);
8442   }
8443   // Count leading zeros
8444   case PPC::BI__builtin_altivec_vclzb:
8445   case PPC::BI__builtin_altivec_vclzh:
8446   case PPC::BI__builtin_altivec_vclzw:
8447   case PPC::BI__builtin_altivec_vclzd: {
8448     llvm::Type *ResultType = ConvertType(E->getType());
8449     Value *X = EmitScalarExpr(E->getArg(0));
8450     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8451     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
8452     return Builder.CreateCall(F, {X, Undef});
8453   }
8454   case PPC::BI__builtin_altivec_vctzb:
8455   case PPC::BI__builtin_altivec_vctzh:
8456   case PPC::BI__builtin_altivec_vctzw:
8457   case PPC::BI__builtin_altivec_vctzd: {
8458     llvm::Type *ResultType = ConvertType(E->getType());
8459     Value *X = EmitScalarExpr(E->getArg(0));
8460     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8461     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
8462     return Builder.CreateCall(F, {X, Undef});
8463   }
8464   case PPC::BI__builtin_altivec_vpopcntb:
8465   case PPC::BI__builtin_altivec_vpopcnth:
8466   case PPC::BI__builtin_altivec_vpopcntw:
8467   case PPC::BI__builtin_altivec_vpopcntd: {
8468     llvm::Type *ResultType = ConvertType(E->getType());
8469     Value *X = EmitScalarExpr(E->getArg(0));
8470     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
8471     return Builder.CreateCall(F, X);
8472   }
8473   // Copy sign
8474   case PPC::BI__builtin_vsx_xvcpsgnsp:
8475   case PPC::BI__builtin_vsx_xvcpsgndp: {
8476     llvm::Type *ResultType = ConvertType(E->getType());
8477     Value *X = EmitScalarExpr(E->getArg(0));
8478     Value *Y = EmitScalarExpr(E->getArg(1));
8479     ID = Intrinsic::copysign;
8480     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8481     return Builder.CreateCall(F, {X, Y});
8482   }
8483   // Rounding/truncation
8484   case PPC::BI__builtin_vsx_xvrspip:
8485   case PPC::BI__builtin_vsx_xvrdpip:
8486   case PPC::BI__builtin_vsx_xvrdpim:
8487   case PPC::BI__builtin_vsx_xvrspim:
8488   case PPC::BI__builtin_vsx_xvrdpi:
8489   case PPC::BI__builtin_vsx_xvrspi:
8490   case PPC::BI__builtin_vsx_xvrdpic:
8491   case PPC::BI__builtin_vsx_xvrspic:
8492   case PPC::BI__builtin_vsx_xvrdpiz:
8493   case PPC::BI__builtin_vsx_xvrspiz: {
8494     llvm::Type *ResultType = ConvertType(E->getType());
8495     Value *X = EmitScalarExpr(E->getArg(0));
8496     if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
8497         BuiltinID == PPC::BI__builtin_vsx_xvrspim)
8498       ID = Intrinsic::floor;
8499     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
8500              BuiltinID == PPC::BI__builtin_vsx_xvrspi)
8501       ID = Intrinsic::round;
8502     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
8503              BuiltinID == PPC::BI__builtin_vsx_xvrspic)
8504       ID = Intrinsic::nearbyint;
8505     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
8506              BuiltinID == PPC::BI__builtin_vsx_xvrspip)
8507       ID = Intrinsic::ceil;
8508     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
8509              BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
8510       ID = Intrinsic::trunc;
8511     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8512     return Builder.CreateCall(F, X);
8513   }
8514 
8515   // Absolute value
8516   case PPC::BI__builtin_vsx_xvabsdp:
8517   case PPC::BI__builtin_vsx_xvabssp: {
8518     llvm::Type *ResultType = ConvertType(E->getType());
8519     Value *X = EmitScalarExpr(E->getArg(0));
8520     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8521     return Builder.CreateCall(F, X);
8522   }
8523 
8524   // FMA variations
8525   case PPC::BI__builtin_vsx_xvmaddadp:
8526   case PPC::BI__builtin_vsx_xvmaddasp:
8527   case PPC::BI__builtin_vsx_xvnmaddadp:
8528   case PPC::BI__builtin_vsx_xvnmaddasp:
8529   case PPC::BI__builtin_vsx_xvmsubadp:
8530   case PPC::BI__builtin_vsx_xvmsubasp:
8531   case PPC::BI__builtin_vsx_xvnmsubadp:
8532   case PPC::BI__builtin_vsx_xvnmsubasp: {
8533     llvm::Type *ResultType = ConvertType(E->getType());
8534     Value *X = EmitScalarExpr(E->getArg(0));
8535     Value *Y = EmitScalarExpr(E->getArg(1));
8536     Value *Z = EmitScalarExpr(E->getArg(2));
8537     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8538     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8539     switch (BuiltinID) {
8540       case PPC::BI__builtin_vsx_xvmaddadp:
8541       case PPC::BI__builtin_vsx_xvmaddasp:
8542         return Builder.CreateCall(F, {X, Y, Z});
8543       case PPC::BI__builtin_vsx_xvnmaddadp:
8544       case PPC::BI__builtin_vsx_xvnmaddasp:
8545         return Builder.CreateFSub(Zero,
8546                                   Builder.CreateCall(F, {X, Y, Z}), "sub");
8547       case PPC::BI__builtin_vsx_xvmsubadp:
8548       case PPC::BI__builtin_vsx_xvmsubasp:
8549         return Builder.CreateCall(F,
8550                                   {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8551       case PPC::BI__builtin_vsx_xvnmsubadp:
8552       case PPC::BI__builtin_vsx_xvnmsubasp:
8553         Value *FsubRes =
8554           Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8555         return Builder.CreateFSub(Zero, FsubRes, "sub");
8556     }
8557     llvm_unreachable("Unknown FMA operation");
8558     return nullptr; // Suppress no-return warning
8559   }
8560 
8561   case PPC::BI__builtin_vsx_insertword: {
8562     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
8563 
8564     // Third argument is a compile time constant int. It must be clamped to
8565     // to the range [0, 12].
8566     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8567     assert(ArgCI &&
8568            "Third arg to xxinsertw intrinsic must be constant integer");
8569     const int64_t MaxIndex = 12;
8570     int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
8571 
8572     // The builtin semantics don't exactly match the xxinsertw instructions
8573     // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
8574     // word from the first argument, and inserts it in the second argument. The
8575     // instruction extracts the word from its second input register and inserts
8576     // it into its first input register, so swap the first and second arguments.
8577     std::swap(Ops[0], Ops[1]);
8578 
8579     // Need to cast the second argument from a vector of unsigned int to a
8580     // vector of long long.
8581     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
8582 
8583     if (getTarget().isLittleEndian()) {
8584       // Create a shuffle mask of (1, 0)
8585       Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
8586                                    ConstantInt::get(Int32Ty, 0)
8587                                  };
8588       Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8589 
8590       // Reverse the double words in the vector we will extract from.
8591       Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8592       Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask);
8593 
8594       // Reverse the index.
8595       Index = MaxIndex - Index;
8596     }
8597 
8598     // Intrinsic expects the first arg to be a vector of int.
8599     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
8600     Ops[2] = ConstantInt::getSigned(Int32Ty, Index);
8601     return Builder.CreateCall(F, Ops);
8602   }
8603 
8604   case PPC::BI__builtin_vsx_extractuword: {
8605     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
8606 
8607     // Intrinsic expects the first argument to be a vector of doublewords.
8608     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8609 
8610     // The second argument is a compile time constant int that needs to
8611     // be clamped to the range [0, 12].
8612     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]);
8613     assert(ArgCI &&
8614            "Second Arg to xxextractuw intrinsic must be a constant integer!");
8615     const int64_t MaxIndex = 12;
8616     int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
8617 
8618     if (getTarget().isLittleEndian()) {
8619       // Reverse the index.
8620       Index = MaxIndex - Index;
8621       Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
8622 
8623       // Emit the call, then reverse the double words of the results vector.
8624       Value *Call = Builder.CreateCall(F, Ops);
8625 
8626       // Create a shuffle mask of (1, 0)
8627       Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
8628                                    ConstantInt::get(Int32Ty, 0)
8629                                  };
8630       Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8631 
8632       Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask);
8633       return ShuffleCall;
8634     } else {
8635       Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
8636       return Builder.CreateCall(F, Ops);
8637     }
8638   }
8639 
8640   case PPC::BI__builtin_vsx_xxpermdi: {
8641     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8642     assert(ArgCI && "Third arg must be constant integer!");
8643 
8644     unsigned Index = ArgCI->getZExtValue();
8645     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8646     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
8647 
8648     // Element zero comes from the first input vector and element one comes from
8649     // the second. The element indices within each vector are numbered in big
8650     // endian order so the shuffle mask must be adjusted for this on little
8651     // endian platforms (i.e. index is complemented and source vector reversed).
8652     unsigned ElemIdx0;
8653     unsigned ElemIdx1;
8654     if (getTarget().isLittleEndian()) {
8655       ElemIdx0 = (~Index & 1) + 2;
8656       ElemIdx1 = (~Index & 2) >> 1;
8657     } else { // BigEndian
8658       ElemIdx0 = (Index & 2) >> 1;
8659       ElemIdx1 = 2 + (Index & 1);
8660     }
8661 
8662     Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0),
8663                                 ConstantInt::get(Int32Ty, ElemIdx1)};
8664     Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8665 
8666     Value *ShuffleCall =
8667         Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
8668     QualType BIRetType = E->getType();
8669     auto RetTy = ConvertType(BIRetType);
8670     return Builder.CreateBitCast(ShuffleCall, RetTy);
8671   }
8672 
8673   case PPC::BI__builtin_vsx_xxsldwi: {
8674     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8675     assert(ArgCI && "Third argument must be a compile time constant");
8676     unsigned Index = ArgCI->getZExtValue() & 0x3;
8677     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
8678     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4));
8679 
8680     // Create a shuffle mask
8681     unsigned ElemIdx0;
8682     unsigned ElemIdx1;
8683     unsigned ElemIdx2;
8684     unsigned ElemIdx3;
8685     if (getTarget().isLittleEndian()) {
8686       // Little endian element N comes from element 8+N-Index of the
8687       // concatenated wide vector (of course, using modulo arithmetic on
8688       // the total number of elements).
8689       ElemIdx0 = (8 - Index) % 8;
8690       ElemIdx1 = (9 - Index) % 8;
8691       ElemIdx2 = (10 - Index) % 8;
8692       ElemIdx3 = (11 - Index) % 8;
8693     } else {
8694       // Big endian ElemIdx<N> = Index + N
8695       ElemIdx0 = Index;
8696       ElemIdx1 = Index + 1;
8697       ElemIdx2 = Index + 2;
8698       ElemIdx3 = Index + 3;
8699     }
8700 
8701     Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0),
8702                                 ConstantInt::get(Int32Ty, ElemIdx1),
8703                                 ConstantInt::get(Int32Ty, ElemIdx2),
8704                                 ConstantInt::get(Int32Ty, ElemIdx3)};
8705 
8706     Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8707     Value *ShuffleCall =
8708         Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
8709     QualType BIRetType = E->getType();
8710     auto RetTy = ConvertType(BIRetType);
8711     return Builder.CreateBitCast(ShuffleCall, RetTy);
8712   }
8713   }
8714 }
8715 
8716 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
8717                                               const CallExpr *E) {
8718   switch (BuiltinID) {
8719   case AMDGPU::BI__builtin_amdgcn_div_scale:
8720   case AMDGPU::BI__builtin_amdgcn_div_scalef: {
8721     // Translate from the intrinsics's struct return to the builtin's out
8722     // argument.
8723 
8724     Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
8725 
8726     llvm::Value *X = EmitScalarExpr(E->getArg(0));
8727     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
8728     llvm::Value *Z = EmitScalarExpr(E->getArg(2));
8729 
8730     llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
8731                                            X->getType());
8732 
8733     llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
8734 
8735     llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
8736     llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
8737 
8738     llvm::Type *RealFlagType
8739       = FlagOutPtr.getPointer()->getType()->getPointerElementType();
8740 
8741     llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
8742     Builder.CreateStore(FlagExt, FlagOutPtr);
8743     return Result;
8744   }
8745   case AMDGPU::BI__builtin_amdgcn_div_fmas:
8746   case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
8747     llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
8748     llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
8749     llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
8750     llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
8751 
8752     llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
8753                                       Src0->getType());
8754     llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
8755     return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
8756   }
8757 
8758   case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
8759     return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
8760   case AMDGPU::BI__builtin_amdgcn_mov_dpp: {
8761     llvm::SmallVector<llvm::Value *, 5> Args;
8762     for (unsigned I = 0; I != 5; ++I)
8763       Args.push_back(EmitScalarExpr(E->getArg(I)));
8764     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp,
8765                                     Args[0]->getType());
8766     return Builder.CreateCall(F, Args);
8767   }
8768   case AMDGPU::BI__builtin_amdgcn_div_fixup:
8769   case AMDGPU::BI__builtin_amdgcn_div_fixupf:
8770   case AMDGPU::BI__builtin_amdgcn_div_fixuph:
8771     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
8772   case AMDGPU::BI__builtin_amdgcn_trig_preop:
8773   case AMDGPU::BI__builtin_amdgcn_trig_preopf:
8774     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
8775   case AMDGPU::BI__builtin_amdgcn_rcp:
8776   case AMDGPU::BI__builtin_amdgcn_rcpf:
8777   case AMDGPU::BI__builtin_amdgcn_rcph:
8778     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
8779   case AMDGPU::BI__builtin_amdgcn_rsq:
8780   case AMDGPU::BI__builtin_amdgcn_rsqf:
8781   case AMDGPU::BI__builtin_amdgcn_rsqh:
8782     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
8783   case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
8784   case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
8785     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
8786   case AMDGPU::BI__builtin_amdgcn_sinf:
8787   case AMDGPU::BI__builtin_amdgcn_sinh:
8788     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
8789   case AMDGPU::BI__builtin_amdgcn_cosf:
8790   case AMDGPU::BI__builtin_amdgcn_cosh:
8791     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
8792   case AMDGPU::BI__builtin_amdgcn_log_clampf:
8793     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
8794   case AMDGPU::BI__builtin_amdgcn_ldexp:
8795   case AMDGPU::BI__builtin_amdgcn_ldexpf:
8796   case AMDGPU::BI__builtin_amdgcn_ldexph:
8797     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
8798   case AMDGPU::BI__builtin_amdgcn_frexp_mant:
8799   case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
8800   case AMDGPU::BI__builtin_amdgcn_frexp_manth:
8801     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
8802   case AMDGPU::BI__builtin_amdgcn_frexp_exp:
8803   case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
8804     Value *Src0 = EmitScalarExpr(E->getArg(0));
8805     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
8806                                 { Builder.getInt32Ty(), Src0->getType() });
8807     return Builder.CreateCall(F, Src0);
8808   }
8809   case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
8810     Value *Src0 = EmitScalarExpr(E->getArg(0));
8811     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
8812                                 { Builder.getInt16Ty(), Src0->getType() });
8813     return Builder.CreateCall(F, Src0);
8814   }
8815   case AMDGPU::BI__builtin_amdgcn_fract:
8816   case AMDGPU::BI__builtin_amdgcn_fractf:
8817   case AMDGPU::BI__builtin_amdgcn_fracth:
8818     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
8819   case AMDGPU::BI__builtin_amdgcn_lerp:
8820     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
8821   case AMDGPU::BI__builtin_amdgcn_uicmp:
8822   case AMDGPU::BI__builtin_amdgcn_uicmpl:
8823   case AMDGPU::BI__builtin_amdgcn_sicmp:
8824   case AMDGPU::BI__builtin_amdgcn_sicmpl:
8825     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp);
8826   case AMDGPU::BI__builtin_amdgcn_fcmp:
8827   case AMDGPU::BI__builtin_amdgcn_fcmpf:
8828     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp);
8829   case AMDGPU::BI__builtin_amdgcn_class:
8830   case AMDGPU::BI__builtin_amdgcn_classf:
8831   case AMDGPU::BI__builtin_amdgcn_classh:
8832     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
8833   case AMDGPU::BI__builtin_amdgcn_fmed3f:
8834   case AMDGPU::BI__builtin_amdgcn_fmed3h:
8835     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
8836   case AMDGPU::BI__builtin_amdgcn_read_exec: {
8837     CallInst *CI = cast<CallInst>(
8838       EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
8839     CI->setConvergent();
8840     return CI;
8841   }
8842 
8843   // amdgcn workitem
8844   case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
8845     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
8846   case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
8847     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
8848   case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
8849     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
8850 
8851   // r600 intrinsics
8852   case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
8853   case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
8854     return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
8855   case AMDGPU::BI__builtin_r600_read_tidig_x:
8856     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
8857   case AMDGPU::BI__builtin_r600_read_tidig_y:
8858     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
8859   case AMDGPU::BI__builtin_r600_read_tidig_z:
8860     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
8861   default:
8862     return nullptr;
8863   }
8864 }
8865 
8866 /// Handle a SystemZ function in which the final argument is a pointer
8867 /// to an int that receives the post-instruction CC value.  At the LLVM level
8868 /// this is represented as a function that returns a {result, cc} pair.
8869 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
8870                                          unsigned IntrinsicID,
8871                                          const CallExpr *E) {
8872   unsigned NumArgs = E->getNumArgs() - 1;
8873   SmallVector<Value *, 8> Args(NumArgs);
8874   for (unsigned I = 0; I < NumArgs; ++I)
8875     Args[I] = CGF.EmitScalarExpr(E->getArg(I));
8876   Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
8877   Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
8878   Value *Call = CGF.Builder.CreateCall(F, Args);
8879   Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
8880   CGF.Builder.CreateStore(CC, CCPtr);
8881   return CGF.Builder.CreateExtractValue(Call, 0);
8882 }
8883 
8884 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
8885                                                const CallExpr *E) {
8886   switch (BuiltinID) {
8887   case SystemZ::BI__builtin_tbegin: {
8888     Value *TDB = EmitScalarExpr(E->getArg(0));
8889     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
8890     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
8891     return Builder.CreateCall(F, {TDB, Control});
8892   }
8893   case SystemZ::BI__builtin_tbegin_nofloat: {
8894     Value *TDB = EmitScalarExpr(E->getArg(0));
8895     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
8896     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
8897     return Builder.CreateCall(F, {TDB, Control});
8898   }
8899   case SystemZ::BI__builtin_tbeginc: {
8900     Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
8901     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
8902     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
8903     return Builder.CreateCall(F, {TDB, Control});
8904   }
8905   case SystemZ::BI__builtin_tabort: {
8906     Value *Data = EmitScalarExpr(E->getArg(0));
8907     Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
8908     return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
8909   }
8910   case SystemZ::BI__builtin_non_tx_store: {
8911     Value *Address = EmitScalarExpr(E->getArg(0));
8912     Value *Data = EmitScalarExpr(E->getArg(1));
8913     Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
8914     return Builder.CreateCall(F, {Data, Address});
8915   }
8916 
8917   // Vector builtins.  Note that most vector builtins are mapped automatically
8918   // to target-specific LLVM intrinsics.  The ones handled specially here can
8919   // be represented via standard LLVM IR, which is preferable to enable common
8920   // LLVM optimizations.
8921 
8922   case SystemZ::BI__builtin_s390_vpopctb:
8923   case SystemZ::BI__builtin_s390_vpopcth:
8924   case SystemZ::BI__builtin_s390_vpopctf:
8925   case SystemZ::BI__builtin_s390_vpopctg: {
8926     llvm::Type *ResultType = ConvertType(E->getType());
8927     Value *X = EmitScalarExpr(E->getArg(0));
8928     Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
8929     return Builder.CreateCall(F, X);
8930   }
8931 
8932   case SystemZ::BI__builtin_s390_vclzb:
8933   case SystemZ::BI__builtin_s390_vclzh:
8934   case SystemZ::BI__builtin_s390_vclzf:
8935   case SystemZ::BI__builtin_s390_vclzg: {
8936     llvm::Type *ResultType = ConvertType(E->getType());
8937     Value *X = EmitScalarExpr(E->getArg(0));
8938     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8939     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
8940     return Builder.CreateCall(F, {X, Undef});
8941   }
8942 
8943   case SystemZ::BI__builtin_s390_vctzb:
8944   case SystemZ::BI__builtin_s390_vctzh:
8945   case SystemZ::BI__builtin_s390_vctzf:
8946   case SystemZ::BI__builtin_s390_vctzg: {
8947     llvm::Type *ResultType = ConvertType(E->getType());
8948     Value *X = EmitScalarExpr(E->getArg(0));
8949     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8950     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
8951     return Builder.CreateCall(F, {X, Undef});
8952   }
8953 
8954   case SystemZ::BI__builtin_s390_vfsqsb:
8955   case SystemZ::BI__builtin_s390_vfsqdb: {
8956     llvm::Type *ResultType = ConvertType(E->getType());
8957     Value *X = EmitScalarExpr(E->getArg(0));
8958     Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
8959     return Builder.CreateCall(F, X);
8960   }
8961   case SystemZ::BI__builtin_s390_vfmasb:
8962   case SystemZ::BI__builtin_s390_vfmadb: {
8963     llvm::Type *ResultType = ConvertType(E->getType());
8964     Value *X = EmitScalarExpr(E->getArg(0));
8965     Value *Y = EmitScalarExpr(E->getArg(1));
8966     Value *Z = EmitScalarExpr(E->getArg(2));
8967     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8968     return Builder.CreateCall(F, {X, Y, Z});
8969   }
8970   case SystemZ::BI__builtin_s390_vfmssb:
8971   case SystemZ::BI__builtin_s390_vfmsdb: {
8972     llvm::Type *ResultType = ConvertType(E->getType());
8973     Value *X = EmitScalarExpr(E->getArg(0));
8974     Value *Y = EmitScalarExpr(E->getArg(1));
8975     Value *Z = EmitScalarExpr(E->getArg(2));
8976     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8977     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8978     return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8979   }
8980   case SystemZ::BI__builtin_s390_vfnmasb:
8981   case SystemZ::BI__builtin_s390_vfnmadb: {
8982     llvm::Type *ResultType = ConvertType(E->getType());
8983     Value *X = EmitScalarExpr(E->getArg(0));
8984     Value *Y = EmitScalarExpr(E->getArg(1));
8985     Value *Z = EmitScalarExpr(E->getArg(2));
8986     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8987     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8988     return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, Z}), "sub");
8989   }
8990   case SystemZ::BI__builtin_s390_vfnmssb:
8991   case SystemZ::BI__builtin_s390_vfnmsdb: {
8992     llvm::Type *ResultType = ConvertType(E->getType());
8993     Value *X = EmitScalarExpr(E->getArg(0));
8994     Value *Y = EmitScalarExpr(E->getArg(1));
8995     Value *Z = EmitScalarExpr(E->getArg(2));
8996     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8997     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8998     Value *NegZ = Builder.CreateFSub(Zero, Z, "sub");
8999     return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, NegZ}));
9000   }
9001   case SystemZ::BI__builtin_s390_vflpsb:
9002   case SystemZ::BI__builtin_s390_vflpdb: {
9003     llvm::Type *ResultType = ConvertType(E->getType());
9004     Value *X = EmitScalarExpr(E->getArg(0));
9005     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
9006     return Builder.CreateCall(F, X);
9007   }
9008   case SystemZ::BI__builtin_s390_vflnsb:
9009   case SystemZ::BI__builtin_s390_vflndb: {
9010     llvm::Type *ResultType = ConvertType(E->getType());
9011     Value *X = EmitScalarExpr(E->getArg(0));
9012     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
9013     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
9014     return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
9015   }
9016   case SystemZ::BI__builtin_s390_vfisb:
9017   case SystemZ::BI__builtin_s390_vfidb: {
9018     llvm::Type *ResultType = ConvertType(E->getType());
9019     Value *X = EmitScalarExpr(E->getArg(0));
9020     // Constant-fold the M4 and M5 mask arguments.
9021     llvm::APSInt M4, M5;
9022     bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
9023     bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
9024     assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
9025     (void)IsConstM4; (void)IsConstM5;
9026     // Check whether this instance can be represented via a LLVM standard
9027     // intrinsic.  We only support some combinations of M4 and M5.
9028     Intrinsic::ID ID = Intrinsic::not_intrinsic;
9029     switch (M4.getZExtValue()) {
9030     default: break;
9031     case 0:  // IEEE-inexact exception allowed
9032       switch (M5.getZExtValue()) {
9033       default: break;
9034       case 0: ID = Intrinsic::rint; break;
9035       }
9036       break;
9037     case 4:  // IEEE-inexact exception suppressed
9038       switch (M5.getZExtValue()) {
9039       default: break;
9040       case 0: ID = Intrinsic::nearbyint; break;
9041       case 1: ID = Intrinsic::round; break;
9042       case 5: ID = Intrinsic::trunc; break;
9043       case 6: ID = Intrinsic::ceil; break;
9044       case 7: ID = Intrinsic::floor; break;
9045       }
9046       break;
9047     }
9048     if (ID != Intrinsic::not_intrinsic) {
9049       Function *F = CGM.getIntrinsic(ID, ResultType);
9050       return Builder.CreateCall(F, X);
9051     }
9052     switch (BuiltinID) {
9053       case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
9054       case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
9055       default: llvm_unreachable("Unknown BuiltinID");
9056     }
9057     Function *F = CGM.getIntrinsic(ID);
9058     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
9059     Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
9060     return Builder.CreateCall(F, {X, M4Value, M5Value});
9061   }
9062   case SystemZ::BI__builtin_s390_vfmaxsb:
9063   case SystemZ::BI__builtin_s390_vfmaxdb: {
9064     llvm::Type *ResultType = ConvertType(E->getType());
9065     Value *X = EmitScalarExpr(E->getArg(0));
9066     Value *Y = EmitScalarExpr(E->getArg(1));
9067     // Constant-fold the M4 mask argument.
9068     llvm::APSInt M4;
9069     bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
9070     assert(IsConstM4 && "Constant arg isn't actually constant?");
9071     (void)IsConstM4;
9072     // Check whether this instance can be represented via a LLVM standard
9073     // intrinsic.  We only support some values of M4.
9074     Intrinsic::ID ID = Intrinsic::not_intrinsic;
9075     switch (M4.getZExtValue()) {
9076     default: break;
9077     case 4: ID = Intrinsic::maxnum; break;
9078     }
9079     if (ID != Intrinsic::not_intrinsic) {
9080       Function *F = CGM.getIntrinsic(ID, ResultType);
9081       return Builder.CreateCall(F, {X, Y});
9082     }
9083     switch (BuiltinID) {
9084       case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
9085       case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
9086       default: llvm_unreachable("Unknown BuiltinID");
9087     }
9088     Function *F = CGM.getIntrinsic(ID);
9089     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
9090     return Builder.CreateCall(F, {X, Y, M4Value});
9091   }
9092   case SystemZ::BI__builtin_s390_vfminsb:
9093   case SystemZ::BI__builtin_s390_vfmindb: {
9094     llvm::Type *ResultType = ConvertType(E->getType());
9095     Value *X = EmitScalarExpr(E->getArg(0));
9096     Value *Y = EmitScalarExpr(E->getArg(1));
9097     // Constant-fold the M4 mask argument.
9098     llvm::APSInt M4;
9099     bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
9100     assert(IsConstM4 && "Constant arg isn't actually constant?");
9101     (void)IsConstM4;
9102     // Check whether this instance can be represented via a LLVM standard
9103     // intrinsic.  We only support some values of M4.
9104     Intrinsic::ID ID = Intrinsic::not_intrinsic;
9105     switch (M4.getZExtValue()) {
9106     default: break;
9107     case 4: ID = Intrinsic::minnum; break;
9108     }
9109     if (ID != Intrinsic::not_intrinsic) {
9110       Function *F = CGM.getIntrinsic(ID, ResultType);
9111       return Builder.CreateCall(F, {X, Y});
9112     }
9113     switch (BuiltinID) {
9114       case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
9115       case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
9116       default: llvm_unreachable("Unknown BuiltinID");
9117     }
9118     Function *F = CGM.getIntrinsic(ID);
9119     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
9120     return Builder.CreateCall(F, {X, Y, M4Value});
9121   }
9122 
9123   // Vector intrisincs that output the post-instruction CC value.
9124 
9125 #define INTRINSIC_WITH_CC(NAME) \
9126     case SystemZ::BI__builtin_##NAME: \
9127       return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
9128 
9129   INTRINSIC_WITH_CC(s390_vpkshs);
9130   INTRINSIC_WITH_CC(s390_vpksfs);
9131   INTRINSIC_WITH_CC(s390_vpksgs);
9132 
9133   INTRINSIC_WITH_CC(s390_vpklshs);
9134   INTRINSIC_WITH_CC(s390_vpklsfs);
9135   INTRINSIC_WITH_CC(s390_vpklsgs);
9136 
9137   INTRINSIC_WITH_CC(s390_vceqbs);
9138   INTRINSIC_WITH_CC(s390_vceqhs);
9139   INTRINSIC_WITH_CC(s390_vceqfs);
9140   INTRINSIC_WITH_CC(s390_vceqgs);
9141 
9142   INTRINSIC_WITH_CC(s390_vchbs);
9143   INTRINSIC_WITH_CC(s390_vchhs);
9144   INTRINSIC_WITH_CC(s390_vchfs);
9145   INTRINSIC_WITH_CC(s390_vchgs);
9146 
9147   INTRINSIC_WITH_CC(s390_vchlbs);
9148   INTRINSIC_WITH_CC(s390_vchlhs);
9149   INTRINSIC_WITH_CC(s390_vchlfs);
9150   INTRINSIC_WITH_CC(s390_vchlgs);
9151 
9152   INTRINSIC_WITH_CC(s390_vfaebs);
9153   INTRINSIC_WITH_CC(s390_vfaehs);
9154   INTRINSIC_WITH_CC(s390_vfaefs);
9155 
9156   INTRINSIC_WITH_CC(s390_vfaezbs);
9157   INTRINSIC_WITH_CC(s390_vfaezhs);
9158   INTRINSIC_WITH_CC(s390_vfaezfs);
9159 
9160   INTRINSIC_WITH_CC(s390_vfeebs);
9161   INTRINSIC_WITH_CC(s390_vfeehs);
9162   INTRINSIC_WITH_CC(s390_vfeefs);
9163 
9164   INTRINSIC_WITH_CC(s390_vfeezbs);
9165   INTRINSIC_WITH_CC(s390_vfeezhs);
9166   INTRINSIC_WITH_CC(s390_vfeezfs);
9167 
9168   INTRINSIC_WITH_CC(s390_vfenebs);
9169   INTRINSIC_WITH_CC(s390_vfenehs);
9170   INTRINSIC_WITH_CC(s390_vfenefs);
9171 
9172   INTRINSIC_WITH_CC(s390_vfenezbs);
9173   INTRINSIC_WITH_CC(s390_vfenezhs);
9174   INTRINSIC_WITH_CC(s390_vfenezfs);
9175 
9176   INTRINSIC_WITH_CC(s390_vistrbs);
9177   INTRINSIC_WITH_CC(s390_vistrhs);
9178   INTRINSIC_WITH_CC(s390_vistrfs);
9179 
9180   INTRINSIC_WITH_CC(s390_vstrcbs);
9181   INTRINSIC_WITH_CC(s390_vstrchs);
9182   INTRINSIC_WITH_CC(s390_vstrcfs);
9183 
9184   INTRINSIC_WITH_CC(s390_vstrczbs);
9185   INTRINSIC_WITH_CC(s390_vstrczhs);
9186   INTRINSIC_WITH_CC(s390_vstrczfs);
9187 
9188   INTRINSIC_WITH_CC(s390_vfcesbs);
9189   INTRINSIC_WITH_CC(s390_vfcedbs);
9190   INTRINSIC_WITH_CC(s390_vfchsbs);
9191   INTRINSIC_WITH_CC(s390_vfchdbs);
9192   INTRINSIC_WITH_CC(s390_vfchesbs);
9193   INTRINSIC_WITH_CC(s390_vfchedbs);
9194 
9195   INTRINSIC_WITH_CC(s390_vftcisb);
9196   INTRINSIC_WITH_CC(s390_vftcidb);
9197 
9198 #undef INTRINSIC_WITH_CC
9199 
9200   default:
9201     return nullptr;
9202   }
9203 }
9204 
9205 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
9206                                              const CallExpr *E) {
9207   auto MakeLdg = [&](unsigned IntrinsicID) {
9208     Value *Ptr = EmitScalarExpr(E->getArg(0));
9209     clang::CharUnits Align =
9210         getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
9211     return Builder.CreateCall(
9212         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
9213                                        Ptr->getType()}),
9214         {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
9215   };
9216   auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
9217     Value *Ptr = EmitScalarExpr(E->getArg(0));
9218     return Builder.CreateCall(
9219         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
9220                                        Ptr->getType()}),
9221         {Ptr, EmitScalarExpr(E->getArg(1))});
9222   };
9223   switch (BuiltinID) {
9224   case NVPTX::BI__nvvm_atom_add_gen_i:
9225   case NVPTX::BI__nvvm_atom_add_gen_l:
9226   case NVPTX::BI__nvvm_atom_add_gen_ll:
9227     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
9228 
9229   case NVPTX::BI__nvvm_atom_sub_gen_i:
9230   case NVPTX::BI__nvvm_atom_sub_gen_l:
9231   case NVPTX::BI__nvvm_atom_sub_gen_ll:
9232     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
9233 
9234   case NVPTX::BI__nvvm_atom_and_gen_i:
9235   case NVPTX::BI__nvvm_atom_and_gen_l:
9236   case NVPTX::BI__nvvm_atom_and_gen_ll:
9237     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
9238 
9239   case NVPTX::BI__nvvm_atom_or_gen_i:
9240   case NVPTX::BI__nvvm_atom_or_gen_l:
9241   case NVPTX::BI__nvvm_atom_or_gen_ll:
9242     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
9243 
9244   case NVPTX::BI__nvvm_atom_xor_gen_i:
9245   case NVPTX::BI__nvvm_atom_xor_gen_l:
9246   case NVPTX::BI__nvvm_atom_xor_gen_ll:
9247     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
9248 
9249   case NVPTX::BI__nvvm_atom_xchg_gen_i:
9250   case NVPTX::BI__nvvm_atom_xchg_gen_l:
9251   case NVPTX::BI__nvvm_atom_xchg_gen_ll:
9252     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
9253 
9254   case NVPTX::BI__nvvm_atom_max_gen_i:
9255   case NVPTX::BI__nvvm_atom_max_gen_l:
9256   case NVPTX::BI__nvvm_atom_max_gen_ll:
9257     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
9258 
9259   case NVPTX::BI__nvvm_atom_max_gen_ui:
9260   case NVPTX::BI__nvvm_atom_max_gen_ul:
9261   case NVPTX::BI__nvvm_atom_max_gen_ull:
9262     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
9263 
9264   case NVPTX::BI__nvvm_atom_min_gen_i:
9265   case NVPTX::BI__nvvm_atom_min_gen_l:
9266   case NVPTX::BI__nvvm_atom_min_gen_ll:
9267     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
9268 
9269   case NVPTX::BI__nvvm_atom_min_gen_ui:
9270   case NVPTX::BI__nvvm_atom_min_gen_ul:
9271   case NVPTX::BI__nvvm_atom_min_gen_ull:
9272     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
9273 
9274   case NVPTX::BI__nvvm_atom_cas_gen_i:
9275   case NVPTX::BI__nvvm_atom_cas_gen_l:
9276   case NVPTX::BI__nvvm_atom_cas_gen_ll:
9277     // __nvvm_atom_cas_gen_* should return the old value rather than the
9278     // success flag.
9279     return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
9280 
9281   case NVPTX::BI__nvvm_atom_add_gen_f: {
9282     Value *Ptr = EmitScalarExpr(E->getArg(0));
9283     Value *Val = EmitScalarExpr(E->getArg(1));
9284     // atomicrmw only deals with integer arguments so we need to use
9285     // LLVM's nvvm_atomic_load_add_f32 intrinsic for that.
9286     Value *FnALAF32 =
9287         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
9288     return Builder.CreateCall(FnALAF32, {Ptr, Val});
9289   }
9290 
9291   case NVPTX::BI__nvvm_atom_inc_gen_ui: {
9292     Value *Ptr = EmitScalarExpr(E->getArg(0));
9293     Value *Val = EmitScalarExpr(E->getArg(1));
9294     Value *FnALI32 =
9295         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
9296     return Builder.CreateCall(FnALI32, {Ptr, Val});
9297   }
9298 
9299   case NVPTX::BI__nvvm_atom_dec_gen_ui: {
9300     Value *Ptr = EmitScalarExpr(E->getArg(0));
9301     Value *Val = EmitScalarExpr(E->getArg(1));
9302     Value *FnALD32 =
9303         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
9304     return Builder.CreateCall(FnALD32, {Ptr, Val});
9305   }
9306 
9307   case NVPTX::BI__nvvm_ldg_c:
9308   case NVPTX::BI__nvvm_ldg_c2:
9309   case NVPTX::BI__nvvm_ldg_c4:
9310   case NVPTX::BI__nvvm_ldg_s:
9311   case NVPTX::BI__nvvm_ldg_s2:
9312   case NVPTX::BI__nvvm_ldg_s4:
9313   case NVPTX::BI__nvvm_ldg_i:
9314   case NVPTX::BI__nvvm_ldg_i2:
9315   case NVPTX::BI__nvvm_ldg_i4:
9316   case NVPTX::BI__nvvm_ldg_l:
9317   case NVPTX::BI__nvvm_ldg_ll:
9318   case NVPTX::BI__nvvm_ldg_ll2:
9319   case NVPTX::BI__nvvm_ldg_uc:
9320   case NVPTX::BI__nvvm_ldg_uc2:
9321   case NVPTX::BI__nvvm_ldg_uc4:
9322   case NVPTX::BI__nvvm_ldg_us:
9323   case NVPTX::BI__nvvm_ldg_us2:
9324   case NVPTX::BI__nvvm_ldg_us4:
9325   case NVPTX::BI__nvvm_ldg_ui:
9326   case NVPTX::BI__nvvm_ldg_ui2:
9327   case NVPTX::BI__nvvm_ldg_ui4:
9328   case NVPTX::BI__nvvm_ldg_ul:
9329   case NVPTX::BI__nvvm_ldg_ull:
9330   case NVPTX::BI__nvvm_ldg_ull2:
9331     // PTX Interoperability section 2.2: "For a vector with an even number of
9332     // elements, its alignment is set to number of elements times the alignment
9333     // of its member: n*alignof(t)."
9334     return MakeLdg(Intrinsic::nvvm_ldg_global_i);
9335   case NVPTX::BI__nvvm_ldg_f:
9336   case NVPTX::BI__nvvm_ldg_f2:
9337   case NVPTX::BI__nvvm_ldg_f4:
9338   case NVPTX::BI__nvvm_ldg_d:
9339   case NVPTX::BI__nvvm_ldg_d2:
9340     return MakeLdg(Intrinsic::nvvm_ldg_global_f);
9341 
9342   case NVPTX::BI__nvvm_atom_cta_add_gen_i:
9343   case NVPTX::BI__nvvm_atom_cta_add_gen_l:
9344   case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
9345     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta);
9346   case NVPTX::BI__nvvm_atom_sys_add_gen_i:
9347   case NVPTX::BI__nvvm_atom_sys_add_gen_l:
9348   case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
9349     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys);
9350   case NVPTX::BI__nvvm_atom_cta_add_gen_f:
9351   case NVPTX::BI__nvvm_atom_cta_add_gen_d:
9352     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta);
9353   case NVPTX::BI__nvvm_atom_sys_add_gen_f:
9354   case NVPTX::BI__nvvm_atom_sys_add_gen_d:
9355     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys);
9356   case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
9357   case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
9358   case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
9359     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta);
9360   case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
9361   case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
9362   case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
9363     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys);
9364   case NVPTX::BI__nvvm_atom_cta_max_gen_i:
9365   case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
9366   case NVPTX::BI__nvvm_atom_cta_max_gen_l:
9367   case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
9368   case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
9369   case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
9370     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta);
9371   case NVPTX::BI__nvvm_atom_sys_max_gen_i:
9372   case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
9373   case NVPTX::BI__nvvm_atom_sys_max_gen_l:
9374   case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
9375   case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
9376   case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
9377     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys);
9378   case NVPTX::BI__nvvm_atom_cta_min_gen_i:
9379   case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
9380   case NVPTX::BI__nvvm_atom_cta_min_gen_l:
9381   case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
9382   case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
9383   case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
9384     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta);
9385   case NVPTX::BI__nvvm_atom_sys_min_gen_i:
9386   case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
9387   case NVPTX::BI__nvvm_atom_sys_min_gen_l:
9388   case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
9389   case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
9390   case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
9391     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys);
9392   case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
9393     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta);
9394   case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
9395     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta);
9396   case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
9397     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys);
9398   case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
9399     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys);
9400   case NVPTX::BI__nvvm_atom_cta_and_gen_i:
9401   case NVPTX::BI__nvvm_atom_cta_and_gen_l:
9402   case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
9403     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta);
9404   case NVPTX::BI__nvvm_atom_sys_and_gen_i:
9405   case NVPTX::BI__nvvm_atom_sys_and_gen_l:
9406   case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
9407     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys);
9408   case NVPTX::BI__nvvm_atom_cta_or_gen_i:
9409   case NVPTX::BI__nvvm_atom_cta_or_gen_l:
9410   case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
9411     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta);
9412   case NVPTX::BI__nvvm_atom_sys_or_gen_i:
9413   case NVPTX::BI__nvvm_atom_sys_or_gen_l:
9414   case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
9415     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys);
9416   case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
9417   case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
9418   case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
9419     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta);
9420   case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
9421   case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
9422   case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
9423     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys);
9424   case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
9425   case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
9426   case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
9427     Value *Ptr = EmitScalarExpr(E->getArg(0));
9428     return Builder.CreateCall(
9429         CGM.getIntrinsic(
9430             Intrinsic::nvvm_atomic_cas_gen_i_cta,
9431             {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
9432         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
9433   }
9434   case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
9435   case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
9436   case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
9437     Value *Ptr = EmitScalarExpr(E->getArg(0));
9438     return Builder.CreateCall(
9439         CGM.getIntrinsic(
9440             Intrinsic::nvvm_atomic_cas_gen_i_sys,
9441             {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
9442         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
9443   }
9444   default:
9445     return nullptr;
9446   }
9447 }
9448 
9449 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
9450                                                    const CallExpr *E) {
9451   switch (BuiltinID) {
9452   case WebAssembly::BI__builtin_wasm_current_memory: {
9453     llvm::Type *ResultType = ConvertType(E->getType());
9454     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
9455     return Builder.CreateCall(Callee);
9456   }
9457   case WebAssembly::BI__builtin_wasm_grow_memory: {
9458     Value *X = EmitScalarExpr(E->getArg(0));
9459     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
9460     return Builder.CreateCall(Callee, X);
9461   }
9462   case WebAssembly::BI__builtin_wasm_throw: {
9463     Value *Tag = EmitScalarExpr(E->getArg(0));
9464     Value *Obj = EmitScalarExpr(E->getArg(1));
9465     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
9466     return Builder.CreateCall(Callee, {Tag, Obj});
9467   }
9468   case WebAssembly::BI__builtin_wasm_rethrow: {
9469     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
9470     return Builder.CreateCall(Callee);
9471   }
9472 
9473   default:
9474     return nullptr;
9475   }
9476 }
9477