1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGObjCRuntime.h"
16 #include "CGOpenCLRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "CodeGenModule.h"
19 #include "ConstantEmitter.h"
20 #include "TargetInfo.h"
21 #include "clang/AST/ASTContext.h"
22 #include "clang/AST/Decl.h"
23 #include "clang/Analysis/Analyses/OSLog.h"
24 #include "clang/Basic/TargetBuiltins.h"
25 #include "clang/Basic/TargetInfo.h"
26 #include "clang/CodeGen/CGFunctionInfo.h"
27 #include "llvm/ADT/StringExtras.h"
28 #include "llvm/IR/CallSite.h"
29 #include "llvm/IR/DataLayout.h"
30 #include "llvm/IR/InlineAsm.h"
31 #include "llvm/IR/Intrinsics.h"
32 #include "llvm/IR/MDBuilder.h"
33 #include "llvm/Support/ConvertUTF.h"
34 #include <sstream>
35 
36 using namespace clang;
37 using namespace CodeGen;
38 using namespace llvm;
39 
/// Restrict \p Value to the range [\p Low, \p High].
///
/// Values below \p Low yield \p Low and values above \p High yield \p High.
/// The upper bound is applied last, so it wins if the bounds are inverted.
static int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
  const int64_t AtLeastLow = Value < Low ? Low : Value;
  return High < AtLeastLow ? High : AtLeastLow;
}
44 
45 /// getBuiltinLibFunction - Given a builtin id for a function like
46 /// "__builtin_fabsf", return a Function* for "fabsf".
47 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
48                                                      unsigned BuiltinID) {
49   assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
50 
51   // Get the name, skip over the __builtin_ prefix (if necessary).
52   StringRef Name;
53   GlobalDecl D(FD);
54 
55   // If the builtin has been declared explicitly with an assembler label,
56   // use the mangled name. This differs from the plain label on platforms
57   // that prefix labels.
58   if (FD->hasAttr<AsmLabelAttr>())
59     Name = getMangledName(D);
60   else
61     Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
62 
63   llvm::FunctionType *Ty =
64     cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
65 
66   return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
67 }
68 
69 /// Emit the conversions required to turn the given value into an
70 /// integer of the given size.
71 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
72                         QualType T, llvm::IntegerType *IntType) {
73   V = CGF.EmitToMemory(V, T);
74 
75   if (V->getType()->isPointerTy())
76     return CGF.Builder.CreatePtrToInt(V, IntType);
77 
78   assert(V->getType() == IntType);
79   return V;
80 }
81 
82 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
83                           QualType T, llvm::Type *ResultType) {
84   V = CGF.EmitFromMemory(V, T);
85 
86   if (ResultType->isPointerTy())
87     return CGF.Builder.CreateIntToPtr(V, ResultType);
88 
89   assert(V->getType() == ResultType);
90   return V;
91 }
92 
93 /// Utility to insert an atomic instruction based on Instrinsic::ID
94 /// and the expression node.
95 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
96                                     llvm::AtomicRMWInst::BinOp Kind,
97                                     const CallExpr *E) {
98   QualType T = E->getType();
99   assert(E->getArg(0)->getType()->isPointerType());
100   assert(CGF.getContext().hasSameUnqualifiedType(T,
101                                   E->getArg(0)->getType()->getPointeeType()));
102   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
103 
104   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
105   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
106 
107   llvm::IntegerType *IntType =
108     llvm::IntegerType::get(CGF.getLLVMContext(),
109                            CGF.getContext().getTypeSize(T));
110   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
111 
112   llvm::Value *Args[2];
113   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
114   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
115   llvm::Type *ValueType = Args[1]->getType();
116   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
117 
118   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
119       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
120   return EmitFromInt(CGF, Result, T, ValueType);
121 }
122 
123 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
124   Value *Val = CGF.EmitScalarExpr(E->getArg(0));
125   Value *Address = CGF.EmitScalarExpr(E->getArg(1));
126 
127   // Convert the type of the pointer to a pointer to the stored type.
128   Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
129   Value *BC = CGF.Builder.CreateBitCast(
130       Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
131   LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
132   LV.setNontemporal(true);
133   CGF.EmitStoreOfScalar(Val, LV, false);
134   return nullptr;
135 }
136 
137 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
138   Value *Address = CGF.EmitScalarExpr(E->getArg(0));
139 
140   LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
141   LV.setNontemporal(true);
142   return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
143 }
144 
145 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
146                                llvm::AtomicRMWInst::BinOp Kind,
147                                const CallExpr *E) {
148   return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
149 }
150 
151 /// Utility to insert an atomic instruction based Instrinsic::ID and
152 /// the expression node, where the return value is the result of the
153 /// operation.
154 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
155                                    llvm::AtomicRMWInst::BinOp Kind,
156                                    const CallExpr *E,
157                                    Instruction::BinaryOps Op,
158                                    bool Invert = false) {
159   QualType T = E->getType();
160   assert(E->getArg(0)->getType()->isPointerType());
161   assert(CGF.getContext().hasSameUnqualifiedType(T,
162                                   E->getArg(0)->getType()->getPointeeType()));
163   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
164 
165   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
166   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
167 
168   llvm::IntegerType *IntType =
169     llvm::IntegerType::get(CGF.getLLVMContext(),
170                            CGF.getContext().getTypeSize(T));
171   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
172 
173   llvm::Value *Args[2];
174   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
175   llvm::Type *ValueType = Args[1]->getType();
176   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
177   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
178 
179   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
180       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
181   Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
182   if (Invert)
183     Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
184                                      llvm::ConstantInt::get(IntType, -1));
185   Result = EmitFromInt(CGF, Result, T, ValueType);
186   return RValue::get(Result);
187 }
188 
189 /// @brief Utility to insert an atomic cmpxchg instruction.
190 ///
191 /// @param CGF The current codegen function.
192 /// @param E   Builtin call expression to convert to cmpxchg.
193 ///            arg0 - address to operate on
194 ///            arg1 - value to compare with
195 ///            arg2 - new value
196 /// @param ReturnBool Specifies whether to return success flag of
197 ///                   cmpxchg result or the old value.
198 ///
199 /// @returns result of cmpxchg, according to ReturnBool
200 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
201                                      bool ReturnBool) {
202   QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
203   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
204   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
205 
206   llvm::IntegerType *IntType = llvm::IntegerType::get(
207       CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
208   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
209 
210   Value *Args[3];
211   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
212   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
213   llvm::Type *ValueType = Args[1]->getType();
214   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
215   Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
216 
217   Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
218       Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
219       llvm::AtomicOrdering::SequentiallyConsistent);
220   if (ReturnBool)
221     // Extract boolean success flag and zext it to int.
222     return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
223                                   CGF.ConvertType(E->getType()));
224   else
225     // Extract old value and emit it using the same type as compare value.
226     return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
227                        ValueType);
228 }
229 
230 // Emit a simple mangled intrinsic that has 1 argument and a return type
231 // matching the argument type.
232 static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
233                                const CallExpr *E,
234                                unsigned IntrinsicID) {
235   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
236 
237   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
238   return CGF.Builder.CreateCall(F, Src0);
239 }
240 
241 // Emit an intrinsic that has 2 operands of the same type as its result.
242 static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
243                                 const CallExpr *E,
244                                 unsigned IntrinsicID) {
245   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
246   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
247 
248   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
249   return CGF.Builder.CreateCall(F, { Src0, Src1 });
250 }
251 
252 // Emit an intrinsic that has 3 operands of the same type as its result.
253 static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
254                                  const CallExpr *E,
255                                  unsigned IntrinsicID) {
256   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
257   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
258   llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
259 
260   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
261   return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
262 }
263 
264 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
265 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
266                                const CallExpr *E,
267                                unsigned IntrinsicID) {
268   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
269   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
270 
271   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
272   return CGF.Builder.CreateCall(F, {Src0, Src1});
273 }
274 
275 /// EmitFAbs - Emit a call to @llvm.fabs().
276 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
277   Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
278   llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
279   Call->setDoesNotAccessMemory();
280   return Call;
281 }
282 
283 /// Emit the computation of the sign bit for a floating point value. Returns
284 /// the i1 sign bit value.
285 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
286   LLVMContext &C = CGF.CGM.getLLVMContext();
287 
288   llvm::Type *Ty = V->getType();
289   int Width = Ty->getPrimitiveSizeInBits();
290   llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
291   V = CGF.Builder.CreateBitCast(V, IntTy);
292   if (Ty->isPPC_FP128Ty()) {
293     // We want the sign bit of the higher-order double. The bitcast we just
294     // did works as if the double-double was stored to memory and then
295     // read as an i128. The "store" will put the higher-order double in the
296     // lower address in both little- and big-Endian modes, but the "load"
297     // will treat those bits as a different part of the i128: the low bits in
298     // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
299     // we need to shift the high bits down to the low before truncating.
300     Width >>= 1;
301     if (CGF.getTarget().isBigEndian()) {
302       Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
303       V = CGF.Builder.CreateLShr(V, ShiftCst);
304     }
305     // We are truncating value in order to extract the higher-order
306     // double, which we will be using to extract the sign from.
307     IntTy = llvm::IntegerType::get(C, Width);
308     V = CGF.Builder.CreateTrunc(V, IntTy);
309   }
310   Value *Zero = llvm::Constant::getNullValue(IntTy);
311   return CGF.Builder.CreateICmpSLT(V, Zero);
312 }
313 
314 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
315                               const CallExpr *E, llvm::Constant *calleeValue) {
316   CGCallee callee = CGCallee::forDirect(calleeValue, FD);
317   return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
318 }
319 
320 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
321 /// depending on IntrinsicID.
322 ///
323 /// \arg CGF The current codegen function.
324 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
325 /// \arg X The first argument to the llvm.*.with.overflow.*.
326 /// \arg Y The second argument to the llvm.*.with.overflow.*.
327 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
328 /// \returns The result (i.e. sum/product) returned by the intrinsic.
329 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
330                                           const llvm::Intrinsic::ID IntrinsicID,
331                                           llvm::Value *X, llvm::Value *Y,
332                                           llvm::Value *&Carry) {
333   // Make sure we have integers of the same width.
334   assert(X->getType() == Y->getType() &&
335          "Arguments must be the same type. (Did you forget to make sure both "
336          "arguments have the same integer width?)");
337 
338   llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
339   llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
340   Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
341   return CGF.Builder.CreateExtractValue(Tmp, 0);
342 }
343 
344 static Value *emitRangedBuiltin(CodeGenFunction &CGF,
345                                 unsigned IntrinsicID,
346                                 int low, int high) {
347     llvm::MDBuilder MDHelper(CGF.getLLVMContext());
348     llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
349     Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
350     llvm::Instruction *Call = CGF.Builder.CreateCall(F);
351     Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
352     return Call;
353 }
354 
namespace {
  /// Describes an integer type by its bit width and whether it is signed.
  struct WidthAndSignedness {
    // Width of the integer type in bits.
    unsigned Width;
    // True if the integer type is signed.
    bool Signed;
  };
}
361 
362 static WidthAndSignedness
363 getIntegerWidthAndSignedness(const clang::ASTContext &context,
364                              const clang::QualType Type) {
365   assert(Type->isIntegerType() && "Given type is not an integer.");
366   unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
367   bool Signed = Type->isSignedIntegerType();
368   return {Width, Signed};
369 }
370 
371 // Given one or more integer types, this function produces an integer type that
372 // encompasses them: any value in one of the given types could be expressed in
373 // the encompassing type.
374 static struct WidthAndSignedness
375 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
376   assert(Types.size() > 0 && "Empty list of types.");
377 
378   // If any of the given types is signed, we must return a signed type.
379   bool Signed = false;
380   for (const auto &Type : Types) {
381     Signed |= Type.Signed;
382   }
383 
384   // The encompassing type must have a width greater than or equal to the width
385   // of the specified types.  Aditionally, if the encompassing type is signed,
386   // its width must be strictly greater than the width of any unsigned types
387   // given.
388   unsigned Width = 0;
389   for (const auto &Type : Types) {
390     unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
391     if (Width < MinWidth) {
392       Width = MinWidth;
393     }
394   }
395 
396   return {Width, Signed};
397 }
398 
399 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
400   llvm::Type *DestType = Int8PtrTy;
401   if (ArgValue->getType() != DestType)
402     ArgValue =
403         Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
404 
405   Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
406   return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
407 }
408 
/// Checks if using the result of __builtin_object_size(p, @p From) in place of
/// __builtin_object_size(p, @p To) is correct
///
/// Besides identical types, the accepted substitutions are From=0 for To=1
/// and From=3 for To=2.
static bool areBOSTypesCompatible(int From, int To) {
  // Note: Our __builtin_object_size implementation currently treats Type=0 and
  // Type=2 identically. Encoding this implementation detail here may make
  // improving __builtin_object_size difficult in the future, so it's omitted.
  if (From == To)
    return true;

  switch (From) {
  case 0:
    return To == 1;
  case 3:
    return To == 2;
  default:
    return false;
  }
}
417 
418 static llvm::Value *
419 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
420   return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
421 }
422 
423 llvm::Value *
424 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
425                                                  llvm::IntegerType *ResType,
426                                                  llvm::Value *EmittedE) {
427   uint64_t ObjectSize;
428   if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
429     return emitBuiltinObjectSize(E, Type, ResType, EmittedE);
430   return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
431 }
432 
433 /// Returns a Value corresponding to the size of the given expression.
434 /// This Value may be either of the following:
435 ///   - A llvm::Argument (if E is a param with the pass_object_size attribute on
436 ///     it)
437 ///   - A call to the @llvm.objectsize intrinsic
438 ///
439 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
440 /// and we wouldn't otherwise try to reference a pass_object_size parameter,
441 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
442 llvm::Value *
443 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
444                                        llvm::IntegerType *ResType,
445                                        llvm::Value *EmittedE) {
446   // We need to reference an argument if the pointer is a parameter with the
447   // pass_object_size attribute.
448   if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
449     auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
450     auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
451     if (Param != nullptr && PS != nullptr &&
452         areBOSTypesCompatible(PS->getType(), Type)) {
453       auto Iter = SizeArguments.find(Param);
454       assert(Iter != SizeArguments.end());
455 
456       const ImplicitParamDecl *D = Iter->second;
457       auto DIter = LocalDeclMap.find(D);
458       assert(DIter != LocalDeclMap.end());
459 
460       return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
461                               getContext().getSizeType(), E->getLocStart());
462     }
463   }
464 
465   // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
466   // evaluate E for side-effects. In either case, we shouldn't lower to
467   // @llvm.objectsize.
468   if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
469     return getDefaultBuiltinObjectSizeResult(Type, ResType);
470 
471   Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
472   assert(Ptr->getType()->isPointerTy() &&
473          "Non-pointer passed to __builtin_object_size?");
474 
475   Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
476 
477   // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
478   Value *Min = Builder.getInt1((Type & 2) != 0);
479   // For GCC compatability, __builtin_object_size treat NULL as unknown size.
480   Value *NullIsUnknown = Builder.getTrue();
481   return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
482 }
483 
// Many MSVC builtins are available on both x64 and ARM; to avoid repeating
// code, we handle them here. Each enumerator identifies one such intrinsic
// and is dispatched on by CodeGenFunction::EmitMSVCBuiltinExpr.
enum class CodeGenFunction::MSVCIntrin {
  // Bit scans.
  _BitScanForward,
  _BitScanReverse,
  // Interlocked (atomic) operations.
  _InterlockedAnd,
  _InterlockedDecrement,
  _InterlockedExchange,
  _InterlockedExchangeAdd,
  _InterlockedExchangeSub,
  _InterlockedIncrement,
  _InterlockedOr,
  _InterlockedXor,
  _interlockedbittestandset,
  // Request for immediate process termination.
  __fastfail,
};
500 
501 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
502                                             const CallExpr *E) {
503   switch (BuiltinID) {
504   case MSVCIntrin::_BitScanForward:
505   case MSVCIntrin::_BitScanReverse: {
506     Value *ArgValue = EmitScalarExpr(E->getArg(1));
507 
508     llvm::Type *ArgType = ArgValue->getType();
509     llvm::Type *IndexType =
510       EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType();
511     llvm::Type *ResultType = ConvertType(E->getType());
512 
513     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
514     Value *ResZero = llvm::Constant::getNullValue(ResultType);
515     Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
516 
517     BasicBlock *Begin = Builder.GetInsertBlock();
518     BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
519     Builder.SetInsertPoint(End);
520     PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
521 
522     Builder.SetInsertPoint(Begin);
523     Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
524     BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
525     Builder.CreateCondBr(IsZero, End, NotZero);
526     Result->addIncoming(ResZero, Begin);
527 
528     Builder.SetInsertPoint(NotZero);
529     Address IndexAddress = EmitPointerWithAlignment(E->getArg(0));
530 
531     if (BuiltinID == MSVCIntrin::_BitScanForward) {
532       Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
533       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
534       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
535       Builder.CreateStore(ZeroCount, IndexAddress, false);
536     } else {
537       unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
538       Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
539 
540       Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
541       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
542       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
543       Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
544       Builder.CreateStore(Index, IndexAddress, false);
545     }
546     Builder.CreateBr(End);
547     Result->addIncoming(ResOne, NotZero);
548 
549     Builder.SetInsertPoint(End);
550     return Result;
551   }
552   case MSVCIntrin::_InterlockedAnd:
553     return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
554   case MSVCIntrin::_InterlockedExchange:
555     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
556   case MSVCIntrin::_InterlockedExchangeAdd:
557     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
558   case MSVCIntrin::_InterlockedExchangeSub:
559     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
560   case MSVCIntrin::_InterlockedOr:
561     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
562   case MSVCIntrin::_InterlockedXor:
563     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
564 
565   case MSVCIntrin::_interlockedbittestandset: {
566     llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
567     llvm::Value *Bit = EmitScalarExpr(E->getArg(1));
568     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
569         AtomicRMWInst::Or, Addr,
570         Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit),
571         llvm::AtomicOrdering::SequentiallyConsistent);
572     // Shift the relevant bit to the least significant position, truncate to
573     // the result type, and test the low bit.
574     llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit);
575     llvm::Value *Truncated =
576         Builder.CreateTrunc(Shifted, ConvertType(E->getType()));
577     return Builder.CreateAnd(Truncated,
578                              ConstantInt::get(Truncated->getType(), 1));
579   }
580 
581   case MSVCIntrin::_InterlockedDecrement: {
582     llvm::Type *IntTy = ConvertType(E->getType());
583     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
584       AtomicRMWInst::Sub,
585       EmitScalarExpr(E->getArg(0)),
586       ConstantInt::get(IntTy, 1),
587       llvm::AtomicOrdering::SequentiallyConsistent);
588     return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1));
589   }
590   case MSVCIntrin::_InterlockedIncrement: {
591     llvm::Type *IntTy = ConvertType(E->getType());
592     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
593       AtomicRMWInst::Add,
594       EmitScalarExpr(E->getArg(0)),
595       ConstantInt::get(IntTy, 1),
596       llvm::AtomicOrdering::SequentiallyConsistent);
597     return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1));
598   }
599 
600   case MSVCIntrin::__fastfail: {
601     // Request immediate process termination from the kernel. The instruction
602     // sequences to do this are documented on MSDN:
603     // https://msdn.microsoft.com/en-us/library/dn774154.aspx
604     llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
605     StringRef Asm, Constraints;
606     switch (ISA) {
607     default:
608       ErrorUnsupported(E, "__fastfail call for this architecture");
609       break;
610     case llvm::Triple::x86:
611     case llvm::Triple::x86_64:
612       Asm = "int $$0x29";
613       Constraints = "{cx}";
614       break;
615     case llvm::Triple::thumb:
616       Asm = "udf #251";
617       Constraints = "{r0}";
618       break;
619     }
620     llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
621     llvm::InlineAsm *IA =
622         llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
623     llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
624         getLLVMContext(), llvm::AttributeList::FunctionIndex,
625         llvm::Attribute::NoReturn);
626     CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
627     CS.setAttributes(NoReturnAttr);
628     return CS.getInstruction();
629   }
630   }
631   llvm_unreachable("Incorrect MSVC intrinsic!");
632 }
633 
634 namespace {
635 // ARC cleanup for __builtin_os_log_format
636 struct CallObjCArcUse final : EHScopeStack::Cleanup {
637   CallObjCArcUse(llvm::Value *object) : object(object) {}
638   llvm::Value *object;
639 
640   void Emit(CodeGenFunction &CGF, Flags flags) override {
641     CGF.EmitARCIntrinsicUse(object);
642   }
643 };
644 }
645 
646 Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
647                                                  BuiltinCheckKind Kind) {
648   assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
649           && "Unsupported builtin check kind");
650 
651   Value *ArgValue = EmitScalarExpr(E);
652   if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef())
653     return ArgValue;
654 
655   SanitizerScope SanScope(this);
656   Value *Cond = Builder.CreateICmpNE(
657       ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
658   EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
659             SanitizerHandler::InvalidBuiltin,
660             {EmitCheckSourceLocation(E->getExprLoc()),
661              llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
662             None);
663   return ArgValue;
664 }
665 
666 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
667                                         unsigned BuiltinID, const CallExpr *E,
668                                         ReturnValueSlot ReturnValue) {
669   // See if we can constant fold this builtin.  If so, don't emit it at all.
670   Expr::EvalResult Result;
671   if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
672       !Result.hasSideEffects()) {
673     if (Result.Val.isInt())
674       return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
675                                                 Result.Val.getInt()));
676     if (Result.Val.isFloat())
677       return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
678                                                Result.Val.getFloat()));
679   }
680 
681   switch (BuiltinID) {
682   default: break;  // Handle intrinsics and libm functions below.
683   case Builtin::BI__builtin___CFStringMakeConstantString:
684   case Builtin::BI__builtin___NSStringMakeConstantString:
685     return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
686   case Builtin::BI__builtin_stdarg_start:
687   case Builtin::BI__builtin_va_start:
688   case Builtin::BI__va_start:
689   case Builtin::BI__builtin_va_end:
690     return RValue::get(
691         EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
692                            ? EmitScalarExpr(E->getArg(0))
693                            : EmitVAListRef(E->getArg(0)).getPointer(),
694                        BuiltinID != Builtin::BI__builtin_va_end));
695   case Builtin::BI__builtin_va_copy: {
696     Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
697     Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
698 
699     llvm::Type *Type = Int8PtrTy;
700 
701     DstPtr = Builder.CreateBitCast(DstPtr, Type);
702     SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
703     return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
704                                           {DstPtr, SrcPtr}));
705   }
706   case Builtin::BI__builtin_abs:
707   case Builtin::BI__builtin_labs:
708   case Builtin::BI__builtin_llabs: {
709     Value *ArgValue = EmitScalarExpr(E->getArg(0));
710 
711     Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
712     Value *CmpResult =
713     Builder.CreateICmpSGE(ArgValue,
714                           llvm::Constant::getNullValue(ArgValue->getType()),
715                                                             "abscond");
716     Value *Result =
717       Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
718 
719     return RValue::get(Result);
720   }
721   case Builtin::BI__builtin_fabs:
722   case Builtin::BI__builtin_fabsf:
723   case Builtin::BI__builtin_fabsl: {
724     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
725   }
726   case Builtin::BI__builtin_fmod:
727   case Builtin::BI__builtin_fmodf:
728   case Builtin::BI__builtin_fmodl: {
729     Value *Arg1 = EmitScalarExpr(E->getArg(0));
730     Value *Arg2 = EmitScalarExpr(E->getArg(1));
731     Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
732     return RValue::get(Result);
733   }
734   case Builtin::BI__builtin_copysign:
735   case Builtin::BI__builtin_copysignf:
736   case Builtin::BI__builtin_copysignl: {
737     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
738   }
739   case Builtin::BI__builtin_ceil:
740   case Builtin::BI__builtin_ceilf:
741   case Builtin::BI__builtin_ceill: {
742     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
743   }
744   case Builtin::BI__builtin_floor:
745   case Builtin::BI__builtin_floorf:
746   case Builtin::BI__builtin_floorl: {
747     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
748   }
749   case Builtin::BI__builtin_trunc:
750   case Builtin::BI__builtin_truncf:
751   case Builtin::BI__builtin_truncl: {
752     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
753   }
754   case Builtin::BI__builtin_rint:
755   case Builtin::BI__builtin_rintf:
756   case Builtin::BI__builtin_rintl: {
757     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
758   }
759   case Builtin::BI__builtin_nearbyint:
760   case Builtin::BI__builtin_nearbyintf:
761   case Builtin::BI__builtin_nearbyintl: {
762     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
763   }
764   case Builtin::BI__builtin_round:
765   case Builtin::BI__builtin_roundf:
766   case Builtin::BI__builtin_roundl: {
767     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
768   }
769   case Builtin::BI__builtin_fmin:
770   case Builtin::BI__builtin_fminf:
771   case Builtin::BI__builtin_fminl: {
772     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
773   }
774   case Builtin::BI__builtin_fmax:
775   case Builtin::BI__builtin_fmaxf:
776   case Builtin::BI__builtin_fmaxl: {
777     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
778   }
779   case Builtin::BI__builtin_conj:
780   case Builtin::BI__builtin_conjf:
781   case Builtin::BI__builtin_conjl: {
782     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
783     Value *Real = ComplexVal.first;
784     Value *Imag = ComplexVal.second;
785     Value *Zero =
786       Imag->getType()->isFPOrFPVectorTy()
787         ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
788         : llvm::Constant::getNullValue(Imag->getType());
789 
790     Imag = Builder.CreateFSub(Zero, Imag, "sub");
791     return RValue::getComplex(std::make_pair(Real, Imag));
792   }
793   case Builtin::BI__builtin_creal:
794   case Builtin::BI__builtin_crealf:
795   case Builtin::BI__builtin_creall:
796   case Builtin::BIcreal:
797   case Builtin::BIcrealf:
798   case Builtin::BIcreall: {
799     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
800     return RValue::get(ComplexVal.first);
801   }
802 
803   case Builtin::BI__builtin_cimag:
804   case Builtin::BI__builtin_cimagf:
805   case Builtin::BI__builtin_cimagl:
806   case Builtin::BIcimag:
807   case Builtin::BIcimagf:
808   case Builtin::BIcimagl: {
809     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
810     return RValue::get(ComplexVal.second);
811   }
812 
813   case Builtin::BI__builtin_ctzs:
814   case Builtin::BI__builtin_ctz:
815   case Builtin::BI__builtin_ctzl:
816   case Builtin::BI__builtin_ctzll: {
817     Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
818 
819     llvm::Type *ArgType = ArgValue->getType();
820     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
821 
822     llvm::Type *ResultType = ConvertType(E->getType());
823     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
824     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
825     if (Result->getType() != ResultType)
826       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
827                                      "cast");
828     return RValue::get(Result);
829   }
830   case Builtin::BI__builtin_clzs:
831   case Builtin::BI__builtin_clz:
832   case Builtin::BI__builtin_clzl:
833   case Builtin::BI__builtin_clzll: {
834     Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
835 
836     llvm::Type *ArgType = ArgValue->getType();
837     Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
838 
839     llvm::Type *ResultType = ConvertType(E->getType());
840     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
841     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
842     if (Result->getType() != ResultType)
843       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
844                                      "cast");
845     return RValue::get(Result);
846   }
847   case Builtin::BI__builtin_ffs:
848   case Builtin::BI__builtin_ffsl:
849   case Builtin::BI__builtin_ffsll: {
850     // ffs(x) -> x ? cttz(x) + 1 : 0
851     Value *ArgValue = EmitScalarExpr(E->getArg(0));
852 
853     llvm::Type *ArgType = ArgValue->getType();
854     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
855 
856     llvm::Type *ResultType = ConvertType(E->getType());
857     Value *Tmp =
858         Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
859                           llvm::ConstantInt::get(ArgType, 1));
860     Value *Zero = llvm::Constant::getNullValue(ArgType);
861     Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
862     Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
863     if (Result->getType() != ResultType)
864       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
865                                      "cast");
866     return RValue::get(Result);
867   }
868   case Builtin::BI__builtin_parity:
869   case Builtin::BI__builtin_parityl:
870   case Builtin::BI__builtin_parityll: {
871     // parity(x) -> ctpop(x) & 1
872     Value *ArgValue = EmitScalarExpr(E->getArg(0));
873 
874     llvm::Type *ArgType = ArgValue->getType();
875     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
876 
877     llvm::Type *ResultType = ConvertType(E->getType());
878     Value *Tmp = Builder.CreateCall(F, ArgValue);
879     Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
880     if (Result->getType() != ResultType)
881       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
882                                      "cast");
883     return RValue::get(Result);
884   }
885   case Builtin::BI__popcnt16:
886   case Builtin::BI__popcnt:
887   case Builtin::BI__popcnt64:
888   case Builtin::BI__builtin_popcount:
889   case Builtin::BI__builtin_popcountl:
890   case Builtin::BI__builtin_popcountll: {
891     Value *ArgValue = EmitScalarExpr(E->getArg(0));
892 
893     llvm::Type *ArgType = ArgValue->getType();
894     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
895 
896     llvm::Type *ResultType = ConvertType(E->getType());
897     Value *Result = Builder.CreateCall(F, ArgValue);
898     if (Result->getType() != ResultType)
899       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
900                                      "cast");
901     return RValue::get(Result);
902   }
903   case Builtin::BI_rotr8:
904   case Builtin::BI_rotr16:
905   case Builtin::BI_rotr:
906   case Builtin::BI_lrotr:
907   case Builtin::BI_rotr64: {
908     Value *Val = EmitScalarExpr(E->getArg(0));
909     Value *Shift = EmitScalarExpr(E->getArg(1));
910 
911     llvm::Type *ArgType = Val->getType();
912     Shift = Builder.CreateIntCast(Shift, ArgType, false);
913     unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
914     Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
915     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
916 
917     Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
918     Shift = Builder.CreateAnd(Shift, Mask);
919     Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift);
920 
921     Value *RightShifted = Builder.CreateLShr(Val, Shift);
922     Value *LeftShifted = Builder.CreateShl(Val, LeftShift);
923     Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
924 
925     Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
926     Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
927     return RValue::get(Result);
928   }
929   case Builtin::BI_rotl8:
930   case Builtin::BI_rotl16:
931   case Builtin::BI_rotl:
932   case Builtin::BI_lrotl:
933   case Builtin::BI_rotl64: {
934     Value *Val = EmitScalarExpr(E->getArg(0));
935     Value *Shift = EmitScalarExpr(E->getArg(1));
936 
937     llvm::Type *ArgType = Val->getType();
938     Shift = Builder.CreateIntCast(Shift, ArgType, false);
939     unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
940     Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
941     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
942 
943     Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
944     Shift = Builder.CreateAnd(Shift, Mask);
945     Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift);
946 
947     Value *LeftShifted = Builder.CreateShl(Val, Shift);
948     Value *RightShifted = Builder.CreateLShr(Val, RightShift);
949     Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
950 
951     Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
952     Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
953     return RValue::get(Result);
954   }
955   case Builtin::BI__builtin_unpredictable: {
956     // Always return the argument of __builtin_unpredictable. LLVM does not
957     // handle this builtin. Metadata for this builtin should be added directly
958     // to instructions such as branches or switches that use it.
959     return RValue::get(EmitScalarExpr(E->getArg(0)));
960   }
961   case Builtin::BI__builtin_expect: {
962     Value *ArgValue = EmitScalarExpr(E->getArg(0));
963     llvm::Type *ArgType = ArgValue->getType();
964 
965     Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
966     // Don't generate llvm.expect on -O0 as the backend won't use it for
967     // anything.
968     // Note, we still IRGen ExpectedValue because it could have side-effects.
969     if (CGM.getCodeGenOpts().OptimizationLevel == 0)
970       return RValue::get(ArgValue);
971 
972     Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
973     Value *Result =
974         Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
975     return RValue::get(Result);
976   }
977   case Builtin::BI__builtin_assume_aligned: {
978     Value *PtrValue = EmitScalarExpr(E->getArg(0));
979     Value *OffsetValue =
980       (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
981 
982     Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
983     ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
984     unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
985 
986     EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
987     return RValue::get(PtrValue);
988   }
989   case Builtin::BI__assume:
990   case Builtin::BI__builtin_assume: {
991     if (E->getArg(0)->HasSideEffects(getContext()))
992       return RValue::get(nullptr);
993 
994     Value *ArgValue = EmitScalarExpr(E->getArg(0));
995     Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
996     return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
997   }
998   case Builtin::BI__builtin_bswap16:
999   case Builtin::BI__builtin_bswap32:
1000   case Builtin::BI__builtin_bswap64: {
1001     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
1002   }
1003   case Builtin::BI__builtin_bitreverse8:
1004   case Builtin::BI__builtin_bitreverse16:
1005   case Builtin::BI__builtin_bitreverse32:
1006   case Builtin::BI__builtin_bitreverse64: {
1007     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
1008   }
1009   case Builtin::BI__builtin_object_size: {
1010     unsigned Type =
1011         E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
1012     auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
1013 
1014     // We pass this builtin onto the optimizer so that it can figure out the
1015     // object size in more complex cases.
1016     return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
1017                                              /*EmittedE=*/nullptr));
1018   }
1019   case Builtin::BI__builtin_prefetch: {
1020     Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
1021     // FIXME: Technically these constants should be of type 'int', yes?
1022     RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
1023       llvm::ConstantInt::get(Int32Ty, 0);
1024     Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
1025       llvm::ConstantInt::get(Int32Ty, 3);
1026     Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
1027     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
1028     return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
1029   }
1030   case Builtin::BI__builtin_readcyclecounter: {
1031     Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
1032     return RValue::get(Builder.CreateCall(F));
1033   }
1034   case Builtin::BI__builtin___clear_cache: {
1035     Value *Begin = EmitScalarExpr(E->getArg(0));
1036     Value *End = EmitScalarExpr(E->getArg(1));
1037     Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
1038     return RValue::get(Builder.CreateCall(F, {Begin, End}));
1039   }
1040   case Builtin::BI__builtin_trap:
1041     return RValue::get(EmitTrapCall(Intrinsic::trap));
1042   case Builtin::BI__debugbreak:
1043     return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
1044   case Builtin::BI__builtin_unreachable: {
1045     if (SanOpts.has(SanitizerKind::Unreachable)) {
1046       SanitizerScope SanScope(this);
1047       EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
1048                                SanitizerKind::Unreachable),
1049                 SanitizerHandler::BuiltinUnreachable,
1050                 EmitCheckSourceLocation(E->getExprLoc()), None);
1051     } else
1052       Builder.CreateUnreachable();
1053 
1054     // We do need to preserve an insertion point.
1055     EmitBlock(createBasicBlock("unreachable.cont"));
1056 
1057     return RValue::get(nullptr);
1058   }
1059 
1060   case Builtin::BI__builtin_powi:
1061   case Builtin::BI__builtin_powif:
1062   case Builtin::BI__builtin_powil: {
1063     Value *Base = EmitScalarExpr(E->getArg(0));
1064     Value *Exponent = EmitScalarExpr(E->getArg(1));
1065     llvm::Type *ArgType = Base->getType();
1066     Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
1067     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1068   }
1069 
1070   case Builtin::BI__builtin_isgreater:
1071   case Builtin::BI__builtin_isgreaterequal:
1072   case Builtin::BI__builtin_isless:
1073   case Builtin::BI__builtin_islessequal:
1074   case Builtin::BI__builtin_islessgreater:
1075   case Builtin::BI__builtin_isunordered: {
1076     // Ordered comparisons: we know the arguments to these are matching scalar
1077     // floating point values.
1078     Value *LHS = EmitScalarExpr(E->getArg(0));
1079     Value *RHS = EmitScalarExpr(E->getArg(1));
1080 
1081     switch (BuiltinID) {
1082     default: llvm_unreachable("Unknown ordered comparison");
1083     case Builtin::BI__builtin_isgreater:
1084       LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
1085       break;
1086     case Builtin::BI__builtin_isgreaterequal:
1087       LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
1088       break;
1089     case Builtin::BI__builtin_isless:
1090       LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
1091       break;
1092     case Builtin::BI__builtin_islessequal:
1093       LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
1094       break;
1095     case Builtin::BI__builtin_islessgreater:
1096       LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
1097       break;
1098     case Builtin::BI__builtin_isunordered:
1099       LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
1100       break;
1101     }
1102     // ZExt bool to int type.
1103     return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
1104   }
1105   case Builtin::BI__builtin_isnan: {
1106     Value *V = EmitScalarExpr(E->getArg(0));
1107     V = Builder.CreateFCmpUNO(V, V, "cmp");
1108     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1109   }
1110 
1111   case Builtin::BIfinite:
1112   case Builtin::BI__finite:
1113   case Builtin::BIfinitef:
1114   case Builtin::BI__finitef:
1115   case Builtin::BIfinitel:
1116   case Builtin::BI__finitel:
1117   case Builtin::BI__builtin_isinf:
1118   case Builtin::BI__builtin_isfinite: {
1119     // isinf(x)    --> fabs(x) == infinity
1120     // isfinite(x) --> fabs(x) != infinity
1121     // x != NaN via the ordered compare in either case.
1122     Value *V = EmitScalarExpr(E->getArg(0));
1123     Value *Fabs = EmitFAbs(*this, V);
1124     Constant *Infinity = ConstantFP::getInfinity(V->getType());
1125     CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
1126                                   ? CmpInst::FCMP_OEQ
1127                                   : CmpInst::FCMP_ONE;
1128     Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
1129     return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
1130   }
1131 
1132   case Builtin::BI__builtin_isinf_sign: {
1133     // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
1134     Value *Arg = EmitScalarExpr(E->getArg(0));
1135     Value *AbsArg = EmitFAbs(*this, Arg);
1136     Value *IsInf = Builder.CreateFCmpOEQ(
1137         AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
1138     Value *IsNeg = EmitSignBit(*this, Arg);
1139 
1140     llvm::Type *IntTy = ConvertType(E->getType());
1141     Value *Zero = Constant::getNullValue(IntTy);
1142     Value *One = ConstantInt::get(IntTy, 1);
1143     Value *NegativeOne = ConstantInt::get(IntTy, -1);
1144     Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
1145     Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
1146     return RValue::get(Result);
1147   }
1148 
1149   case Builtin::BI__builtin_isnormal: {
1150     // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
1151     Value *V = EmitScalarExpr(E->getArg(0));
1152     Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
1153 
1154     Value *Abs = EmitFAbs(*this, V);
1155     Value *IsLessThanInf =
1156       Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
1157     APFloat Smallest = APFloat::getSmallestNormalized(
1158                    getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
1159     Value *IsNormal =
1160       Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
1161                             "isnormal");
1162     V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
1163     V = Builder.CreateAnd(V, IsNormal, "and");
1164     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1165   }
1166 
1167   case Builtin::BI__builtin_fpclassify: {
1168     Value *V = EmitScalarExpr(E->getArg(5));
1169     llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
1170 
1171     // Create Result
1172     BasicBlock *Begin = Builder.GetInsertBlock();
1173     BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
1174     Builder.SetInsertPoint(End);
1175     PHINode *Result =
1176       Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
1177                         "fpclassify_result");
1178 
1179     // if (V==0) return FP_ZERO
1180     Builder.SetInsertPoint(Begin);
1181     Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
1182                                           "iszero");
1183     Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
1184     BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
1185     Builder.CreateCondBr(IsZero, End, NotZero);
1186     Result->addIncoming(ZeroLiteral, Begin);
1187 
1188     // if (V != V) return FP_NAN
1189     Builder.SetInsertPoint(NotZero);
1190     Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
1191     Value *NanLiteral = EmitScalarExpr(E->getArg(0));
1192     BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
1193     Builder.CreateCondBr(IsNan, End, NotNan);
1194     Result->addIncoming(NanLiteral, NotZero);
1195 
1196     // if (fabs(V) == infinity) return FP_INFINITE
1197     Builder.SetInsertPoint(NotNan);
1198     Value *VAbs = EmitFAbs(*this, V);
1199     Value *IsInf =
1200       Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
1201                             "isinf");
1202     Value *InfLiteral = EmitScalarExpr(E->getArg(1));
1203     BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
1204     Builder.CreateCondBr(IsInf, End, NotInf);
1205     Result->addIncoming(InfLiteral, NotNan);
1206 
1207     // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
1208     Builder.SetInsertPoint(NotInf);
1209     APFloat Smallest = APFloat::getSmallestNormalized(
1210         getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
1211     Value *IsNormal =
1212       Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
1213                             "isnormal");
1214     Value *NormalResult =
1215       Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
1216                            EmitScalarExpr(E->getArg(3)));
1217     Builder.CreateBr(End);
1218     Result->addIncoming(NormalResult, NotInf);
1219 
1220     // return Result
1221     Builder.SetInsertPoint(End);
1222     return RValue::get(Result);
1223   }
1224 
1225   case Builtin::BIalloca:
1226   case Builtin::BI_alloca:
1227   case Builtin::BI__builtin_alloca: {
1228     Value *Size = EmitScalarExpr(E->getArg(0));
1229     const TargetInfo &TI = getContext().getTargetInfo();
1230     // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
1231     unsigned SuitableAlignmentInBytes =
1232         CGM.getContext()
1233             .toCharUnitsFromBits(TI.getSuitableAlign())
1234             .getQuantity();
1235     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1236     AI->setAlignment(SuitableAlignmentInBytes);
1237     return RValue::get(AI);
1238   }
1239 
1240   case Builtin::BI__builtin_alloca_with_align: {
1241     Value *Size = EmitScalarExpr(E->getArg(0));
1242     Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
1243     auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
1244     unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
1245     unsigned AlignmentInBytes =
1246         CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity();
1247     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1248     AI->setAlignment(AlignmentInBytes);
1249     return RValue::get(AI);
1250   }
1251 
1252   case Builtin::BIbzero:
1253   case Builtin::BI__builtin_bzero: {
1254     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1255     Value *SizeVal = EmitScalarExpr(E->getArg(1));
1256     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1257                         E->getArg(0)->getExprLoc(), FD, 0);
1258     Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
1259     return RValue::get(Dest.getPointer());
1260   }
1261   case Builtin::BImemcpy:
1262   case Builtin::BI__builtin_memcpy: {
1263     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1264     Address Src = EmitPointerWithAlignment(E->getArg(1));
1265     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1266     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1267                         E->getArg(0)->getExprLoc(), FD, 0);
1268     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1269                         E->getArg(1)->getExprLoc(), FD, 1);
1270     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1271     return RValue::get(Dest.getPointer());
1272   }
1273 
1274   case Builtin::BI__builtin_char_memchr:
1275     BuiltinID = Builtin::BI__builtin_memchr;
1276     break;
1277 
1278   case Builtin::BI__builtin___memcpy_chk: {
1279     // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
1280     llvm::APSInt Size, DstSize;
1281     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1282         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1283       break;
1284     if (Size.ugt(DstSize))
1285       break;
1286     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1287     Address Src = EmitPointerWithAlignment(E->getArg(1));
1288     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1289     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1290     return RValue::get(Dest.getPointer());
1291   }
1292 
1293   case Builtin::BI__builtin_objc_memmove_collectable: {
1294     Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
1295     Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
1296     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1297     CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
1298                                                   DestAddr, SrcAddr, SizeVal);
1299     return RValue::get(DestAddr.getPointer());
1300   }
1301 
1302   case Builtin::BI__builtin___memmove_chk: {
1303     // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
1304     llvm::APSInt Size, DstSize;
1305     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1306         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1307       break;
1308     if (Size.ugt(DstSize))
1309       break;
1310     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1311     Address Src = EmitPointerWithAlignment(E->getArg(1));
1312     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1313     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1314     return RValue::get(Dest.getPointer());
1315   }
1316 
1317   case Builtin::BImemmove:
1318   case Builtin::BI__builtin_memmove: {
1319     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1320     Address Src = EmitPointerWithAlignment(E->getArg(1));
1321     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1322     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1323                         E->getArg(0)->getExprLoc(), FD, 0);
1324     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1325                         E->getArg(1)->getExprLoc(), FD, 1);
1326     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1327     return RValue::get(Dest.getPointer());
1328   }
1329   case Builtin::BImemset:
1330   case Builtin::BI__builtin_memset: {
1331     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1332     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1333                                          Builder.getInt8Ty());
1334     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1335     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1336                         E->getArg(0)->getExprLoc(), FD, 0);
1337     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1338     return RValue::get(Dest.getPointer());
1339   }
1340   case Builtin::BI__builtin___memset_chk: {
1341     // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
1342     llvm::APSInt Size, DstSize;
1343     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1344         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1345       break;
1346     if (Size.ugt(DstSize))
1347       break;
1348     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1349     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1350                                          Builder.getInt8Ty());
1351     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1352     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1353     return RValue::get(Dest.getPointer());
1354   }
1355   case Builtin::BI__builtin_dwarf_cfa: {
1356     // The offset in bytes from the first argument to the CFA.
1357     //
1358     // Why on earth is this in the frontend?  Is there any reason at
1359     // all that the backend can't reasonably determine this while
1360     // lowering llvm.eh.dwarf.cfa()?
1361     //
1362     // TODO: If there's a satisfactory reason, add a target hook for
1363     // this instead of hard-coding 0, which is correct for most targets.
1364     int32_t Offset = 0;
1365 
1366     Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
1367     return RValue::get(Builder.CreateCall(F,
1368                                       llvm::ConstantInt::get(Int32Ty, Offset)));
1369   }
1370   case Builtin::BI__builtin_return_address: {
1371     Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
1372                                                    getContext().UnsignedIntTy);
1373     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1374     return RValue::get(Builder.CreateCall(F, Depth));
1375   }
1376   case Builtin::BI_ReturnAddress: {
1377     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1378     return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
1379   }
1380   case Builtin::BI__builtin_frame_address: {
1381     Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
1382                                                    getContext().UnsignedIntTy);
1383     Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
1384     return RValue::get(Builder.CreateCall(F, Depth));
1385   }
1386   case Builtin::BI__builtin_extract_return_addr: {
1387     Value *Address = EmitScalarExpr(E->getArg(0));
1388     Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
1389     return RValue::get(Result);
1390   }
1391   case Builtin::BI__builtin_frob_return_addr: {
1392     Value *Address = EmitScalarExpr(E->getArg(0));
1393     Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
1394     return RValue::get(Result);
1395   }
1396   case Builtin::BI__builtin_dwarf_sp_column: {
1397     llvm::IntegerType *Ty
1398       = cast<llvm::IntegerType>(ConvertType(E->getType()));
1399     int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
1400     if (Column == -1) {
1401       CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
1402       return RValue::get(llvm::UndefValue::get(Ty));
1403     }
1404     return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
1405   }
1406   case Builtin::BI__builtin_init_dwarf_reg_size_table: {
1407     Value *Address = EmitScalarExpr(E->getArg(0));
1408     if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
1409       CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
1410     return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1411   }
1412   case Builtin::BI__builtin_eh_return: {
         // Lowers to llvm.eh.return.i32 or llvm.eh.return.i64, chosen by the bit
         // width of the first (integer) operand, followed by 'unreachable'.
1413     Value *Int = EmitScalarExpr(E->getArg(0));
1414     Value *Ptr = EmitScalarExpr(E->getArg(1));
1415 
1416     llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
1417     assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
1418            "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
         // Any width other than 32 selects the i64 variant; the assert above is
         // what rules out the remaining widths in asserting builds.
1419     Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
1420                                   ? Intrinsic::eh_return_i32
1421                                   : Intrinsic::eh_return_i64);
1422     Builder.CreateCall(F, {Int, Ptr});
1423     Builder.CreateUnreachable();
1424 
         // The current block is now terminated, so open a fresh block for
         // whatever is emitted after this expression.
1425     // We do need to preserve an insertion point.
1426     EmitBlock(createBasicBlock("builtin_eh_return.cont"));
1427 
         // No value is produced for this builtin.
1428     return RValue::get(nullptr);
1429   }
1430   case Builtin::BI__builtin_unwind_init: {
         // Emits an argument-less call to the llvm.eh.unwind.init intrinsic and
         // returns the call instruction as the result value.
1431     Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
1432     return RValue::get(Builder.CreateCall(F));
1433   }
1434   case Builtin::BI__builtin_extend_pointer: {
1435     // Extends a pointer to the size of an _Unwind_Word, which is
1436     // uint64_t on all platforms.  Generally this gets poked into a
1437     // register and eventually used as an address, so if the
1438     // addressing registers are wider than pointers and the platform
1439     // doesn't implicitly ignore high-order bits when doing
1440     // addressing, we need to make sure we zext / sext based on
1441     // the platform's expectations.
1442     //
1443     // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
1444 
1445     // Cast the pointer to intptr_t.
1446     Value *Ptr = EmitScalarExpr(E->getArg(0));
1447     Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
1448 
1449     // If that's 64 bits, we're done.
1450     if (IntPtrTy->getBitWidth() == 64)
1451       return RValue::get(Result);
1452 
         // IntPtrTy is not 64 bits wide here, so the integer is widened to
         // Int64Ty; the target hook decides between sign- and zero-extension.
1453     // Otherwise, ask the codegen data what to do.
1454     if (getTargetHooks().extendPointerWithSExt())
1455       return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
1456     else
1457       return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
1458   }
1459   case Builtin::BI__builtin_setjmp: {
         // Fills two slots of the caller-provided buffer and then calls the
         // llvm.eh.sjlj.setjmp intrinsic, whose result is the builtin's value.
1460     // Buffer is a void**.
1461     Address Buf = EmitPointerWithAlignment(E->getArg(0));
1462 
         // Slot 0 (the buffer address itself) receives llvm.frameaddress(0).
1463     // Store the frame pointer to the setjmp buffer.
1464     Value *FrameAddr =
1465       Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1466                          ConstantInt::get(Int32Ty, 0));
1467     Builder.CreateStore(FrameAddr, Buf);
1468 
         // Slot index 2 receives the value of llvm.stacksave; the GEP steps in
         // units of getPointerSize().
1469     // Store the stack pointer to the setjmp buffer.
1470     Value *StackAddr =
1471         Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
1472     Address StackSaveSlot =
1473       Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
1474     Builder.CreateStore(StackAddr, StackSaveSlot);
1475 
         // Both stores above go through the buffer at its original type; only the
         // intrinsic call sees it bitcast to Int8PtrTy.
1476     // Call LLVM's EH setjmp, which is lightweight.
1477     Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
1478     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1479     return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
1480   }
1481   case Builtin::BI__builtin_longjmp: {
         // Only the first argument (the buffer) is evaluated here; it is passed
         // to the intrinsic as Int8PtrTy.
1482     Value *Buf = EmitScalarExpr(E->getArg(0));
1483     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1484 
1485     // Call LLVM's EH longjmp, which is lightweight.
1486     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
1487 
1488     // longjmp doesn't return; mark this as unreachable.
1489     Builder.CreateUnreachable();
1490 
         // The 'unreachable' terminated the current block, so start a new one
         // for any code emitted after this expression.
1491     // We do need to preserve an insertion point.
1492     EmitBlock(createBasicBlock("longjmp.cont"));
1493 
         // No value is produced for this builtin.
1494     return RValue::get(nullptr);
1495   }
1496   case Builtin::BI__sync_fetch_and_add:
1497   case Builtin::BI__sync_fetch_and_sub:
1498   case Builtin::BI__sync_fetch_and_or:
1499   case Builtin::BI__sync_fetch_and_and:
1500   case Builtin::BI__sync_fetch_and_xor:
1501   case Builtin::BI__sync_fetch_and_nand:
1502   case Builtin::BI__sync_add_and_fetch:
1503   case Builtin::BI__sync_sub_and_fetch:
1504   case Builtin::BI__sync_and_and_fetch:
1505   case Builtin::BI__sync_or_and_fetch:
1506   case Builtin::BI__sync_xor_and_fetch:
1507   case Builtin::BI__sync_nand_and_fetch:
1508   case Builtin::BI__sync_val_compare_and_swap:
1509   case Builtin::BI__sync_bool_compare_and_swap:
1510   case Builtin::BI__sync_lock_test_and_set:
1511   case Builtin::BI__sync_lock_release:
1512   case Builtin::BI__sync_swap:
         // The un-suffixed __sync builtin IDs are not expected to reach code
         // generation at all (per the message, Sema should not let them through);
         // only the size-suffixed _1/_2/_4/_8/_16 IDs are handled by the cases
         // that follow. Reaching this point is an internal error.
1513     llvm_unreachable("Shouldn't make it through sema");
1514   case Builtin::BI__sync_fetch_and_add_1:
1515   case Builtin::BI__sync_fetch_and_add_2:
1516   case Builtin::BI__sync_fetch_and_add_4:
1517   case Builtin::BI__sync_fetch_and_add_8:
1518   case Builtin::BI__sync_fetch_and_add_16:
         // Every __sync_fetch_and_<op>_N group below is dispatched the same way:
         // all five size variants share one EmitBinaryAtomic call that differs
         // only in the AtomicRMWInst operation passed.
1519     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
1520   case Builtin::BI__sync_fetch_and_sub_1:
1521   case Builtin::BI__sync_fetch_and_sub_2:
1522   case Builtin::BI__sync_fetch_and_sub_4:
1523   case Builtin::BI__sync_fetch_and_sub_8:
1524   case Builtin::BI__sync_fetch_and_sub_16:
1525     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
1526   case Builtin::BI__sync_fetch_and_or_1:
1527   case Builtin::BI__sync_fetch_and_or_2:
1528   case Builtin::BI__sync_fetch_and_or_4:
1529   case Builtin::BI__sync_fetch_and_or_8:
1530   case Builtin::BI__sync_fetch_and_or_16:
1531     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
1532   case Builtin::BI__sync_fetch_and_and_1:
1533   case Builtin::BI__sync_fetch_and_and_2:
1534   case Builtin::BI__sync_fetch_and_and_4:
1535   case Builtin::BI__sync_fetch_and_and_8:
1536   case Builtin::BI__sync_fetch_and_and_16:
1537     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
1538   case Builtin::BI__sync_fetch_and_xor_1:
1539   case Builtin::BI__sync_fetch_and_xor_2:
1540   case Builtin::BI__sync_fetch_and_xor_4:
1541   case Builtin::BI__sync_fetch_and_xor_8:
1542   case Builtin::BI__sync_fetch_and_xor_16:
1543     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
1544   case Builtin::BI__sync_fetch_and_nand_1:
1545   case Builtin::BI__sync_fetch_and_nand_2:
1546   case Builtin::BI__sync_fetch_and_nand_4:
1547   case Builtin::BI__sync_fetch_and_nand_8:
1548   case Builtin::BI__sync_fetch_and_nand_16:
1549     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
1550 
       // The min/max builtins have a single ID each (no _N size variants) and
       // map to the signed (Min/Max) and unsigned (UMin/UMax) RMW operations.
1551   // Clang extensions: not overloaded yet.
1552   case Builtin::BI__sync_fetch_and_min:
1553     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
1554   case Builtin::BI__sync_fetch_and_max:
1555     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
1556   case Builtin::BI__sync_fetch_and_umin:
1557     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
1558   case Builtin::BI__sync_fetch_and_umax:
1559     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
1560 
1561   case Builtin::BI__sync_add_and_fetch_1:
1562   case Builtin::BI__sync_add_and_fetch_2:
1563   case Builtin::BI__sync_add_and_fetch_4:
1564   case Builtin::BI__sync_add_and_fetch_8:
1565   case Builtin::BI__sync_add_and_fetch_16:
         // The __sync_<op>_and_fetch_N groups all go through
         // EmitBinaryAtomicPost, which is given both the atomic RMW operation
         // and a plain llvm::Instruction opcode. NOTE(review): the opcode is
         // presumably used to recompute the post-operation value from the RMW
         // result; confirm against EmitBinaryAtomicPost, which is not in view.
1566     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
1567                                 llvm::Instruction::Add);
1568   case Builtin::BI__sync_sub_and_fetch_1:
1569   case Builtin::BI__sync_sub_and_fetch_2:
1570   case Builtin::BI__sync_sub_and_fetch_4:
1571   case Builtin::BI__sync_sub_and_fetch_8:
1572   case Builtin::BI__sync_sub_and_fetch_16:
1573     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
1574                                 llvm::Instruction::Sub);
1575   case Builtin::BI__sync_and_and_fetch_1:
1576   case Builtin::BI__sync_and_and_fetch_2:
1577   case Builtin::BI__sync_and_and_fetch_4:
1578   case Builtin::BI__sync_and_and_fetch_8:
1579   case Builtin::BI__sync_and_and_fetch_16:
1580     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
1581                                 llvm::Instruction::And);
1582   case Builtin::BI__sync_or_and_fetch_1:
1583   case Builtin::BI__sync_or_and_fetch_2:
1584   case Builtin::BI__sync_or_and_fetch_4:
1585   case Builtin::BI__sync_or_and_fetch_8:
1586   case Builtin::BI__sync_or_and_fetch_16:
1587     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
1588                                 llvm::Instruction::Or);
1589   case Builtin::BI__sync_xor_and_fetch_1:
1590   case Builtin::BI__sync_xor_and_fetch_2:
1591   case Builtin::BI__sync_xor_and_fetch_4:
1592   case Builtin::BI__sync_xor_and_fetch_8:
1593   case Builtin::BI__sync_xor_and_fetch_16:
1594     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
1595                                 llvm::Instruction::Xor);
1596   case Builtin::BI__sync_nand_and_fetch_1:
1597   case Builtin::BI__sync_nand_and_fetch_2:
1598   case Builtin::BI__sync_nand_and_fetch_4:
1599   case Builtin::BI__sync_nand_and_fetch_8:
1600   case Builtin::BI__sync_nand_and_fetch_16:
         // Nand is the only group that pairs the RMW operation with a different
         // opcode (And) and passes an extra 'true' argument. NOTE(review): the
         // flag presumably requests inversion of the And result; verify against
         // the helper's signature.
1601     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
1602                                 llvm::Instruction::And, true);
1603 
1604   case Builtin::BI__sync_val_compare_and_swap_1:
1605   case Builtin::BI__sync_val_compare_and_swap_2:
1606   case Builtin::BI__sync_val_compare_and_swap_4:
1607   case Builtin::BI__sync_val_compare_and_swap_8:
1608   case Builtin::BI__sync_val_compare_and_swap_16:
         // Both compare-and-swap flavours share MakeAtomicCmpXchgValue and differ
         // only in its last argument: false for the 'val' form here, true for the
         // 'bool' form below. NOTE(review): presumably the flag selects returning
         // the success bit instead of the old value; confirm in the helper.
1609     return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
1610 
1611   case Builtin::BI__sync_bool_compare_and_swap_1:
1612   case Builtin::BI__sync_bool_compare_and_swap_2:
1613   case Builtin::BI__sync_bool_compare_and_swap_4:
1614   case Builtin::BI__sync_bool_compare_and_swap_8:
1615   case Builtin::BI__sync_bool_compare_and_swap_16:
1616     return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
1617 
1618   case Builtin::BI__sync_swap_1:
1619   case Builtin::BI__sync_swap_2:
1620   case Builtin::BI__sync_swap_4:
1621   case Builtin::BI__sync_swap_8:
1622   case Builtin::BI__sync_swap_16:
         // __sync_swap_N and __sync_lock_test_and_set_N are lowered identically
         // here: both become an EmitBinaryAtomic call with the Xchg operation.
1623     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1624 
1625   case Builtin::BI__sync_lock_test_and_set_1:
1626   case Builtin::BI__sync_lock_test_and_set_2:
1627   case Builtin::BI__sync_lock_test_and_set_4:
1628   case Builtin::BI__sync_lock_test_and_set_8:
1629   case Builtin::BI__sync_lock_test_and_set_16:
1630     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1631 
1632   case Builtin::BI__sync_lock_release_1:
1633   case Builtin::BI__sync_lock_release_2:
1634   case Builtin::BI__sync_lock_release_4:
1635   case Builtin::BI__sync_lock_release_8:
1636   case Builtin::BI__sync_lock_release_16: {
         // Lowered to an atomic store of zero with Release ordering.
1637     Value *Ptr = EmitScalarExpr(E->getArg(0));
         // The store type is an integer whose bit width is eight times the size
         // in chars of the pointee type of the first argument, regardless of what
         // the pointee type actually is.
1638     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
1639     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
1640     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
1641                                              StoreSize.getQuantity() * 8);
1642     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
         // The store's alignment is set to the store size itself.
1643     llvm::StoreInst *Store =
1644       Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
1645                                  StoreSize);
1646     Store->setAtomic(llvm::AtomicOrdering::Release);
         // No value is produced for this builtin.
1647     return RValue::get(nullptr);
1648   }
1649 
1650   case Builtin::BI__sync_synchronize: {
1651     // We assume this is supposed to correspond to a C++0x-style
1652     // sequentially-consistent fence (i.e. this is only usable for
1653     // synchronization, not device I/O or anything like that). This intrinsic
1654     // is really badly designed in the sense that in theory, there isn't
1655     // any way to safely use it... but in practice, it mostly works
1656     // to use it with non-atomic loads and stores to get acquire/release
1657     // semantics.
         // No sync scope is passed, so CreateFence's default scope applies.
1658     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
1659     return RValue::get(nullptr);
1660   }
1661 
1662   case Builtin::BI__builtin_nontemporal_load:
         // Both nontemporal builtins delegate entirely to their helper and wrap
         // the helper's returned value as the scalar result.
1663     return RValue::get(EmitNontemporalLoad(*this, E));
1664   case Builtin::BI__builtin_nontemporal_store:
1665     return RValue::get(EmitNontemporalStore(*this, E));
1666   case Builtin::BI__c11_atomic_is_lock_free:
1667   case Builtin::BI__atomic_is_lock_free: {
1668     // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
1669     // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
1670     // _Atomic(T) is always properly-aligned.
1671     const char *LibCallName = "__atomic_is_lock_free";
1672     CallArgList Args;
         // First library argument: the size, taken from the builtin's argument 0.
1673     Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
1674              getContext().getSizeType());
         // Second library argument: the user's pointer for the __atomic form, a
         // null void* for the __c11 form (which has no second argument here).
1675     if (BuiltinID == Builtin::BI__atomic_is_lock_free)
1676       Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
1677                getContext().VoidPtrTy);
1678     else
1679       Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
1680                getContext().VoidPtrTy);
         // Build the function type from the argument list and the builtin's own
         // result type, then emit a direct call to the runtime function.
1681     const CGFunctionInfo &FuncInfo =
1682         CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
1683     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
1684     llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
1685     return EmitCall(FuncInfo, CGCallee::forDirect(Func),
1686                     ReturnValueSlot(), Args);
1687   }
1688 
1689   case Builtin::BI__atomic_test_and_set: {
         // Lowered to an atomic exchange that writes the i8 value 1; the builtin's
         // result is whether the previous byte was non-zero.
1690     // Look at the argument type to determine whether this is a volatile
1691     // operation. The parameter type is always volatile.
1692     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1693     bool Volatile =
1694         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1695 
         // Operate on the object as an i8 in the pointer's own address space.
1696     Value *Ptr = EmitScalarExpr(E->getArg(0));
1697     unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1698     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1699     Value *NewVal = Builder.getInt8(1);
1700     Value *Order = EmitScalarExpr(E->getArg(1));
         // Fast path: the memory order folded to a constant, so a single
         // atomicrmw with the matching ordering is emitted directly.
1701     if (isa<llvm::ConstantInt>(Order)) {
1702       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1703       AtomicRMWInst *Result = nullptr;
           // Every path through this switch assigns Result: unknown order values
           // share the relaxed (Monotonic) case via 'default'.
1704       switch (ord) {
1705       case 0:  // memory_order_relaxed
1706       default: // invalid order
1707         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1708                                          llvm::AtomicOrdering::Monotonic);
1709         break;
1710       case 1: // memory_order_consume
1711       case 2: // memory_order_acquire
1712         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1713                                          llvm::AtomicOrdering::Acquire);
1714         break;
1715       case 3: // memory_order_release
1716         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1717                                          llvm::AtomicOrdering::Release);
1718         break;
1719       case 4: // memory_order_acq_rel
1720 
1721         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1722                                          llvm::AtomicOrdering::AcquireRelease);
1723         break;
1724       case 5: // memory_order_seq_cst
1725         Result = Builder.CreateAtomicRMW(
1726             llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1727             llvm::AtomicOrdering::SequentiallyConsistent);
1728         break;
1729       }
1730       Result->setVolatile(Volatile);
1731       return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1732     }
1733 
         // Slow path: the order is only known at run time. Emit one block per
         // LLVM ordering, switch on the order value, and merge the exchanged
         // values with a PHI in the continuation block.
1734     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1735 
1736     llvm::BasicBlock *BBs[5] = {
1737       createBasicBlock("monotonic", CurFn),
1738       createBasicBlock("acquire", CurFn),
1739       createBasicBlock("release", CurFn),
1740       createBasicBlock("acqrel", CurFn),
1741       createBasicBlock("seqcst", CurFn)
1742     };
         // Orders[i] is the ordering used inside BBs[i].
1743     llvm::AtomicOrdering Orders[5] = {
1744         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
1745         llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
1746         llvm::AtomicOrdering::SequentiallyConsistent};
1747 
         // The order is cast to i32 as unsigned; the switch default is the
         // monotonic block, mirroring the constant path's 'default'.
1748     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1749     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1750 
         // The PHI is created first so the loop below can add incoming values.
1751     Builder.SetInsertPoint(ContBB);
1752     PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
1753 
1754     for (unsigned i = 0; i < 5; ++i) {
1755       Builder.SetInsertPoint(BBs[i]);
1756       AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1757                                                    Ptr, NewVal, Orders[i]);
1758       RMW->setVolatile(Volatile);
1759       Result->addIncoming(RMW, BBs[i]);
1760       Builder.CreateBr(ContBB);
1761     }
1762 
         // Six order values map onto five blocks: 1 (consume) and 2 (acquire)
         // both select the acquire block.
1763     SI->addCase(Builder.getInt32(0), BBs[0]);
1764     SI->addCase(Builder.getInt32(1), BBs[1]);
1765     SI->addCase(Builder.getInt32(2), BBs[1]);
1766     SI->addCase(Builder.getInt32(3), BBs[2]);
1767     SI->addCase(Builder.getInt32(4), BBs[3]);
1768     SI->addCase(Builder.getInt32(5), BBs[4]);
1769 
         // Leave the builder in the continuation block for subsequent code.
1770     Builder.SetInsertPoint(ContBB);
1771     return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1772   }
1773 
1774   case Builtin::BI__atomic_clear: {
         // Lowered to an atomic store of the i8 value 0. Volatility is taken from
         // the pointee type of the first argument before implicit casts.
1775     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1776     bool Volatile =
1777         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1778 
         // Operate on the object as an i8 in the pointer's own address space.
1779     Address Ptr = EmitPointerWithAlignment(E->getArg(0));
1780     unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
1781     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1782     Value *NewVal = Builder.getInt8(0);
1783     Value *Order = EmitScalarExpr(E->getArg(1));
         // Fast path: constant memory order, one store with the ordering set
         // after creation.
1784     if (isa<llvm::ConstantInt>(Order)) {
1785       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1786       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
           // Only relaxed (0), release (3) and seq_cst (5) have their own case;
           // every other value, including 1, 2 and 4, takes 'default' and is
           // emitted as Monotonic.
1787       switch (ord) {
1788       case 0:  // memory_order_relaxed
1789       default: // invalid order
1790         Store->setOrdering(llvm::AtomicOrdering::Monotonic);
1791         break;
1792       case 3:  // memory_order_release
1793         Store->setOrdering(llvm::AtomicOrdering::Release);
1794         break;
1795       case 5:  // memory_order_seq_cst
1796         Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
1797         break;
1798       }
1799       return RValue::get(nullptr);
1800     }
1801 
         // Slow path: run-time order. One block per supported ordering, selected
         // by a switch; all blocks branch to the continuation block.
1802     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1803 
1804     llvm::BasicBlock *BBs[3] = {
1805       createBasicBlock("monotonic", CurFn),
1806       createBasicBlock("release", CurFn),
1807       createBasicBlock("seqcst", CurFn)
1808     };
         // Orders[i] is the ordering applied to the store in BBs[i].
1809     llvm::AtomicOrdering Orders[3] = {
1810         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
1811         llvm::AtomicOrdering::SequentiallyConsistent};
1812 
         // The order is cast to i32 as unsigned; the switch default is the
         // monotonic block, matching the constant path's 'default'.
1813     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1814     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1815 
1816     for (unsigned i = 0; i < 3; ++i) {
1817       Builder.SetInsertPoint(BBs[i]);
1818       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1819       Store->setOrdering(Orders[i]);
1820       Builder.CreateBr(ContBB);
1821     }
1822 
         // Explicit cases exist only for orders 0, 3 and 5.
1823     SI->addCase(Builder.getInt32(0), BBs[0]);
1824     SI->addCase(Builder.getInt32(3), BBs[1]);
1825     SI->addCase(Builder.getInt32(5), BBs[2]);
1826 
         // Leave the builder in the continuation block; no value is produced.
1827     Builder.SetInsertPoint(ContBB);
1828     return RValue::get(nullptr);
1829   }
1830 
1831   case Builtin::BI__atomic_thread_fence:
1832   case Builtin::BI__atomic_signal_fence:
1833   case Builtin::BI__c11_atomic_thread_fence:
1834   case Builtin::BI__c11_atomic_signal_fence: {
         // All four fence builtins share this lowering; they differ only in the
         // sync scope: SingleThread for the signal fences, System for the thread
         // fences.
1835     llvm::SyncScope::ID SSID;
1836     if (BuiltinID == Builtin::BI__atomic_signal_fence ||
1837         BuiltinID == Builtin::BI__c11_atomic_signal_fence)
1838       SSID = llvm::SyncScope::SingleThread;
1839     else
1840       SSID = llvm::SyncScope::System;
1841     Value *Order = EmitScalarExpr(E->getArg(0));
         // Fast path: constant memory order. Relaxed and unrecognized orders
         // emit no fence instruction at all.
1842     if (isa<llvm::ConstantInt>(Order)) {
1843       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1844       switch (ord) {
1845       case 0:  // memory_order_relaxed
1846       default: // invalid order
1847         break;
1848       case 1:  // memory_order_consume
1849       case 2:  // memory_order_acquire
1850         Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
1851         break;
1852       case 3:  // memory_order_release
1853         Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
1854         break;
1855       case 4:  // memory_order_acq_rel
1856         Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
1857         break;
1858       case 5:  // memory_order_seq_cst
1859         Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
1860         break;
1861       }
1862       return RValue::get(nullptr);
1863     }
1864 
         // Slow path: run-time order. One block per fence ordering; there is no
         // block for relaxed because the switch default jumps straight to the
         // continuation block without emitting a fence.
1865     llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
1866     AcquireBB = createBasicBlock("acquire", CurFn);
1867     ReleaseBB = createBasicBlock("release", CurFn);
1868     AcqRelBB = createBasicBlock("acqrel", CurFn);
1869     SeqCstBB = createBasicBlock("seqcst", CurFn);
1870     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1871 
         // The order is cast to i32 as unsigned before switching on it.
1872     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1873     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
1874 
         // Orders 1 (consume) and 2 (acquire) share the acquire block.
1875     Builder.SetInsertPoint(AcquireBB);
1876     Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
1877     Builder.CreateBr(ContBB);
1878     SI->addCase(Builder.getInt32(1), AcquireBB);
1879     SI->addCase(Builder.getInt32(2), AcquireBB);
1880 
1881     Builder.SetInsertPoint(ReleaseBB);
1882     Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
1883     Builder.CreateBr(ContBB);
1884     SI->addCase(Builder.getInt32(3), ReleaseBB);
1885 
1886     Builder.SetInsertPoint(AcqRelBB);
1887     Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
1888     Builder.CreateBr(ContBB);
1889     SI->addCase(Builder.getInt32(4), AcqRelBB);
1890 
1891     Builder.SetInsertPoint(SeqCstBB);
1892     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
1893     Builder.CreateBr(ContBB);
1894     SI->addCase(Builder.getInt32(5), SeqCstBB);
1895 
         // Leave the builder in the continuation block; no value is produced.
1896     Builder.SetInsertPoint(ContBB);
1897     return RValue::get(nullptr);
1898   }
1899 
1900     // Library functions with special handling.
1901   case Builtin::BIsqrt:
1902   case Builtin::BIsqrtf:
1903   case Builtin::BIsqrtl: {
1904     // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
1905     // in finite- or unsafe-math mode (the intrinsic has different semantics
1906     // for handling negative numbers compared to the library function, so
1907     // -fmath-errno=0 is not enough).
         // Each 'break' below leaves the switch without emitting the intrinsic;
         // the call is then handled by code after the switch (outside this view).
         // The first guard requires the callee declaration to carry ConstAttr.
1908     if (!FD->hasAttr<ConstAttr>())
1909       break;
         // The second guard requires UnsafeFPMath or NoNaNsFPMath to be enabled.
1910     if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
1911           CGM.getCodeGenOpts().NoNaNsFPMath))
1912       break;
         // The intrinsic is overloaded on the LLVM type of the argument.
1913     Value *Arg0 = EmitScalarExpr(E->getArg(0));
1914     llvm::Type *ArgType = Arg0->getType();
1915     Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
1916     return RValue::get(Builder.CreateCall(F, Arg0));
1917   }
1918 
1919   case Builtin::BI__builtin_pow:
1920   case Builtin::BI__builtin_powf:
1921   case Builtin::BI__builtin_powl:
1922   case Builtin::BIpow:
1923   case Builtin::BIpowf:
1924   case Builtin::BIpowl: {
1925     // Transform a call to pow* into a @llvm.pow.* intrinsic call.
         // Only done when the callee declaration carries ConstAttr; otherwise
         // 'break' leaves the switch and the call is handled by code after it
         // (outside this view).
1926     if (!FD->hasAttr<ConstAttr>())
1927       break;
         // The base is emitted before the exponent; the intrinsic is overloaded
         // on the LLVM type of the base.
1928     Value *Base = EmitScalarExpr(E->getArg(0));
1929     Value *Exponent = EmitScalarExpr(E->getArg(1));
1930     llvm::Type *ArgType = Base->getType();
1931     Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
1932     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1933   }
1934 
1935   case Builtin::BIfma:
1936   case Builtin::BIfmaf:
1937   case Builtin::BIfmal:
1938   case Builtin::BI__builtin_fma:
1939   case Builtin::BI__builtin_fmaf:
1940   case Builtin::BI__builtin_fmal: {
1941     // Rewrite fma to intrinsic.
         // Unlike the sqrt and pow cases, there is no attribute or math-mode
         // guard here: the intrinsic is always used. It is overloaded on the LLVM
         // type of the first argument.
1942     Value *FirstArg = EmitScalarExpr(E->getArg(0));
1943     llvm::Type *ArgType = FirstArg->getType();
1944     Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
         // The second and third operands are emitted inline within the braced
         // argument list.
1945     return RValue::get(
1946         Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
1947                                EmitScalarExpr(E->getArg(2))}));
1948   }
1949 
1950   case Builtin::BI__builtin_signbit:
1951   case Builtin::BI__builtin_signbitf:
1952   case Builtin::BI__builtin_signbitl: {
         // EmitSignBit computes the sign bit of the evaluated argument; its
         // result is zero-extended to the call's converted result type.
1953     return RValue::get(
1954         Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
1955                            ConvertType(E->getType())));
1956   }
1957   case Builtin::BI__annotation: {
1958     // Re-encode each wide string to UTF8 and make an MDString.
1959     SmallVector<Metadata *, 1> Strings;
1960     for (const Expr *Arg : E->arguments()) {
           // Every argument must be a string literal (after stripping parens and
           // casts) with 2-byte characters; cast<> and the assert enforce this.
1961       const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
1962       assert(Str->getCharByteWidth() == 2);
1963       StringRef WideBytes = Str->getBytes();
1964       std::string StrUtf8;
           // A failed conversion is diagnosed and that argument is skipped; the
           // remaining arguments are still processed and the call still emitted.
1965       if (!convertUTF16ToUTF8String(
1966               makeArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
1967         CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
1968         continue;
1969       }
1970       Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
1971     }
1972 
1973     // Build an MDTuple of MDStrings and emit the intrinsic call.
1974     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
1975     MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
1976     Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
         // The builtin produces no usable value.
1977     return RValue::getIgnored();
1978   }
1979   case Builtin::BI__builtin_annotation: {
         // The annotated value is argument 0; the llvm.annotation intrinsic is
         // overloaded on that value's LLVM type.
1980     llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
1981     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
1982                                       AnnVal->getType());
1983 
1984     // Get the annotation string, go through casts. Sema requires this to be a
1985     // non-wide string literal, potentially casted, so the cast<> is safe.
1986     const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
1987     StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
         // The call is built by EmitAnnotationCall using the expression's source
         // location; its result is the builtin's value.
1988     return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
1989   }
1990   case Builtin::BI__builtin_addcb:
1991   case Builtin::BI__builtin_addcs:
1992   case Builtin::BI__builtin_addc:
1993   case Builtin::BI__builtin_addcl:
1994   case Builtin::BI__builtin_addcll:
1995   case Builtin::BI__builtin_subcb:
1996   case Builtin::BI__builtin_subcs:
1997   case Builtin::BI__builtin_subc:
1998   case Builtin::BI__builtin_subcl:
1999   case Builtin::BI__builtin_subcll: {
2000 
2001     // We translate all of these builtins from expressions of the form:
2002     //   int x = ..., y = ..., carryin = ..., carryout, result;
2003     //   result = __builtin_addc(x, y, carryin, &carryout);
2004     //
2005     // to LLVM IR of the form:
2006     //
2007     //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
2008     //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
2009     //   %carry1 = extractvalue {i32, i1} %tmp1, 1
2010     //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
2011     //                                                       i32 %carryin)
2012     //   %result = extractvalue {i32, i1} %tmp2, 0
2013     //   %carry2 = extractvalue {i32, i1} %tmp2, 1
2014     //   %tmp3 = or i1 %carry1, %carry2
2015     //   %tmp4 = zext i1 %tmp3 to i32
2016     //   store i32 %tmp4, i32* %carryout
2017 
2018     // Scalarize our inputs.
2019     llvm::Value *X = EmitScalarExpr(E->getArg(0));
2020     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
2021     llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
2022     Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
2023 
2024     // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
2025     llvm::Intrinsic::ID IntrinsicId;
         // The default label is unreachable because the enclosing case labels
         // admit exactly the ten IDs listed below.
2026     switch (BuiltinID) {
2027     default: llvm_unreachable("Unknown multiprecision builtin id.");
2028     case Builtin::BI__builtin_addcb:
2029     case Builtin::BI__builtin_addcs:
2030     case Builtin::BI__builtin_addc:
2031     case Builtin::BI__builtin_addcl:
2032     case Builtin::BI__builtin_addcll:
2033       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
2034       break;
2035     case Builtin::BI__builtin_subcb:
2036     case Builtin::BI__builtin_subcs:
2037     case Builtin::BI__builtin_subc:
2038     case Builtin::BI__builtin_subcl:
2039     case Builtin::BI__builtin_subcll:
2040       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
2041       break;
2042     }
2043 
         // Two chained overflow operations: X op Y, then that result op Carryin.
         // The carry-out is the OR of the two overflow bits, zero-extended to the
         // type of X and stored through the fourth argument.
2044     // Construct our resulting LLVM IR expression.
2045     llvm::Value *Carry1;
2046     llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
2047                                               X, Y, Carry1);
2048     llvm::Value *Carry2;
2049     llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
2050                                               Sum1, Carryin, Carry2);
2051     llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
2052                                                X->getType());
2053     Builder.CreateStore(CarryOut, CarryOutPtr);
         // The builtin's value is the second (final) sum or difference.
2054     return RValue::get(Sum2);
2055   }
2056 
2057   case Builtin::BI__builtin_add_overflow:
2058   case Builtin::BI__builtin_sub_overflow:
2059   case Builtin::BI__builtin_mul_overflow: {
         // Type-generic checked arithmetic: the two operands and the pointee of
         // the result pointer may each have a different integer type. The
         // operation is carried out in a single "encompassing" integer type and
         // the result is then narrowed to the destination type if necessary.
2060     const clang::Expr *LeftArg = E->getArg(0);
2061     const clang::Expr *RightArg = E->getArg(1);
2062     const clang::Expr *ResultArg = E->getArg(2);
2063 
2064     clang::QualType ResultQTy =
2065         ResultArg->getType()->castAs<PointerType>()->getPointeeType();
2066 
         // Collect width and signedness of all three types and combine them.
         // NOTE(review): EncompassingIntegerType is defined outside this view;
         // presumably it yields a type able to represent every value of all three
         // inputs. Confirm against its definition.
2067     WidthAndSignedness LeftInfo =
2068         getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
2069     WidthAndSignedness RightInfo =
2070         getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
2071     WidthAndSignedness ResultInfo =
2072         getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
2073     WidthAndSignedness EncompassingInfo =
2074         EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
2075 
2076     llvm::Type *EncompassingLLVMTy =
2077         llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
2078 
2079     llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
2080 
         // Choose the signed or unsigned *.with.overflow intrinsic according to
         // the signedness of the encompassing type, not of the operands.
2081     llvm::Intrinsic::ID IntrinsicId;
2082     switch (BuiltinID) {
2083     default:
2084       llvm_unreachable("Unknown overflow builtin id.");
2085     case Builtin::BI__builtin_add_overflow:
2086       IntrinsicId = EncompassingInfo.Signed
2087                         ? llvm::Intrinsic::sadd_with_overflow
2088                         : llvm::Intrinsic::uadd_with_overflow;
2089       break;
2090     case Builtin::BI__builtin_sub_overflow:
2091       IntrinsicId = EncompassingInfo.Signed
2092                         ? llvm::Intrinsic::ssub_with_overflow
2093                         : llvm::Intrinsic::usub_with_overflow;
2094       break;
2095     case Builtin::BI__builtin_mul_overflow:
2096       IntrinsicId = EncompassingInfo.Signed
2097                         ? llvm::Intrinsic::smul_with_overflow
2098                         : llvm::Intrinsic::umul_with_overflow;
2099       break;
2100     }
2101 
         // Operands are emitted left, right, then the result pointer.
2102     llvm::Value *Left = EmitScalarExpr(LeftArg);
2103     llvm::Value *Right = EmitScalarExpr(RightArg);
2104     Address ResultPtr = EmitPointerWithAlignment(ResultArg);
2105 
         // Each operand is extended according to its own signedness.
2106     // Extend each operand to the encompassing type.
2107     Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
2108     Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
2109 
2110     // Perform the operation on the extended values.
2111     llvm::Value *Overflow, *Result;
2112     Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
2113 
2114     if (EncompassingInfo.Width > ResultInfo.Width) {
2115       // The encompassing type is wider than the result type, so we need to
2116       // truncate it.
2117       llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
2118 
2119       // To see if the truncation caused an overflow, we will extend
2120       // the result and then compare it to the original result.
2121       llvm::Value *ResultTruncExt = Builder.CreateIntCast(
2122           ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
2123       llvm::Value *TruncationOverflow =
2124           Builder.CreateICmpNE(Result, ResultTruncExt);
2125 
           // Overflow is reported if either the wide operation or the narrowing
           // lost information.
2126       Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
2127       Result = ResultTrunc;
2128     }
2129 
         // The store honours a volatile-qualified pointee on the result pointer.
2130     // Finally, store the result using the pointer.
2131     bool isVolatile =
2132       ResultArg->getType()->getPointeeType().isVolatileQualified();
2133     Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
2134 
         // The builtin's value is the combined overflow flag.
2135     return RValue::get(Overflow);
2136   }
2137 
2138   case Builtin::BI__builtin_uadd_overflow:
2139   case Builtin::BI__builtin_uaddl_overflow:
2140   case Builtin::BI__builtin_uaddll_overflow:
2141   case Builtin::BI__builtin_usub_overflow:
2142   case Builtin::BI__builtin_usubl_overflow:
2143   case Builtin::BI__builtin_usubll_overflow:
2144   case Builtin::BI__builtin_umul_overflow:
2145   case Builtin::BI__builtin_umull_overflow:
2146   case Builtin::BI__builtin_umulll_overflow:
2147   case Builtin::BI__builtin_sadd_overflow:
2148   case Builtin::BI__builtin_saddl_overflow:
2149   case Builtin::BI__builtin_saddll_overflow:
2150   case Builtin::BI__builtin_ssub_overflow:
2151   case Builtin::BI__builtin_ssubl_overflow:
2152   case Builtin::BI__builtin_ssubll_overflow:
2153   case Builtin::BI__builtin_smul_overflow:
2154   case Builtin::BI__builtin_smull_overflow:
2155   case Builtin::BI__builtin_smulll_overflow: {
2156 
2157     // We translate all of these builtins directly to the relevant llvm IR node.
2158 
2159     // Scalarize our inputs.
2160     llvm::Value *X = EmitScalarExpr(E->getArg(0));
2161     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
2162     Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
2163 
2164     // Decide which of the overflow intrinsics we are lowering to:
2165     llvm::Intrinsic::ID IntrinsicId;
2166     switch (BuiltinID) {
2167     default: llvm_unreachable("Unknown overflow builtin id.");
2168     case Builtin::BI__builtin_uadd_overflow:
2169     case Builtin::BI__builtin_uaddl_overflow:
2170     case Builtin::BI__builtin_uaddll_overflow:
2171       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
2172       break;
2173     case Builtin::BI__builtin_usub_overflow:
2174     case Builtin::BI__builtin_usubl_overflow:
2175     case Builtin::BI__builtin_usubll_overflow:
2176       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
2177       break;
2178     case Builtin::BI__builtin_umul_overflow:
2179     case Builtin::BI__builtin_umull_overflow:
2180     case Builtin::BI__builtin_umulll_overflow:
2181       IntrinsicId = llvm::Intrinsic::umul_with_overflow;
2182       break;
2183     case Builtin::BI__builtin_sadd_overflow:
2184     case Builtin::BI__builtin_saddl_overflow:
2185     case Builtin::BI__builtin_saddll_overflow:
2186       IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
2187       break;
2188     case Builtin::BI__builtin_ssub_overflow:
2189     case Builtin::BI__builtin_ssubl_overflow:
2190     case Builtin::BI__builtin_ssubll_overflow:
2191       IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
2192       break;
2193     case Builtin::BI__builtin_smul_overflow:
2194     case Builtin::BI__builtin_smull_overflow:
2195     case Builtin::BI__builtin_smulll_overflow:
2196       IntrinsicId = llvm::Intrinsic::smul_with_overflow;
2197       break;
2198     }
2199 
2200 
2201     llvm::Value *Carry;
2202     llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
2203     Builder.CreateStore(Sum, SumOutPtr);
2204 
2205     return RValue::get(Carry);
2206   }
2207   case Builtin::BI__builtin_addressof:
2208     return RValue::get(EmitLValue(E->getArg(0)).getPointer());
2209   case Builtin::BI__builtin_operator_new:
2210     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2211                                     E->getArg(0), false);
2212   case Builtin::BI__builtin_operator_delete:
2213     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2214                                     E->getArg(0), true);
2215   case Builtin::BI__noop:
2216     // __noop always evaluates to an integer literal zero.
2217     return RValue::get(ConstantInt::get(IntTy, 0));
2218   case Builtin::BI__builtin_call_with_static_chain: {
2219     const CallExpr *Call = cast<CallExpr>(E->getArg(0));
2220     const Expr *Chain = E->getArg(1);
2221     return EmitCall(Call->getCallee()->getType(),
2222                     EmitCallee(Call->getCallee()), Call, ReturnValue,
2223                     EmitScalarExpr(Chain));
2224   }
2225   case Builtin::BI_InterlockedExchange8:
2226   case Builtin::BI_InterlockedExchange16:
2227   case Builtin::BI_InterlockedExchange:
2228   case Builtin::BI_InterlockedExchangePointer:
2229     return RValue::get(
2230         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
2231   case Builtin::BI_InterlockedCompareExchangePointer: {
2232     llvm::Type *RTy;
2233     llvm::IntegerType *IntType =
2234       IntegerType::get(getLLVMContext(),
2235                        getContext().getTypeSize(E->getType()));
2236     llvm::Type *IntPtrType = IntType->getPointerTo();
2237 
2238     llvm::Value *Destination =
2239       Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
2240 
2241     llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
2242     RTy = Exchange->getType();
2243     Exchange = Builder.CreatePtrToInt(Exchange, IntType);
2244 
2245     llvm::Value *Comparand =
2246       Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
2247 
2248     auto Result =
2249         Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
2250                                     AtomicOrdering::SequentiallyConsistent,
2251                                     AtomicOrdering::SequentiallyConsistent);
2252     Result->setVolatile(true);
2253 
2254     return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
2255                                                                          0),
2256                                               RTy));
2257   }
2258   case Builtin::BI_InterlockedCompareExchange8:
2259   case Builtin::BI_InterlockedCompareExchange16:
2260   case Builtin::BI_InterlockedCompareExchange:
2261   case Builtin::BI_InterlockedCompareExchange64: {
2262     AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
2263         EmitScalarExpr(E->getArg(0)),
2264         EmitScalarExpr(E->getArg(2)),
2265         EmitScalarExpr(E->getArg(1)),
2266         AtomicOrdering::SequentiallyConsistent,
2267         AtomicOrdering::SequentiallyConsistent);
2268       CXI->setVolatile(true);
2269       return RValue::get(Builder.CreateExtractValue(CXI, 0));
2270   }
2271   case Builtin::BI_InterlockedIncrement16:
2272   case Builtin::BI_InterlockedIncrement:
2273     return RValue::get(
2274         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
2275   case Builtin::BI_InterlockedDecrement16:
2276   case Builtin::BI_InterlockedDecrement:
2277     return RValue::get(
2278         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
2279   case Builtin::BI_InterlockedAnd8:
2280   case Builtin::BI_InterlockedAnd16:
2281   case Builtin::BI_InterlockedAnd:
2282     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
2283   case Builtin::BI_InterlockedExchangeAdd8:
2284   case Builtin::BI_InterlockedExchangeAdd16:
2285   case Builtin::BI_InterlockedExchangeAdd:
2286     return RValue::get(
2287         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
2288   case Builtin::BI_InterlockedExchangeSub8:
2289   case Builtin::BI_InterlockedExchangeSub16:
2290   case Builtin::BI_InterlockedExchangeSub:
2291     return RValue::get(
2292         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
2293   case Builtin::BI_InterlockedOr8:
2294   case Builtin::BI_InterlockedOr16:
2295   case Builtin::BI_InterlockedOr:
2296     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
2297   case Builtin::BI_InterlockedXor8:
2298   case Builtin::BI_InterlockedXor16:
2299   case Builtin::BI_InterlockedXor:
2300     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
2301   case Builtin::BI_interlockedbittestandset:
2302     return RValue::get(
2303         EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E));
2304 
2305   case Builtin::BI__exception_code:
2306   case Builtin::BI_exception_code:
2307     return RValue::get(EmitSEHExceptionCode());
2308   case Builtin::BI__exception_info:
2309   case Builtin::BI_exception_info:
2310     return RValue::get(EmitSEHExceptionInfo());
2311   case Builtin::BI__abnormal_termination:
2312   case Builtin::BI_abnormal_termination:
2313     return RValue::get(EmitSEHAbnormalTermination());
2314   case Builtin::BI_setjmpex: {
2315     if (getTarget().getTriple().isOSMSVCRT()) {
2316       llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2317       llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2318           getLLVMContext(), llvm::AttributeList::FunctionIndex,
2319           llvm::Attribute::ReturnsTwice);
2320       llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
2321           llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2322           "_setjmpex", ReturnsTwiceAttr, /*Local=*/true);
2323       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2324           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2325       llvm::Value *FrameAddr =
2326           Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2327                              ConstantInt::get(Int32Ty, 0));
2328       llvm::Value *Args[] = {Buf, FrameAddr};
2329       llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
2330       CS.setAttributes(ReturnsTwiceAttr);
2331       return RValue::get(CS.getInstruction());
2332     }
2333     break;
2334   }
2335   case Builtin::BI_setjmp: {
2336     if (getTarget().getTriple().isOSMSVCRT()) {
2337       llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2338           getLLVMContext(), llvm::AttributeList::FunctionIndex,
2339           llvm::Attribute::ReturnsTwice);
2340       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2341           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2342       llvm::CallSite CS;
2343       if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
2344         llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
2345         llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
2346             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
2347             "_setjmp3", ReturnsTwiceAttr, /*Local=*/true);
2348         llvm::Value *Count = ConstantInt::get(IntTy, 0);
2349         llvm::Value *Args[] = {Buf, Count};
2350         CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
2351       } else {
2352         llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2353         llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
2354             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2355             "_setjmp", ReturnsTwiceAttr, /*Local=*/true);
2356         llvm::Value *FrameAddr =
2357             Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2358                                ConstantInt::get(Int32Ty, 0));
2359         llvm::Value *Args[] = {Buf, FrameAddr};
2360         CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
2361       }
2362       CS.setAttributes(ReturnsTwiceAttr);
2363       return RValue::get(CS.getInstruction());
2364     }
2365     break;
2366   }
2367 
2368   case Builtin::BI__GetExceptionInfo: {
2369     if (llvm::GlobalVariable *GV =
2370             CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
2371       return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
2372     break;
2373   }
2374 
2375   case Builtin::BI__fastfail:
2376     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
2377 
2378   case Builtin::BI__builtin_coro_size: {
2379     auto & Context = getContext();
2380     auto SizeTy = Context.getSizeType();
2381     auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
2382     Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
2383     return RValue::get(Builder.CreateCall(F));
2384   }
2385 
2386   case Builtin::BI__builtin_coro_id:
2387     return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
2388   case Builtin::BI__builtin_coro_promise:
2389     return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
2390   case Builtin::BI__builtin_coro_resume:
2391     return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
2392   case Builtin::BI__builtin_coro_frame:
2393     return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
2394   case Builtin::BI__builtin_coro_free:
2395     return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
2396   case Builtin::BI__builtin_coro_destroy:
2397     return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
2398   case Builtin::BI__builtin_coro_done:
2399     return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
2400   case Builtin::BI__builtin_coro_alloc:
2401     return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
2402   case Builtin::BI__builtin_coro_begin:
2403     return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
2404   case Builtin::BI__builtin_coro_end:
2405     return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
2406   case Builtin::BI__builtin_coro_suspend:
2407     return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
2408   case Builtin::BI__builtin_coro_param:
2409     return EmitCoroutineIntrinsic(E, Intrinsic::coro_param);
2410 
2411   // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
2412   case Builtin::BIread_pipe:
2413   case Builtin::BIwrite_pipe: {
2414     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2415           *Arg1 = EmitScalarExpr(E->getArg(1));
2416     CGOpenCLRuntime OpenCLRT(CGM);
2417     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2418     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2419 
2420     // Type of the generic packet parameter.
2421     unsigned GenericAS =
2422         getContext().getTargetAddressSpace(LangAS::opencl_generic);
2423     llvm::Type *I8PTy = llvm::PointerType::get(
2424         llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
2425 
2426     // Testing which overloaded version we should generate the call for.
2427     if (2U == E->getNumArgs()) {
2428       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
2429                                                              : "__write_pipe_2";
2430       // Creating a generic function type to be able to call with any builtin or
2431       // user defined type.
2432       llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
2433       llvm::FunctionType *FTy = llvm::FunctionType::get(
2434           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2435       Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
2436       return RValue::get(
2437           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2438                              {Arg0, BCast, PacketSize, PacketAlign}));
2439     } else {
2440       assert(4 == E->getNumArgs() &&
2441              "Illegal number of parameters to pipe function");
2442       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
2443                                                              : "__write_pipe_4";
2444 
2445       llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
2446                               Int32Ty, Int32Ty};
2447       Value *Arg2 = EmitScalarExpr(E->getArg(2)),
2448             *Arg3 = EmitScalarExpr(E->getArg(3));
2449       llvm::FunctionType *FTy = llvm::FunctionType::get(
2450           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2451       Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
2452       // We know the third argument is an integer type, but we may need to cast
2453       // it to i32.
2454       if (Arg2->getType() != Int32Ty)
2455         Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
2456       return RValue::get(Builder.CreateCall(
2457           CGM.CreateRuntimeFunction(FTy, Name),
2458           {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
2459     }
2460   }
2461   // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write
2462   // functions
2463   case Builtin::BIreserve_read_pipe:
2464   case Builtin::BIreserve_write_pipe:
2465   case Builtin::BIwork_group_reserve_read_pipe:
2466   case Builtin::BIwork_group_reserve_write_pipe:
2467   case Builtin::BIsub_group_reserve_read_pipe:
2468   case Builtin::BIsub_group_reserve_write_pipe: {
2469     // Composing the mangled name for the function.
2470     const char *Name;
2471     if (BuiltinID == Builtin::BIreserve_read_pipe)
2472       Name = "__reserve_read_pipe";
2473     else if (BuiltinID == Builtin::BIreserve_write_pipe)
2474       Name = "__reserve_write_pipe";
2475     else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
2476       Name = "__work_group_reserve_read_pipe";
2477     else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
2478       Name = "__work_group_reserve_write_pipe";
2479     else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
2480       Name = "__sub_group_reserve_read_pipe";
2481     else
2482       Name = "__sub_group_reserve_write_pipe";
2483 
2484     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2485           *Arg1 = EmitScalarExpr(E->getArg(1));
2486     llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
2487     CGOpenCLRuntime OpenCLRT(CGM);
2488     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2489     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2490 
2491     // Building the generic function prototype.
2492     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
2493     llvm::FunctionType *FTy = llvm::FunctionType::get(
2494         ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2495     // We know the second argument is an integer type, but we may need to cast
2496     // it to i32.
2497     if (Arg1->getType() != Int32Ty)
2498       Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
2499     return RValue::get(
2500         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2501                            {Arg0, Arg1, PacketSize, PacketAlign}));
2502   }
2503   // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
2504   // functions
2505   case Builtin::BIcommit_read_pipe:
2506   case Builtin::BIcommit_write_pipe:
2507   case Builtin::BIwork_group_commit_read_pipe:
2508   case Builtin::BIwork_group_commit_write_pipe:
2509   case Builtin::BIsub_group_commit_read_pipe:
2510   case Builtin::BIsub_group_commit_write_pipe: {
2511     const char *Name;
2512     if (BuiltinID == Builtin::BIcommit_read_pipe)
2513       Name = "__commit_read_pipe";
2514     else if (BuiltinID == Builtin::BIcommit_write_pipe)
2515       Name = "__commit_write_pipe";
2516     else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
2517       Name = "__work_group_commit_read_pipe";
2518     else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
2519       Name = "__work_group_commit_write_pipe";
2520     else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
2521       Name = "__sub_group_commit_read_pipe";
2522     else
2523       Name = "__sub_group_commit_write_pipe";
2524 
2525     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2526           *Arg1 = EmitScalarExpr(E->getArg(1));
2527     CGOpenCLRuntime OpenCLRT(CGM);
2528     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2529     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2530 
2531     // Building the generic function prototype.
2532     llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
2533     llvm::FunctionType *FTy =
2534         llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
2535                                 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2536 
2537     return RValue::get(
2538         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2539                            {Arg0, Arg1, PacketSize, PacketAlign}));
2540   }
2541   // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
2542   case Builtin::BIget_pipe_num_packets:
2543   case Builtin::BIget_pipe_max_packets: {
2544     const char *Name;
2545     if (BuiltinID == Builtin::BIget_pipe_num_packets)
2546       Name = "__get_pipe_num_packets";
2547     else
2548       Name = "__get_pipe_max_packets";
2549 
2550     // Building the generic function prototype.
2551     Value *Arg0 = EmitScalarExpr(E->getArg(0));
2552     CGOpenCLRuntime OpenCLRT(CGM);
2553     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2554     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2555     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
2556     llvm::FunctionType *FTy = llvm::FunctionType::get(
2557         Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2558 
2559     return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2560                                           {Arg0, PacketSize, PacketAlign}));
2561   }
2562 
2563   // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
2564   case Builtin::BIto_global:
2565   case Builtin::BIto_local:
2566   case Builtin::BIto_private: {
2567     auto Arg0 = EmitScalarExpr(E->getArg(0));
2568     auto NewArgT = llvm::PointerType::get(Int8Ty,
2569       CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2570     auto NewRetT = llvm::PointerType::get(Int8Ty,
2571       CGM.getContext().getTargetAddressSpace(
2572         E->getType()->getPointeeType().getAddressSpace()));
2573     auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
2574     llvm::Value *NewArg;
2575     if (Arg0->getType()->getPointerAddressSpace() !=
2576         NewArgT->getPointerAddressSpace())
2577       NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
2578     else
2579       NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
2580     auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
2581     auto NewCall =
2582         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
2583     return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
2584       ConvertType(E->getType())));
2585   }
2586 
  // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
  // It contains four different overload formats specified in Table 6.13.17.1.
  //
  // The overload is recognised from the argument count and from whether
  // argument 3 has block-pointer type, and is lowered to one of four runtime
  // functions:
  //   4 args                         -> __enqueue_kernel_basic
  //   >= 5 args, arg 3 is a block    -> __enqueue_kernel_vaargs
  //   7 args, arg 3 is not a block   -> __enqueue_kernel_basic_events
  //   > 7 args, arg 3 is not a block -> __enqueue_kernel_events_vaargs
  // Every runtime function returns i32.
  case Builtin::BIenqueue_kernel: {
    StringRef Name; // Generated function call name
    unsigned NumArgs = E->getNumArgs();

    llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
    // i8* in the generic address space; block operands are cast to this type.
    llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
        getContext().getTargetAddressSpace(LangAS::opencl_generic));

    // The first three operands are common to all overloads. The ndrange is an
    // aggregate, so it is emitted to memory and passed by address.
    llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
    llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
    LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
    llvm::Value *Range = NDRangeL.getAddress().getPointer();
    llvm::Type *RangeTy = NDRangeL.getAddress().getType();

    if (NumArgs == 4) {
      // The most basic form of the call with parameters:
      // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
      Name = "__enqueue_kernel_basic";
      llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy};
      llvm::FunctionType *FTy = llvm::FunctionType::get(
          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false);

      llvm::Value *Block = Builder.CreatePointerCast(
          EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);

      // Attach byval at attribute index 3; the same attribute list is given
      // to both the runtime function declaration and the call.
      AttrBuilder B;
      B.addAttribute(Attribute::ByVal);
      llvm::AttributeList ByValAttrSet =
          llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);

      auto RTCall =
          Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
                             {Queue, Flags, Range, Block});
      RTCall->setAttributes(ByValAttrSet);
      return RValue::get(RTCall);
    }
    assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");

    // Create a temporary array to hold the sizes of local pointer arguments
    // for the block. \p First is the position of the first size argument.
    // Returns a pointer to element 0 of that array.
    // NOTE(review): Ptr is assigned only on the first loop iteration, so it
    // would be returned uninitialized if First == NumArgs. Both call sites
    // below are reached only with First < NumArgs.
    auto CreateArrayForSizeVar = [=](unsigned First) {
      auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First);
      auto *Arr = Builder.CreateAlloca(AT);
      llvm::Value *Ptr;
      // Each of the following arguments specifies the size of the corresponding
      // argument passed to the enqueued block.
      auto *Zero = llvm::ConstantInt::get(IntTy, 0);
      for (unsigned I = First; I < NumArgs; ++I) {
        auto *Index = llvm::ConstantInt::get(IntTy, I - First);
        auto *GEP = Builder.CreateGEP(Arr, {Zero, Index});
        if (I == First)
          Ptr = GEP;
        // Each size operand is zero-extended or truncated to SizeTy before
        // being stored into its slot.
        auto *V =
            Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
        Builder.CreateAlignedStore(
            V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy));
      }
      return Ptr;
    };

    // Could have events and/or vaargs.
    if (E->getArg(3)->getType()->isBlockPointerType()) {
      // No events passed, but has variadic arguments.
      Name = "__enqueue_kernel_vaargs";
      auto *Block = Builder.CreatePointerCast(EmitScalarExpr(E->getArg(3)),
                                              GenericVoidPtrTy);
      auto *PtrToSizeArray = CreateArrayForSizeVar(4);

      // Create a vector of the arguments, as well as a constant value to
      // express to the runtime the number of variadic arguments.
      // NOTE(review): the flags parameter is typed IntTy here but Int32Ty in
      // the other overloads -- confirm the two are always the same type.
      std::vector<llvm::Value *> Args = {Queue,
                                         Flags,
                                         Range,
                                         Block,
                                         ConstantInt::get(IntTy, NumArgs - 4),
                                         PtrToSizeArray};
      std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy,
                                          RangeTy, GenericVoidPtrTy,
                                          IntTy,   PtrToSizeArray->getType()};

      llvm::FunctionType *FTy = llvm::FunctionType::get(
          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
      return RValue::get(
          Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
                             llvm::ArrayRef<llvm::Value *>(Args)));
    }
    // Any calls now have event arguments passed.
    if (NumArgs >= 7) {
      llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
      llvm::Type *EventPtrTy = EventTy->getPointerTo(
          CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));

      // Argument 3 is the event count, passed to the runtime as i32.
      llvm::Value *NumEvents =
          Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
      // Argument 4 may be given as an array; if so, decay it to a pointer.
      llvm::Value *EventList =
          E->getArg(4)->getType()->isArrayType()
              ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
              : EmitScalarExpr(E->getArg(4));
      llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
      // Convert to generic address space.
      EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
      ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy);
      llvm::Value *Block = Builder.CreatePointerCast(
          EmitScalarExpr(E->getArg(6)), GenericVoidPtrTy);

      // Parameter types and operands shared by both event overloads; the
      // variadic overload appends two more entries to each below.
      std::vector<llvm::Type *> ArgTys = {
          QueueTy,    Int32Ty,    RangeTy,         Int32Ty,
          EventPtrTy, EventPtrTy, GenericVoidPtrTy};

      std::vector<llvm::Value *> Args = {Queue,     Flags,    Range, NumEvents,
                                         EventList, ClkEvent, Block};

      if (NumArgs == 7) {
        // Has events but no variadics.
        Name = "__enqueue_kernel_basic_events";
        llvm::FunctionType *FTy = llvm::FunctionType::get(
            Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
        return RValue::get(
            Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
                               llvm::ArrayRef<llvm::Value *>(Args)));
      }
      // Has event info and variadics
      // Pass the number of variadics to the runtime function too.
      Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
      ArgTys.push_back(Int32Ty);
      Name = "__enqueue_kernel_events_vaargs";

      auto *PtrToSizeArray = CreateArrayForSizeVar(7);
      Args.push_back(PtrToSizeArray);
      ArgTys.push_back(PtrToSizeArray->getType());

      llvm::FunctionType *FTy = llvm::FunctionType::get(
          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
      return RValue::get(
          Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
                             llvm::ArrayRef<llvm::Value *>(Args)));
    }
    // NOTE(review): this point is reached only when NumArgs is 5 or 6 and
    // argument 3 is not a block pointer; control then continues into the
    // next case label. Presumably such calls are rejected before codegen --
    // confirm.
    LLVM_FALLTHROUGH;
  }
2728   // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
2729   // parameter.
2730   case Builtin::BIget_kernel_work_group_size: {
2731     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2732         getContext().getTargetAddressSpace(LangAS::opencl_generic));
2733     Value *Arg = EmitScalarExpr(E->getArg(0));
2734     Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
2735     return RValue::get(Builder.CreateCall(
2736         CGM.CreateRuntimeFunction(
2737             llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
2738             "__get_kernel_work_group_size_impl"),
2739         Arg));
2740   }
2741   case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
2742     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2743         getContext().getTargetAddressSpace(LangAS::opencl_generic));
2744     Value *Arg = EmitScalarExpr(E->getArg(0));
2745     Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
2746     return RValue::get(Builder.CreateCall(
2747         CGM.CreateRuntimeFunction(
2748             llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
2749             "__get_kernel_preferred_work_group_multiple_impl"),
2750         Arg));
2751   }
2752   case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
2753   case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
2754     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2755         getContext().getTargetAddressSpace(LangAS::opencl_generic));
2756     LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
2757     llvm::Value *NDRange = NDRangeL.getAddress().getPointer();
2758     Value *Block = EmitScalarExpr(E->getArg(1));
2759     Block = Builder.CreatePointerCast(Block, GenericVoidPtrTy);
2760     const char *Name =
2761         BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
2762             ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
2763             : "__get_kernel_sub_group_count_for_ndrange_impl";
2764     return RValue::get(Builder.CreateCall(
2765         CGM.CreateRuntimeFunction(
2766             llvm::FunctionType::get(
2767                 IntTy, {NDRange->getType(), GenericVoidPtrTy}, false),
2768             Name),
2769         {NDRange, Block}));
2770   }
2771 
2772   case Builtin::BI__builtin_store_half:
2773   case Builtin::BI__builtin_store_halff: {
2774     Value *Val = EmitScalarExpr(E->getArg(0));
2775     Address Address = EmitPointerWithAlignment(E->getArg(1));
2776     Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
2777     return RValue::get(Builder.CreateStore(HalfVal, Address));
2778   }
2779   case Builtin::BI__builtin_load_half: {
2780     Address Address = EmitPointerWithAlignment(E->getArg(0));
2781     Value *HalfVal = Builder.CreateLoad(Address);
2782     return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
2783   }
2784   case Builtin::BI__builtin_load_halff: {
2785     Address Address = EmitPointerWithAlignment(E->getArg(0));
2786     Value *HalfVal = Builder.CreateLoad(Address);
2787     return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
2788   }
2789   case Builtin::BIprintf:
2790     if (getTarget().getTriple().isNVPTX())
2791       return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
2792     break;
2793   case Builtin::BI__builtin_canonicalize:
2794   case Builtin::BI__builtin_canonicalizef:
2795   case Builtin::BI__builtin_canonicalizel:
2796     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
2797 
2798   case Builtin::BI__builtin_thread_pointer: {
2799     if (!getContext().getTargetInfo().isTLSSupported())
2800       CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
2801     // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
2802     break;
2803   }
2804   case Builtin::BI__builtin_os_log_format: {
2805     assert(E->getNumArgs() >= 2 &&
2806            "__builtin_os_log_format takes at least 2 arguments");
2807     analyze_os_log::OSLogBufferLayout Layout;
2808     analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
2809     Address BufAddr = EmitPointerWithAlignment(E->getArg(0));
2810     // Ignore argument 1, the format string. It is not currently used.
2811     CharUnits Offset;
2812     Builder.CreateStore(
2813         Builder.getInt8(Layout.getSummaryByte()),
2814         Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
2815     Builder.CreateStore(
2816         Builder.getInt8(Layout.getNumArgsByte()),
2817         Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
2818 
2819     llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2820     for (const auto &Item : Layout.Items) {
2821       Builder.CreateStore(
2822           Builder.getInt8(Item.getDescriptorByte()),
2823           Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
2824       Builder.CreateStore(
2825           Builder.getInt8(Item.getSizeByte()),
2826           Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
2827       Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset);
2828       if (const Expr *TheExpr = Item.getExpr()) {
2829         Addr = Builder.CreateElementBitCast(
2830             Addr, ConvertTypeForMem(TheExpr->getType()));
2831         // Check if this is a retainable type.
2832         if (TheExpr->getType()->isObjCRetainableType()) {
2833           assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2834                  "Only scalar can be a ObjC retainable type");
2835           llvm::Value *SV = EmitScalarExpr(TheExpr, /*Ignore*/ false);
2836           RValue RV = RValue::get(SV);
2837           LValue LV = MakeAddrLValue(Addr, TheExpr->getType());
2838           EmitStoreThroughLValue(RV, LV);
2839           // Check if the object is constant, if not, save it in
2840           // RetainableOperands.
2841           if (!isa<Constant>(SV))
2842             RetainableOperands.push_back(SV);
2843         } else {
2844           EmitAnyExprToMem(TheExpr, Addr, Qualifiers(), /*isInit*/ true);
2845         }
2846       } else {
2847         Addr = Builder.CreateElementBitCast(Addr, Int32Ty);
2848         Builder.CreateStore(
2849             Builder.getInt32(Item.getConstValue().getQuantity()), Addr);
2850       }
2851       Offset += Item.size();
2852     }
2853 
2854     // Push a clang.arc.use cleanup for each object in RetainableOperands. The
2855     // cleanup will cause the use to appear after the final log call, keeping
2856     // the object valid while it's held in the log buffer.  Note that if there's
2857     // a release cleanup on the object, it will already be active; since
2858     // cleanups are emitted in reverse order, the use will occur before the
2859     // object is released.
2860     if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
2861         CGM.getCodeGenOpts().OptimizationLevel != 0)
2862       for (llvm::Value *object : RetainableOperands)
2863         pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), object);
2864 
2865     return RValue::get(BufAddr.getPointer());
2866   }
2867 
2868   case Builtin::BI__builtin_os_log_format_buffer_size: {
2869     analyze_os_log::OSLogBufferLayout Layout;
2870     analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
2871     return RValue::get(ConstantInt::get(ConvertType(E->getType()),
2872                                         Layout.size().getQuantity()));
2873   }
2874 
2875   case Builtin::BI__xray_customevent: {
2876     if (!ShouldXRayInstrumentFunction())
2877       return RValue::getIgnored();
2878     if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) {
2879       if (XRayAttr->neverXRayInstrument())
2880         return RValue::getIgnored();
2881     }
2882     Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
2883     auto FTy = F->getFunctionType();
2884     auto Arg0 = E->getArg(0);
2885     auto Arg0Val = EmitScalarExpr(Arg0);
2886     auto Arg0Ty = Arg0->getType();
2887     auto PTy0 = FTy->getParamType(0);
2888     if (PTy0 != Arg0Val->getType()) {
2889       if (Arg0Ty->isArrayType())
2890         Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
2891       else
2892         Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
2893     }
2894     auto Arg1 = EmitScalarExpr(E->getArg(1));
2895     auto PTy1 = FTy->getParamType(1);
2896     if (PTy1 != Arg1->getType())
2897       Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
2898     return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
2899   }
2900 
2901   case Builtin::BI__builtin_ms_va_start:
2902   case Builtin::BI__builtin_ms_va_end:
2903     return RValue::get(
2904         EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
2905                        BuiltinID == Builtin::BI__builtin_ms_va_start));
2906 
2907   case Builtin::BI__builtin_ms_va_copy: {
2908     // Lower this manually. We can't reliably determine whether or not any
2909     // given va_copy() is for a Win64 va_list from the calling convention
2910     // alone, because it's legal to do this from a System V ABI function.
2911     // With opaque pointer types, we won't have enough information in LLVM
2912     // IR to determine this from the argument types, either. Best to do it
2913     // now, while we have enough information.
2914     Address DestAddr = EmitMSVAListRef(E->getArg(0));
2915     Address SrcAddr = EmitMSVAListRef(E->getArg(1));
2916 
2917     llvm::Type *BPP = Int8PtrPtrTy;
2918 
2919     DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
2920                        DestAddr.getAlignment());
2921     SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
2922                       SrcAddr.getAlignment());
2923 
2924     Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
2925     return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
2926   }
2927   }
2928 
2929   // If this is an alias for a lib function (e.g. __builtin_sin), emit
2930   // the call using the normal call path, but using the unmangled
2931   // version of the function name.
2932   if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
2933     return emitLibraryCall(*this, FD, E,
2934                            CGM.getBuiltinLibFunction(FD, BuiltinID));
2935 
2936   // If this is a predefined lib function (e.g. malloc), emit the call
2937   // using exactly the normal call path.
2938   if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
2939     return emitLibraryCall(*this, FD, E,
2940                       cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
2941 
2942   // Check that a call to a target specific builtin has the correct target
2943   // features.
2944   // This is down here to avoid non-target specific builtins, however, if
2945   // generic builtins start to require generic target features then we
2946   // can move this up to the beginning of the function.
2947   checkTargetFeatures(E, FD);
2948 
2949   // See if we have a target specific intrinsic.
2950   const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
2951   Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
2952   StringRef Prefix =
2953       llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
2954   if (!Prefix.empty()) {
2955     IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name);
2956     // NOTE we dont need to perform a compatibility flag check here since the
2957     // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the
2958     // MS builtins via ALL_MS_LANGUAGES and are filtered earlier.
2959     if (IntrinsicID == Intrinsic::not_intrinsic)
2960       IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
2961   }
2962 
2963   if (IntrinsicID != Intrinsic::not_intrinsic) {
2964     SmallVector<Value*, 16> Args;
2965 
2966     // Find out if any arguments are required to be integer constant
2967     // expressions.
2968     unsigned ICEArguments = 0;
2969     ASTContext::GetBuiltinTypeError Error;
2970     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
2971     assert(Error == ASTContext::GE_None && "Should not codegen an error");
2972 
2973     Function *F = CGM.getIntrinsic(IntrinsicID);
2974     llvm::FunctionType *FTy = F->getFunctionType();
2975 
2976     for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
2977       Value *ArgValue;
2978       // If this is a normal argument, just emit it as a scalar.
2979       if ((ICEArguments & (1 << i)) == 0) {
2980         ArgValue = EmitScalarExpr(E->getArg(i));
2981       } else {
2982         // If this is required to be a constant, constant fold it so that we
2983         // know that the generated intrinsic gets a ConstantInt.
2984         llvm::APSInt Result;
2985         bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
2986         assert(IsConst && "Constant arg isn't actually constant?");
2987         (void)IsConst;
2988         ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
2989       }
2990 
2991       // If the intrinsic arg type is different from the builtin arg type
2992       // we need to do a bit cast.
2993       llvm::Type *PTy = FTy->getParamType(i);
2994       if (PTy != ArgValue->getType()) {
2995         assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
2996                "Must be able to losslessly bit cast to param");
2997         ArgValue = Builder.CreateBitCast(ArgValue, PTy);
2998       }
2999 
3000       Args.push_back(ArgValue);
3001     }
3002 
3003     Value *V = Builder.CreateCall(F, Args);
3004     QualType BuiltinRetType = E->getType();
3005 
3006     llvm::Type *RetTy = VoidTy;
3007     if (!BuiltinRetType->isVoidType())
3008       RetTy = ConvertType(BuiltinRetType);
3009 
3010     if (RetTy != V->getType()) {
3011       assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
3012              "Must be able to losslessly bit cast result type");
3013       V = Builder.CreateBitCast(V, RetTy);
3014     }
3015 
3016     return RValue::get(V);
3017   }
3018 
3019   // See if we have a target specific builtin that needs to be lowered.
3020   if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
3021     return RValue::get(V);
3022 
3023   ErrorUnsupported(E, "builtin function");
3024 
3025   // Unknown builtin, for now just dump it out and return undef.
3026   return GetUndefRValue(E->getType());
3027 }
3028 
3029 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
3030                                         unsigned BuiltinID, const CallExpr *E,
3031                                         llvm::Triple::ArchType Arch) {
3032   switch (Arch) {
3033   case llvm::Triple::arm:
3034   case llvm::Triple::armeb:
3035   case llvm::Triple::thumb:
3036   case llvm::Triple::thumbeb:
3037     return CGF->EmitARMBuiltinExpr(BuiltinID, E);
3038   case llvm::Triple::aarch64:
3039   case llvm::Triple::aarch64_be:
3040     return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
3041   case llvm::Triple::x86:
3042   case llvm::Triple::x86_64:
3043     return CGF->EmitX86BuiltinExpr(BuiltinID, E);
3044   case llvm::Triple::ppc:
3045   case llvm::Triple::ppc64:
3046   case llvm::Triple::ppc64le:
3047     return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
3048   case llvm::Triple::r600:
3049   case llvm::Triple::amdgcn:
3050     return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
3051   case llvm::Triple::systemz:
3052     return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
3053   case llvm::Triple::nvptx:
3054   case llvm::Triple::nvptx64:
3055     return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
3056   case llvm::Triple::wasm32:
3057   case llvm::Triple::wasm64:
3058     return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
3059   default:
3060     return nullptr;
3061   }
3062 }
3063 
3064 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
3065                                               const CallExpr *E) {
3066   if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
3067     assert(getContext().getAuxTargetInfo() && "Missing aux target info");
3068     return EmitTargetArchBuiltinExpr(
3069         this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
3070         getContext().getAuxTargetInfo()->getTriple().getArch());
3071   }
3072 
3073   return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
3074                                    getTarget().getTriple().getArch());
3075 }
3076 
3077 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
3078                                      NeonTypeFlags TypeFlags,
3079                                      bool V1Ty=false) {
3080   int IsQuad = TypeFlags.isQuad();
3081   switch (TypeFlags.getEltType()) {
3082   case NeonTypeFlags::Int8:
3083   case NeonTypeFlags::Poly8:
3084     return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
3085   case NeonTypeFlags::Int16:
3086   case NeonTypeFlags::Poly16:
3087   case NeonTypeFlags::Float16:
3088     return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
3089   case NeonTypeFlags::Int32:
3090     return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
3091   case NeonTypeFlags::Int64:
3092   case NeonTypeFlags::Poly64:
3093     return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
3094   case NeonTypeFlags::Poly128:
3095     // FIXME: i128 and f128 doesn't get fully support in Clang and llvm.
3096     // There is a lot of i128 and f128 API missing.
3097     // so we use v16i8 to represent poly128 and get pattern matched.
3098     return llvm::VectorType::get(CGF->Int8Ty, 16);
3099   case NeonTypeFlags::Float32:
3100     return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
3101   case NeonTypeFlags::Float64:
3102     return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
3103   }
3104   llvm_unreachable("Unknown vector element type!");
3105 }
3106 
3107 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
3108                                           NeonTypeFlags IntTypeFlags) {
3109   int IsQuad = IntTypeFlags.isQuad();
3110   switch (IntTypeFlags.getEltType()) {
3111   case NeonTypeFlags::Int32:
3112     return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
3113   case NeonTypeFlags::Int64:
3114     return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
3115   default:
3116     llvm_unreachable("Type can't be converted to floating-point!");
3117   }
3118 }
3119 
3120 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
3121   unsigned nElts = V->getType()->getVectorNumElements();
3122   Value* SV = llvm::ConstantVector::getSplat(nElts, C);
3123   return Builder.CreateShuffleVector(V, V, SV, "lane");
3124 }
3125 
3126 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
3127                                      const char *name,
3128                                      unsigned shift, bool rightshift) {
3129   unsigned j = 0;
3130   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3131        ai != ae; ++ai, ++j)
3132     if (shift > 0 && shift == j)
3133       Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
3134     else
3135       Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
3136 
3137   return Builder.CreateCall(F, Ops, name);
3138 }
3139 
3140 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
3141                                             bool neg) {
3142   int SV = cast<ConstantInt>(V)->getSExtValue();
3143   return ConstantInt::get(Ty, neg ? -SV : SV);
3144 }
3145 
3146 // \brief Right-shift a vector by a constant.
3147 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
3148                                           llvm::Type *Ty, bool usgn,
3149                                           const char *name) {
3150   llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
3151 
3152   int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
3153   int EltSize = VTy->getScalarSizeInBits();
3154 
3155   Vec = Builder.CreateBitCast(Vec, Ty);
3156 
3157   // lshr/ashr are undefined when the shift amount is equal to the vector
3158   // element size.
3159   if (ShiftAmt == EltSize) {
3160     if (usgn) {
3161       // Right-shifting an unsigned value by its size yields 0.
3162       return llvm::ConstantAggregateZero::get(VTy);
3163     } else {
3164       // Right-shifting a signed value by its size is equivalent
3165       // to a shift of size-1.
3166       --ShiftAmt;
3167       Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
3168     }
3169   }
3170 
3171   Shift = EmitNeonShiftVector(Shift, Ty, false);
3172   if (usgn)
3173     return Builder.CreateLShr(Vec, Shift, name);
3174   else
3175     return Builder.CreateAShr(Vec, Shift, name);
3176 }
3177 
// Bit flags that are OR-ed together to form the TypeModifier field of a
// NeonIntrinsicInfo row.
// NOTE(review): the names suggest they control which types are used when the
// mapped intrinsic is looked up; the code consuming them is not in this part
// of the file, so confirm the exact meaning there.
enum {
  // Single-bit flags.
  AddRetType        = 0x001,
  Add1ArgType       = 0x002,
  Add2ArgTypes      = 0x004,

  VectorizeRetType  = 0x008,
  VectorizeArgTypes = 0x010,

  InventFloatType   = 0x020,
  UnsignedAlts      = 0x040,

  Use64BitVectors   = 0x080,
  Use128BitVectors  = 0x100,

  // Named combinations of the flags above.
  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
  VectorRet = AddRetType | VectorizeRetType,
  VectorRetGetArgs01 =
      AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
  FpCmpzModifiers =
      AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
};
3199 
namespace {
/// One row of a NEON builtin lookup table, as produced by the NEONMAP0,
/// NEONMAP1 and NEONMAP2 macros. Rows compare by BuiltinID only, both against
/// other rows and against a bare builtin ID.
struct NeonIntrinsicInfo {
  const char *NameHint;      // Stringized builtin base name.
  unsigned BuiltinID;        // NEON::BI__builtin_neon_<name>; comparison key.
  unsigned LLVMIntrinsic;    // Primary intrinsic ID, 0 when none is mapped.
  unsigned AltLLVMIntrinsic; // Alternate intrinsic ID, 0 when none is mapped.
  unsigned TypeModifier;     // OR of the type-modifier flag enumerators.

  /// True when this row's builtin ID is strictly below \p Key.
  bool operator<(unsigned Key) const { return BuiltinID < Key; }

  /// True when this row's builtin ID is strictly below that of \p Other.
  bool operator<(const NeonIntrinsicInfo &Other) const {
    return *this < Other.BuiltinID;
  }
};
} // end anonymous namespace
3216 
// Row builders for NeonIntrinsicInfo tables. Each expands to a braced
// initializer of the form
//   { name string, builtin ID, intrinsic, alternate intrinsic, type modifier }.

// NEONMAP0: builtin with no mapped intrinsic; every numeric field is 0.
#define NEONMAP0(NameBase)                                                     \
  { #NameBase, NEON::BI__builtin_neon_##NameBase, 0, 0, 0 }

// NEONMAP1: builtin mapped to a single intrinsic; the alternate slot is 0.
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier)                        \
  {                                                                            \
    #NameBase, NEON::BI__builtin_neon_##NameBase, Intrinsic::LLVMIntrinsic,    \
        0, TypeModifier                                                        \
  }

// NEONMAP2: builtin mapped to a primary and an alternate intrinsic.
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier)      \
  {                                                                            \
    #NameBase, NEON::BI__builtin_neon_##NameBase, Intrinsic::LLVMIntrinsic,    \
        Intrinsic::AltLLVMIntrinsic, TypeModifier                              \
  }
3228 
3229 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
3230   NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3231   NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3232   NEONMAP1(vabs_v, arm_neon_vabs, 0),
3233   NEONMAP1(vabsq_v, arm_neon_vabs, 0),
3234   NEONMAP0(vaddhn_v),
3235   NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
3236   NEONMAP1(vaeseq_v, arm_neon_aese, 0),
3237   NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
3238   NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
3239   NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
3240   NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
3241   NEONMAP1(vcage_v, arm_neon_vacge, 0),
3242   NEONMAP1(vcageq_v, arm_neon_vacge, 0),
3243   NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
3244   NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
3245   NEONMAP1(vcale_v, arm_neon_vacge, 0),
3246   NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
3247   NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
3248   NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
3249   NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
3250   NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
3251   NEONMAP1(vclz_v, ctlz, Add1ArgType),
3252   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3253   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3254   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3255   NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
3256   NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
3257   NEONMAP0(vcvt_f32_v),
3258   NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3259   NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3260   NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3261   NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3262   NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3263   NEONMAP0(vcvt_s32_v),
3264   NEONMAP0(vcvt_s64_v),
3265   NEONMAP0(vcvt_u32_v),
3266   NEONMAP0(vcvt_u64_v),
3267   NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
3268   NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
3269   NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
3270   NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
3271   NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
3272   NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
3273   NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
3274   NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
3275   NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
3276   NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
3277   NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
3278   NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
3279   NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
3280   NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
3281   NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
3282   NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
3283   NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
3284   NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
3285   NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
3286   NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
3287   NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
3288   NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
3289   NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
3290   NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
3291   NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
3292   NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
3293   NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
3294   NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
3295   NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
3296   NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
3297   NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
3298   NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
3299   NEONMAP0(vcvtq_f32_v),
3300   NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3301   NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3302   NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3303   NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3304   NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3305   NEONMAP0(vcvtq_s32_v),
3306   NEONMAP0(vcvtq_s64_v),
3307   NEONMAP0(vcvtq_u32_v),
3308   NEONMAP0(vcvtq_u64_v),
3309   NEONMAP0(vext_v),
3310   NEONMAP0(vextq_v),
3311   NEONMAP0(vfma_v),
3312   NEONMAP0(vfmaq_v),
3313   NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3314   NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3315   NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3316   NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3317   NEONMAP0(vld1_dup_v),
3318   NEONMAP1(vld1_v, arm_neon_vld1, 0),
3319   NEONMAP0(vld1q_dup_v),
3320   NEONMAP1(vld1q_v, arm_neon_vld1, 0),
3321   NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
3322   NEONMAP1(vld2_v, arm_neon_vld2, 0),
3323   NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
3324   NEONMAP1(vld2q_v, arm_neon_vld2, 0),
3325   NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
3326   NEONMAP1(vld3_v, arm_neon_vld3, 0),
3327   NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
3328   NEONMAP1(vld3q_v, arm_neon_vld3, 0),
3329   NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
3330   NEONMAP1(vld4_v, arm_neon_vld4, 0),
3331   NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
3332   NEONMAP1(vld4q_v, arm_neon_vld4, 0),
3333   NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3334   NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
3335   NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
3336   NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3337   NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3338   NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
3339   NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
3340   NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3341   NEONMAP0(vmovl_v),
3342   NEONMAP0(vmovn_v),
3343   NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
3344   NEONMAP0(vmull_v),
3345   NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
3346   NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3347   NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3348   NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
3349   NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3350   NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3351   NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
3352   NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
3353   NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
3354   NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
3355   NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
3356   NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3357   NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3358   NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
3359   NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
3360   NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
3361   NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
3362   NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
3363   NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
3364   NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
3365   NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
3366   NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
3367   NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
3368   NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
3369   NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3370   NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3371   NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3372   NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3373   NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3374   NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3375   NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
3376   NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
3377   NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3378   NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3379   NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
3380   NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3381   NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3382   NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
3383   NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
3384   NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3385   NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3386   NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
3387   NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
3388   NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
3389   NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
3390   NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
3391   NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
3392   NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
3393   NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
3394   NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
3395   NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
3396   NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
3397   NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
3398   NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3399   NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3400   NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3401   NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3402   NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3403   NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3404   NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
3405   NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
3406   NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
3407   NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
3408   NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
3409   NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
3410   NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
3411   NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
3412   NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
3413   NEONMAP0(vshl_n_v),
3414   NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3415   NEONMAP0(vshll_n_v),
3416   NEONMAP0(vshlq_n_v),
3417   NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3418   NEONMAP0(vshr_n_v),
3419   NEONMAP0(vshrn_n_v),
3420   NEONMAP0(vshrq_n_v),
3421   NEONMAP1(vst1_v, arm_neon_vst1, 0),
3422   NEONMAP1(vst1q_v, arm_neon_vst1, 0),
3423   NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
3424   NEONMAP1(vst2_v, arm_neon_vst2, 0),
3425   NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
3426   NEONMAP1(vst2q_v, arm_neon_vst2, 0),
3427   NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
3428   NEONMAP1(vst3_v, arm_neon_vst3, 0),
3429   NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
3430   NEONMAP1(vst3q_v, arm_neon_vst3, 0),
3431   NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
3432   NEONMAP1(vst4_v, arm_neon_vst4, 0),
3433   NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
3434   NEONMAP1(vst4q_v, arm_neon_vst4, 0),
3435   NEONMAP0(vsubhn_v),
3436   NEONMAP0(vtrn_v),
3437   NEONMAP0(vtrnq_v),
3438   NEONMAP0(vtst_v),
3439   NEONMAP0(vtstq_v),
3440   NEONMAP0(vuzp_v),
3441   NEONMAP0(vuzpq_v),
3442   NEONMAP0(vzip_v),
3443   NEONMAP0(vzipq_v)
3444 };
3445 
// Table describing the AArch64 vector (SIMD) NEON builtins. Each row is built
// with one of the NEONMAP0/1/2 helper macros (defined before this point and
// #undef'd after the tables) and names a builtin followed by zero, one or two
// LLVM intrinsics and a set of type-modifier flags.
//
// NOTE(review): rows appear to be listed in ascending builtin-ID order, and
// findNeonIntrinsicInMap() both asserts std::is_sorted and binary-searches
// the map it is given -- presumably this table is one of those maps (the call
// site is outside this excerpt), so keep new rows in order.
//
// NOTE(review): for NEONMAP2 rows flagged UnsignedAlts, the first intrinsic
// looks like the unsigned form and the second the signed alternative that
// EmitCommonNeonBuiltinExpr() selects for signed element types -- confirm
// against the macro definition.
3446 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
3447   NEONMAP1(vabs_v, aarch64_neon_abs, 0),
3448   NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
3449   NEONMAP0(vaddhn_v),
3450   NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
3451   NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
3452   NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
3453   NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
3454   NEONMAP1(vcage_v, aarch64_neon_facge, 0),
3455   NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
3456   NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
3457   NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
3458   NEONMAP1(vcale_v, aarch64_neon_facge, 0),
3459   NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
3460   NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
3461   NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
3462   NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
3463   NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
3464   NEONMAP1(vclz_v, ctlz, Add1ArgType),
3465   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3466   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3467   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3468   NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
3469   NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
3470   NEONMAP0(vcvt_f32_v),
3471   NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3472   NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3473   NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3474   NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3475   NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3476   NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3477   NEONMAP0(vcvtq_f32_v),
3478   NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3479   NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3480   NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3481   NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3482   NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3483   NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3484   NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
3485   NEONMAP0(vext_v),
3486   NEONMAP0(vextq_v),
3487   NEONMAP0(vfma_v),
3488   NEONMAP0(vfmaq_v),
3489   NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3490   NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3491   NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3492   NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3493   NEONMAP0(vmovl_v),
3494   NEONMAP0(vmovn_v),
3495   NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
3496   NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
3497   NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
3498   NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3499   NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3500   NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
3501   NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
3502   NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
3503   NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3504   NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3505   NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
3506   NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
3507   NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
3508   NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
3509   NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
3510   NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
3511   NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
3512   NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
3513   NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
3514   NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
3515   NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
3516   NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3517   NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3518   NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
3519   NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3520   NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
3521   NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3522   NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
3523   NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
3524   NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3525   NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3526   NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
3527   NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3528   NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3529   NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
3530   NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
3531   NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3532   NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3533   NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3534   NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3535   NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3536   NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3537   NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3538   NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3539   NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
3540   NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
3541   NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
3542   NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
3543   NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
3544   NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
3545   NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
3546   NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
3547   NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
3548   NEONMAP0(vshl_n_v),
3549   NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3550   NEONMAP0(vshll_n_v),
3551   NEONMAP0(vshlq_n_v),
3552   NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3553   NEONMAP0(vshr_n_v),
3554   NEONMAP0(vshrn_n_v),
3555   NEONMAP0(vshrq_n_v),
3556   NEONMAP0(vsubhn_v),
3557   NEONMAP0(vtst_v),
3558   NEONMAP0(vtstq_v),
3559 };
3560 
// Table describing the AArch64 SISD NEON builtins, i.e. the ones whose names
// spell out a concrete element type (vqaddb_s8, vaddv_f32, ...) instead of
// the overloaded "_v" suffix used in the SIMD table. Every row pairs a
// builtin with one LLVM intrinsic and a set of type-modifier flags.
//
// EmitCommonNeonSISDBuiltinExpr() consumes a row of this element type and
// forwards its flags (Add1ArgType, AddRetType, VectorRet, Vectorize1ArgType,
// Use64BitVectors, Use128BitVectors, ...) to LookupNeonLLVMIntrinsic(), which
// turns them into the overload type list for the intrinsic.
//
// NOTE(review): rows appear to be listed in ascending builtin-ID order, and
// findNeonIntrinsicInMap() both asserts std::is_sorted and binary-searches
// the map it is given -- presumably this table is one of those maps (the call
// site is outside this excerpt), so keep new rows in order.
3561 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
3562   NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
3563   NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
3564   NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
3565   NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3566   NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3567   NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3568   NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3569   NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3570   NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3571   NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3572   NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3573   NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
3574   NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3575   NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
3576   NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3577   NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3578   NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3579   NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3580   NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3581   NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3582   NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3583   NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3584   NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3585   NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3586   NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3587   NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3588   NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3589   NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3590   NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3591   NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3592   NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3593   NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3594   NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3595   NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3596   NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3597   NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3598   NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3599   NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3600   NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3601   NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3602   NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3603   NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3604   NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3605   NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3606   NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3607   NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3608   NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3609   NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3610   NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
3611   NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3612   NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3613   NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3614   NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3615   NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3616   NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3617   NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3618   NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3619   NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3620   NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3621   NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3622   NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3623   NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3624   NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3625   NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3626   NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3627   NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3628   NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3629   NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3630   NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3631   NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
3632   NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
3633   NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
3634   NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3635   NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3636   NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3637   NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3638   NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3639   NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3640   NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3641   NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3642   NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3643   NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3644   NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3645   NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
3646   NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3647   NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
3648   NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3649   NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3650   NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
3651   NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
3652   NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3653   NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3654   NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
3655   NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
3656   NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
3657   NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
3658   NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
3659   NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
3660   NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
3661   NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
3662   NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3663   NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3664   NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3665   NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3666   NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
3667   NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3668   NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3669   NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3670   NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
3671   NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3672   NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
3673   NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
3674   NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
3675   NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3676   NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3677   NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
3678   NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
3679   NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3680   NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3681   NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
3682   NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
3683   NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
3684   NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
3685   NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3686   NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3687   NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3688   NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3689   NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
3690   NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3691   NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3692   NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3693   NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3694   NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3695   NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3696   NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
3697   NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
3698   NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3699   NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3700   NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3701   NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3702   NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
3703   NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
3704   NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
3705   NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
3706   NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3707   NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3708   NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
3709   NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
3710   NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
3711   NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3712   NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3713   NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3714   NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3715   NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
3716   NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3717   NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3718   NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3719   NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3720   NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
3721   NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
3722   NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3723   NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3724   NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
3725   NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
3726   NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
3727   NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
3728   NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
3729   NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
3730   NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
3731   NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
3732   NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
3733   NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
3734   NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
3735   NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
3736   NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
3737   NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
3738   NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
3739   NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
3740   NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
3741   NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
3742   NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
3743   NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
3744   NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3745   NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
3746   NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3747   NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
3748   NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
3749   NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
3750   NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3751   NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
3752   NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3753   NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
3754 };
3755 
// The NEONMAP* helper macros are only needed while the intrinsic tables are
// being spelled out; drop them so they cannot leak into the rest of the file.
3756 #undef NEONMAP0
3757 #undef NEONMAP1
3758 #undef NEONMAP2
3759 
// One "already verified as sorted" flag per intrinsic table.
// findNeonIntrinsicInMap() takes such a flag by reference and, in builds with
// assertions enabled, sets it after its std::is_sorted check has passed so
// the check is not repeated.
// NOTE(review): the pairing of each flag with its table happens at call sites
// outside this excerpt -- presumably NEON <-> ARM table and AArch64 SIMD/SISD
// <-> the two AArch64 tables; confirm there.
3760 static bool NEONSIMDIntrinsicsProvenSorted = false;
3761 
3762 static bool AArch64SIMDIntrinsicsProvenSorted = false;
3763 static bool AArch64SISDIntrinsicsProvenSorted = false;
3764 
3765 
3766 static const NeonIntrinsicInfo *
3767 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
3768                        unsigned BuiltinID, bool &MapProvenSorted) {
3769 
3770 #ifndef NDEBUG
3771   if (!MapProvenSorted) {
3772     assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
3773     MapProvenSorted = true;
3774   }
3775 #endif
3776 
3777   const NeonIntrinsicInfo *Builtin =
3778       std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
3779 
3780   if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
3781     return Builtin;
3782 
3783   return nullptr;
3784 }
3785 
3786 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
3787                                                    unsigned Modifier,
3788                                                    llvm::Type *ArgType,
3789                                                    const CallExpr *E) {
3790   int VectorSize = 0;
3791   if (Modifier & Use64BitVectors)
3792     VectorSize = 64;
3793   else if (Modifier & Use128BitVectors)
3794     VectorSize = 128;
3795 
3796   // Return type.
3797   SmallVector<llvm::Type *, 3> Tys;
3798   if (Modifier & AddRetType) {
3799     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
3800     if (Modifier & VectorizeRetType)
3801       Ty = llvm::VectorType::get(
3802           Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
3803 
3804     Tys.push_back(Ty);
3805   }
3806 
3807   // Arguments.
3808   if (Modifier & VectorizeArgTypes) {
3809     int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
3810     ArgType = llvm::VectorType::get(ArgType, Elts);
3811   }
3812 
3813   if (Modifier & (Add1ArgType | Add2ArgTypes))
3814     Tys.push_back(ArgType);
3815 
3816   if (Modifier & Add2ArgTypes)
3817     Tys.push_back(ArgType);
3818 
3819   if (Modifier & InventFloatType)
3820     Tys.push_back(FloatTy);
3821 
3822   return CGM.getIntrinsic(IntrinsicID, Tys);
3823 }
3824 
3825 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
3826                                             const NeonIntrinsicInfo &SISDInfo,
3827                                             SmallVectorImpl<Value *> &Ops,
3828                                             const CallExpr *E) {
3829   unsigned BuiltinID = SISDInfo.BuiltinID;
3830   unsigned int Int = SISDInfo.LLVMIntrinsic;
3831   unsigned Modifier = SISDInfo.TypeModifier;
3832   const char *s = SISDInfo.NameHint;
3833 
3834   switch (BuiltinID) {
3835   case NEON::BI__builtin_neon_vcled_s64:
3836   case NEON::BI__builtin_neon_vcled_u64:
3837   case NEON::BI__builtin_neon_vcles_f32:
3838   case NEON::BI__builtin_neon_vcled_f64:
3839   case NEON::BI__builtin_neon_vcltd_s64:
3840   case NEON::BI__builtin_neon_vcltd_u64:
3841   case NEON::BI__builtin_neon_vclts_f32:
3842   case NEON::BI__builtin_neon_vcltd_f64:
3843   case NEON::BI__builtin_neon_vcales_f32:
3844   case NEON::BI__builtin_neon_vcaled_f64:
3845   case NEON::BI__builtin_neon_vcalts_f32:
3846   case NEON::BI__builtin_neon_vcaltd_f64:
3847     // Only one direction of comparisons actually exist, cmle is actually a cmge
3848     // with swapped operands. The table gives us the right intrinsic but we
3849     // still need to do the swap.
3850     std::swap(Ops[0], Ops[1]);
3851     break;
3852   }
3853 
3854   assert(Int && "Generic code assumes a valid intrinsic");
3855 
3856   // Determine the type(s) of this overloaded AArch64 intrinsic.
3857   const Expr *Arg = E->getArg(0);
3858   llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
3859   Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
3860 
3861   int j = 0;
3862   ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
3863   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3864        ai != ae; ++ai, ++j) {
3865     llvm::Type *ArgTy = ai->getType();
3866     if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
3867              ArgTy->getPrimitiveSizeInBits())
3868       continue;
3869 
3870     assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
3871     // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
3872     // it before inserting.
3873     Ops[j] =
3874         CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
3875     Ops[j] =
3876         CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
3877   }
3878 
3879   Value *Result = CGF.EmitNeonCall(F, Ops, s);
3880   llvm::Type *ResultType = CGF.ConvertType(E->getType());
3881   if (ResultType->getPrimitiveSizeInBits() <
3882       Result->getType()->getPrimitiveSizeInBits())
3883     return CGF.Builder.CreateExtractElement(Result, C0);
3884 
3885   return CGF.Builder.CreateBitCast(Result, ResultType, s);
3886 }
3887 
3888 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
3889     unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
3890     const char *NameHint, unsigned Modifier, const CallExpr *E,
3891     SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
3892   // Get the last argument, which specifies the vector type.
3893   llvm::APSInt NeonTypeConst;
3894   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3895   if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
3896     return nullptr;
3897 
3898   // Determine the type of this overloaded NEON intrinsic.
3899   NeonTypeFlags Type(NeonTypeConst.getZExtValue());
3900   bool Usgn = Type.isUnsigned();
3901   bool Quad = Type.isQuad();
3902 
3903   llvm::VectorType *VTy = GetNeonType(this, Type);
3904   llvm::Type *Ty = VTy;
3905   if (!Ty)
3906     return nullptr;
3907 
3908   auto getAlignmentValue32 = [&](Address addr) -> Value* {
3909     return Builder.getInt32(addr.getAlignment().getQuantity());
3910   };
3911 
3912   unsigned Int = LLVMIntrinsic;
3913   if ((Modifier & UnsignedAlts) && !Usgn)
3914     Int = AltLLVMIntrinsic;
3915 
3916   switch (BuiltinID) {
3917   default: break;
3918   case NEON::BI__builtin_neon_vabs_v:
3919   case NEON::BI__builtin_neon_vabsq_v:
3920     if (VTy->getElementType()->isFloatingPointTy())
3921       return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
3922     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
3923   case NEON::BI__builtin_neon_vaddhn_v: {
3924     llvm::VectorType *SrcTy =
3925         llvm::VectorType::getExtendedElementVectorType(VTy);
3926 
3927     // %sum = add <4 x i32> %lhs, %rhs
3928     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3929     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
3930     Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
3931 
3932     // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
3933     Constant *ShiftAmt =
3934         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
3935     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
3936 
3937     // %res = trunc <4 x i32> %high to <4 x i16>
3938     return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
3939   }
3940   case NEON::BI__builtin_neon_vcale_v:
3941   case NEON::BI__builtin_neon_vcaleq_v:
3942   case NEON::BI__builtin_neon_vcalt_v:
3943   case NEON::BI__builtin_neon_vcaltq_v:
3944     std::swap(Ops[0], Ops[1]);
3945     LLVM_FALLTHROUGH;
3946   case NEON::BI__builtin_neon_vcage_v:
3947   case NEON::BI__builtin_neon_vcageq_v:
3948   case NEON::BI__builtin_neon_vcagt_v:
3949   case NEON::BI__builtin_neon_vcagtq_v: {
3950     llvm::Type *VecFlt = llvm::VectorType::get(
3951         VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
3952         VTy->getNumElements());
3953     llvm::Type *Tys[] = { VTy, VecFlt };
3954     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3955     return EmitNeonCall(F, Ops, NameHint);
3956   }
3957   case NEON::BI__builtin_neon_vclz_v:
3958   case NEON::BI__builtin_neon_vclzq_v:
3959     // We generate target-independent intrinsic, which needs a second argument
3960     // for whether or not clz of zero is undefined; on ARM it isn't.
3961     Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
3962     break;
3963   case NEON::BI__builtin_neon_vcvt_f32_v:
3964   case NEON::BI__builtin_neon_vcvtq_f32_v:
3965     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3966     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
3967     return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
3968                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
3969   case NEON::BI__builtin_neon_vcvt_n_f32_v:
3970   case NEON::BI__builtin_neon_vcvt_n_f64_v:
3971   case NEON::BI__builtin_neon_vcvtq_n_f32_v:
3972   case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
3973     llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
3974     Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
3975     Function *F = CGM.getIntrinsic(Int, Tys);
3976     return EmitNeonCall(F, Ops, "vcvt_n");
3977   }
3978   case NEON::BI__builtin_neon_vcvt_n_s32_v:
3979   case NEON::BI__builtin_neon_vcvt_n_u32_v:
3980   case NEON::BI__builtin_neon_vcvt_n_s64_v:
3981   case NEON::BI__builtin_neon_vcvt_n_u64_v:
3982   case NEON::BI__builtin_neon_vcvtq_n_s32_v:
3983   case NEON::BI__builtin_neon_vcvtq_n_u32_v:
3984   case NEON::BI__builtin_neon_vcvtq_n_s64_v:
3985   case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
3986     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3987     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3988     return EmitNeonCall(F, Ops, "vcvt_n");
3989   }
3990   case NEON::BI__builtin_neon_vcvt_s32_v:
3991   case NEON::BI__builtin_neon_vcvt_u32_v:
3992   case NEON::BI__builtin_neon_vcvt_s64_v:
3993   case NEON::BI__builtin_neon_vcvt_u64_v:
3994   case NEON::BI__builtin_neon_vcvtq_s32_v:
3995   case NEON::BI__builtin_neon_vcvtq_u32_v:
3996   case NEON::BI__builtin_neon_vcvtq_s64_v:
3997   case NEON::BI__builtin_neon_vcvtq_u64_v: {
3998     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
3999     return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
4000                 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
4001   }
4002   case NEON::BI__builtin_neon_vcvta_s32_v:
4003   case NEON::BI__builtin_neon_vcvta_s64_v:
4004   case NEON::BI__builtin_neon_vcvta_u32_v:
4005   case NEON::BI__builtin_neon_vcvta_u64_v:
4006   case NEON::BI__builtin_neon_vcvtaq_s32_v:
4007   case NEON::BI__builtin_neon_vcvtaq_s64_v:
4008   case NEON::BI__builtin_neon_vcvtaq_u32_v:
4009   case NEON::BI__builtin_neon_vcvtaq_u64_v:
4010   case NEON::BI__builtin_neon_vcvtn_s32_v:
4011   case NEON::BI__builtin_neon_vcvtn_s64_v:
4012   case NEON::BI__builtin_neon_vcvtn_u32_v:
4013   case NEON::BI__builtin_neon_vcvtn_u64_v:
4014   case NEON::BI__builtin_neon_vcvtnq_s32_v:
4015   case NEON::BI__builtin_neon_vcvtnq_s64_v:
4016   case NEON::BI__builtin_neon_vcvtnq_u32_v:
4017   case NEON::BI__builtin_neon_vcvtnq_u64_v:
4018   case NEON::BI__builtin_neon_vcvtp_s32_v:
4019   case NEON::BI__builtin_neon_vcvtp_s64_v:
4020   case NEON::BI__builtin_neon_vcvtp_u32_v:
4021   case NEON::BI__builtin_neon_vcvtp_u64_v:
4022   case NEON::BI__builtin_neon_vcvtpq_s32_v:
4023   case NEON::BI__builtin_neon_vcvtpq_s64_v:
4024   case NEON::BI__builtin_neon_vcvtpq_u32_v:
4025   case NEON::BI__builtin_neon_vcvtpq_u64_v:
4026   case NEON::BI__builtin_neon_vcvtm_s32_v:
4027   case NEON::BI__builtin_neon_vcvtm_s64_v:
4028   case NEON::BI__builtin_neon_vcvtm_u32_v:
4029   case NEON::BI__builtin_neon_vcvtm_u64_v:
4030   case NEON::BI__builtin_neon_vcvtmq_s32_v:
4031   case NEON::BI__builtin_neon_vcvtmq_s64_v:
4032   case NEON::BI__builtin_neon_vcvtmq_u32_v:
4033   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
4034     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
4035     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
4036   }
4037   case NEON::BI__builtin_neon_vext_v:
4038   case NEON::BI__builtin_neon_vextq_v: {
4039     int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
4040     SmallVector<uint32_t, 16> Indices;
4041     for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
4042       Indices.push_back(i+CV);
4043 
4044     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4045     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4046     return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
4047   }
4048   case NEON::BI__builtin_neon_vfma_v:
4049   case NEON::BI__builtin_neon_vfmaq_v: {
4050     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
4051     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4052     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4053     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4054 
4055     // NEON intrinsic puts accumulator first, unlike the LLVM fma.
4056     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
4057   }
4058   case NEON::BI__builtin_neon_vld1_v:
4059   case NEON::BI__builtin_neon_vld1q_v: {
4060     llvm::Type *Tys[] = {Ty, Int8PtrTy};
4061     Ops.push_back(getAlignmentValue32(PtrOp0));
4062     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
4063   }
4064   case NEON::BI__builtin_neon_vld2_v:
4065   case NEON::BI__builtin_neon_vld2q_v:
4066   case NEON::BI__builtin_neon_vld3_v:
4067   case NEON::BI__builtin_neon_vld3q_v:
4068   case NEON::BI__builtin_neon_vld4_v:
4069   case NEON::BI__builtin_neon_vld4q_v: {
4070     llvm::Type *Tys[] = {Ty, Int8PtrTy};
4071     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4072     Value *Align = getAlignmentValue32(PtrOp1);
4073     Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
4074     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4075     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4076     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4077   }
4078   case NEON::BI__builtin_neon_vld1_dup_v:
4079   case NEON::BI__builtin_neon_vld1q_dup_v: {
4080     Value *V = UndefValue::get(Ty);
4081     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
4082     PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
4083     LoadInst *Ld = Builder.CreateLoad(PtrOp0);
4084     llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
4085     Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
4086     return EmitNeonSplat(Ops[0], CI);
4087   }
4088   case NEON::BI__builtin_neon_vld2_lane_v:
4089   case NEON::BI__builtin_neon_vld2q_lane_v:
4090   case NEON::BI__builtin_neon_vld3_lane_v:
4091   case NEON::BI__builtin_neon_vld3q_lane_v:
4092   case NEON::BI__builtin_neon_vld4_lane_v:
4093   case NEON::BI__builtin_neon_vld4q_lane_v: {
4094     llvm::Type *Tys[] = {Ty, Int8PtrTy};
4095     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4096     for (unsigned I = 2; I < Ops.size() - 1; ++I)
4097       Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
4098     Ops.push_back(getAlignmentValue32(PtrOp1));
4099     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
4100     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4101     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4102     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4103   }
4104   case NEON::BI__builtin_neon_vmovl_v: {
4105     llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
4106     Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
4107     if (Usgn)
4108       return Builder.CreateZExt(Ops[0], Ty, "vmovl");
4109     return Builder.CreateSExt(Ops[0], Ty, "vmovl");
4110   }
4111   case NEON::BI__builtin_neon_vmovn_v: {
4112     llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
4113     Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
4114     return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
4115   }
4116   case NEON::BI__builtin_neon_vmull_v:
4117     // FIXME: the integer vmull operations could be emitted in terms of pure
4118     // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
4119     // hoisting the exts outside loops. Until global ISel comes along that can
4120     // see through such movement this leads to bad CodeGen. So we need an
4121     // intrinsic for now.
4122     Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
4123     Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
4124     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
4125   case NEON::BI__builtin_neon_vpadal_v:
4126   case NEON::BI__builtin_neon_vpadalq_v: {
4127     // The source operand type has twice as many elements of half the size.
4128     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
4129     llvm::Type *EltTy =
4130       llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4131     llvm::Type *NarrowTy =
4132       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4133     llvm::Type *Tys[2] = { Ty, NarrowTy };
4134     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
4135   }
4136   case NEON::BI__builtin_neon_vpaddl_v:
4137   case NEON::BI__builtin_neon_vpaddlq_v: {
4138     // The source operand type has twice as many elements of half the size.
4139     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
4140     llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4141     llvm::Type *NarrowTy =
4142       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4143     llvm::Type *Tys[2] = { Ty, NarrowTy };
4144     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
4145   }
4146   case NEON::BI__builtin_neon_vqdmlal_v:
4147   case NEON::BI__builtin_neon_vqdmlsl_v: {
4148     SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
4149     Ops[1] =
4150         EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
4151     Ops.resize(2);
4152     return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
4153   }
4154   case NEON::BI__builtin_neon_vqshl_n_v:
4155   case NEON::BI__builtin_neon_vqshlq_n_v:
4156     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
4157                         1, false);
4158   case NEON::BI__builtin_neon_vqshlu_n_v:
4159   case NEON::BI__builtin_neon_vqshluq_n_v:
4160     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
4161                         1, false);
4162   case NEON::BI__builtin_neon_vrecpe_v:
4163   case NEON::BI__builtin_neon_vrecpeq_v:
4164   case NEON::BI__builtin_neon_vrsqrte_v:
4165   case NEON::BI__builtin_neon_vrsqrteq_v:
4166     Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
4167     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
4168 
4169   case NEON::BI__builtin_neon_vrshr_n_v:
4170   case NEON::BI__builtin_neon_vrshrq_n_v:
4171     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
4172                         1, true);
4173   case NEON::BI__builtin_neon_vshl_n_v:
4174   case NEON::BI__builtin_neon_vshlq_n_v:
4175     Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
4176     return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
4177                              "vshl_n");
4178   case NEON::BI__builtin_neon_vshll_n_v: {
4179     llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
4180     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4181     if (Usgn)
4182       Ops[0] = Builder.CreateZExt(Ops[0], VTy);
4183     else
4184       Ops[0] = Builder.CreateSExt(Ops[0], VTy);
4185     Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
4186     return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
4187   }
4188   case NEON::BI__builtin_neon_vshrn_n_v: {
4189     llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
4190     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4191     Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
4192     if (Usgn)
4193       Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
4194     else
4195       Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
4196     return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
4197   }
4198   case NEON::BI__builtin_neon_vshr_n_v:
4199   case NEON::BI__builtin_neon_vshrq_n_v:
4200     return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
4201   case NEON::BI__builtin_neon_vst1_v:
4202   case NEON::BI__builtin_neon_vst1q_v:
4203   case NEON::BI__builtin_neon_vst2_v:
4204   case NEON::BI__builtin_neon_vst2q_v:
4205   case NEON::BI__builtin_neon_vst3_v:
4206   case NEON::BI__builtin_neon_vst3q_v:
4207   case NEON::BI__builtin_neon_vst4_v:
4208   case NEON::BI__builtin_neon_vst4q_v:
4209   case NEON::BI__builtin_neon_vst2_lane_v:
4210   case NEON::BI__builtin_neon_vst2q_lane_v:
4211   case NEON::BI__builtin_neon_vst3_lane_v:
4212   case NEON::BI__builtin_neon_vst3q_lane_v:
4213   case NEON::BI__builtin_neon_vst4_lane_v:
4214   case NEON::BI__builtin_neon_vst4q_lane_v: {
4215     llvm::Type *Tys[] = {Int8PtrTy, Ty};
4216     Ops.push_back(getAlignmentValue32(PtrOp0));
4217     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
4218   }
4219   case NEON::BI__builtin_neon_vsubhn_v: {
4220     llvm::VectorType *SrcTy =
4221         llvm::VectorType::getExtendedElementVectorType(VTy);
4222 
4223     // %sum = add <4 x i32> %lhs, %rhs
4224     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4225     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
4226     Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
4227 
4228     // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
4229     Constant *ShiftAmt =
4230         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
4231     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
4232 
4233     // %res = trunc <4 x i32> %high to <4 x i16>
4234     return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
4235   }
4236   case NEON::BI__builtin_neon_vtrn_v:
4237   case NEON::BI__builtin_neon_vtrnq_v: {
4238     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4239     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4240     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4241     Value *SV = nullptr;
4242 
4243     for (unsigned vi = 0; vi != 2; ++vi) {
4244       SmallVector<uint32_t, 16> Indices;
4245       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4246         Indices.push_back(i+vi);
4247         Indices.push_back(i+e+vi);
4248       }
4249       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4250       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
4251       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4252     }
4253     return SV;
4254   }
4255   case NEON::BI__builtin_neon_vtst_v:
4256   case NEON::BI__builtin_neon_vtstq_v: {
4257     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4258     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4259     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
4260     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
4261                                 ConstantAggregateZero::get(Ty));
4262     return Builder.CreateSExt(Ops[0], Ty, "vtst");
4263   }
4264   case NEON::BI__builtin_neon_vuzp_v:
4265   case NEON::BI__builtin_neon_vuzpq_v: {
4266     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4267     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4268     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4269     Value *SV = nullptr;
4270 
4271     for (unsigned vi = 0; vi != 2; ++vi) {
4272       SmallVector<uint32_t, 16> Indices;
4273       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
4274         Indices.push_back(2*i+vi);
4275 
4276       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4277       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
4278       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4279     }
4280     return SV;
4281   }
4282   case NEON::BI__builtin_neon_vzip_v:
4283   case NEON::BI__builtin_neon_vzipq_v: {
4284     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4285     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4286     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4287     Value *SV = nullptr;
4288 
4289     for (unsigned vi = 0; vi != 2; ++vi) {
4290       SmallVector<uint32_t, 16> Indices;
4291       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4292         Indices.push_back((i + vi*e) >> 1);
4293         Indices.push_back(((i + vi*e) >> 1)+e);
4294       }
4295       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4296       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
4297       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4298     }
4299     return SV;
4300   }
4301   }
4302 
4303   assert(Int && "Expected valid intrinsic number");
4304 
4305   // Determine the type(s) of this overloaded AArch64 intrinsic.
4306   Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
4307 
4308   Value *Result = EmitNeonCall(F, Ops, NameHint);
4309   llvm::Type *ResultType = ConvertType(E->getType());
4310   // AArch64 intrinsic one-element vector type cast to
4311   // scalar type expected by the builtin
4312   return Builder.CreateBitCast(Result, ResultType, NameHint);
4313 }
4314 
4315 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
4316     Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
4317     const CmpInst::Predicate Ip, const Twine &Name) {
4318   llvm::Type *OTy = Op->getType();
4319 
4320   // FIXME: this is utterly horrific. We should not be looking at previous
4321   // codegen context to find out what needs doing. Unfortunately TableGen
4322   // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
4323   // (etc).
4324   if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
4325     OTy = BI->getOperand(0)->getType();
4326 
4327   Op = Builder.CreateBitCast(Op, OTy);
4328   if (OTy->getScalarType()->isFloatingPointTy()) {
4329     Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
4330   } else {
4331     Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
4332   }
4333   return Builder.CreateSExt(Op, Ty, Name);
4334 }
4335 
4336 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
4337                                  Value *ExtOp, Value *IndexOp,
4338                                  llvm::Type *ResTy, unsigned IntID,
4339                                  const char *Name) {
4340   SmallVector<Value *, 2> TblOps;
4341   if (ExtOp)
4342     TblOps.push_back(ExtOp);
4343 
4344   // Build a vector containing sequential number like (0, 1, 2, ..., 15)
4345   SmallVector<uint32_t, 16> Indices;
4346   llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
4347   for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
4348     Indices.push_back(2*i);
4349     Indices.push_back(2*i+1);
4350   }
4351 
4352   int PairPos = 0, End = Ops.size() - 1;
4353   while (PairPos < End) {
4354     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4355                                                      Ops[PairPos+1], Indices,
4356                                                      Name));
4357     PairPos += 2;
4358   }
4359 
4360   // If there's an odd number of 64-bit lookup table, fill the high 64-bit
4361   // of the 128-bit lookup table with zero.
4362   if (PairPos == End) {
4363     Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
4364     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4365                                                      ZeroTbl, Indices, Name));
4366   }
4367 
4368   Function *TblF;
4369   TblOps.push_back(IndexOp);
4370   TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
4371 
4372   return CGF.EmitNeonCall(TblF, TblOps, Name);
4373 }
4374 
4375 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
4376   unsigned Value;
4377   switch (BuiltinID) {
4378   default:
4379     return nullptr;
4380   case ARM::BI__builtin_arm_nop:
4381     Value = 0;
4382     break;
4383   case ARM::BI__builtin_arm_yield:
4384   case ARM::BI__yield:
4385     Value = 1;
4386     break;
4387   case ARM::BI__builtin_arm_wfe:
4388   case ARM::BI__wfe:
4389     Value = 2;
4390     break;
4391   case ARM::BI__builtin_arm_wfi:
4392   case ARM::BI__wfi:
4393     Value = 3;
4394     break;
4395   case ARM::BI__builtin_arm_sev:
4396   case ARM::BI__sev:
4397     Value = 4;
4398     break;
4399   case ARM::BI__builtin_arm_sevl:
4400   case ARM::BI__sevl:
4401     Value = 5;
4402     break;
4403   }
4404 
4405   return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
4406                             llvm::ConstantInt::get(Int32Ty, Value));
4407 }
4408 
4409 // Generates the IR for the read/write special register builtin,
4410 // ValueType is the type of the value that is to be written or read,
4411 // RegisterType is the type of the register being written to or read from.
4412 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
4413                                          const CallExpr *E,
4414                                          llvm::Type *RegisterType,
4415                                          llvm::Type *ValueType,
4416                                          bool IsRead,
4417                                          StringRef SysReg = "") {
4418   // write and register intrinsics only support 32 and 64 bit operations.
4419   assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
4420           && "Unsupported size for register.");
4421 
4422   CodeGen::CGBuilderTy &Builder = CGF.Builder;
4423   CodeGen::CodeGenModule &CGM = CGF.CGM;
4424   LLVMContext &Context = CGM.getLLVMContext();
4425 
4426   if (SysReg.empty()) {
4427     const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
4428     SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
4429   }
4430 
4431   llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
4432   llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4433   llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4434 
4435   llvm::Type *Types[] = { RegisterType };
4436 
4437   bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
4438   assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
4439             && "Can't fit 64-bit value in 32-bit register");
4440 
4441   if (IsRead) {
4442     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
4443     llvm::Value *Call = Builder.CreateCall(F, Metadata);
4444 
4445     if (MixedTypes)
4446       // Read into 64 bit register and then truncate result to 32 bit.
4447       return Builder.CreateTrunc(Call, ValueType);
4448 
4449     if (ValueType->isPointerTy())
4450       // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
4451       return Builder.CreateIntToPtr(Call, ValueType);
4452 
4453     return Call;
4454   }
4455 
4456   llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
4457   llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
4458   if (MixedTypes) {
4459     // Extend 32 bit write value to 64 bit to pass to write.
4460     ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
4461     return Builder.CreateCall(F, { Metadata, ArgValue });
4462   }
4463 
4464   if (ValueType->isPointerTy()) {
4465     // Have VoidPtrTy ArgValue but want to return an i32/i64.
4466     ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
4467     return Builder.CreateCall(F, { Metadata, ArgValue });
4468   }
4469 
4470   return Builder.CreateCall(F, { Metadata, ArgValue });
4471 }
4472 
4473 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
4474 /// argument that specifies the vector type.
4475 static bool HasExtraNeonArgument(unsigned BuiltinID) {
4476   switch (BuiltinID) {
4477   default: break;
4478   case NEON::BI__builtin_neon_vget_lane_i8:
4479   case NEON::BI__builtin_neon_vget_lane_i16:
4480   case NEON::BI__builtin_neon_vget_lane_i32:
4481   case NEON::BI__builtin_neon_vget_lane_i64:
4482   case NEON::BI__builtin_neon_vget_lane_f32:
4483   case NEON::BI__builtin_neon_vgetq_lane_i8:
4484   case NEON::BI__builtin_neon_vgetq_lane_i16:
4485   case NEON::BI__builtin_neon_vgetq_lane_i32:
4486   case NEON::BI__builtin_neon_vgetq_lane_i64:
4487   case NEON::BI__builtin_neon_vgetq_lane_f32:
4488   case NEON::BI__builtin_neon_vset_lane_i8:
4489   case NEON::BI__builtin_neon_vset_lane_i16:
4490   case NEON::BI__builtin_neon_vset_lane_i32:
4491   case NEON::BI__builtin_neon_vset_lane_i64:
4492   case NEON::BI__builtin_neon_vset_lane_f32:
4493   case NEON::BI__builtin_neon_vsetq_lane_i8:
4494   case NEON::BI__builtin_neon_vsetq_lane_i16:
4495   case NEON::BI__builtin_neon_vsetq_lane_i32:
4496   case NEON::BI__builtin_neon_vsetq_lane_i64:
4497   case NEON::BI__builtin_neon_vsetq_lane_f32:
4498   case NEON::BI__builtin_neon_vsha1h_u32:
4499   case NEON::BI__builtin_neon_vsha1cq_u32:
4500   case NEON::BI__builtin_neon_vsha1pq_u32:
4501   case NEON::BI__builtin_neon_vsha1mq_u32:
4502   case ARM::BI_MoveToCoprocessor:
4503   case ARM::BI_MoveToCoprocessor2:
4504     return false;
4505   }
4506   return true;
4507 }
4508 
4509 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
4510                                            const CallExpr *E) {
4511   if (auto Hint = GetValueForARMHint(BuiltinID))
4512     return Hint;
4513 
4514   if (BuiltinID == ARM::BI__emit) {
4515     bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
4516     llvm::FunctionType *FTy =
4517         llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
4518 
4519     APSInt Value;
4520     if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
4521       llvm_unreachable("Sema will ensure that the parameter is constant");
4522 
4523     uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
4524 
4525     llvm::InlineAsm *Emit =
4526         IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
4527                                  /*SideEffects=*/true)
4528                 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
4529                                  /*SideEffects=*/true);
4530 
4531     return Builder.CreateCall(Emit);
4532   }
4533 
4534   if (BuiltinID == ARM::BI__builtin_arm_dbg) {
4535     Value *Option = EmitScalarExpr(E->getArg(0));
4536     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
4537   }
4538 
4539   if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
4540     Value *Address = EmitScalarExpr(E->getArg(0));
4541     Value *RW      = EmitScalarExpr(E->getArg(1));
4542     Value *IsData  = EmitScalarExpr(E->getArg(2));
4543 
4544     // Locality is not supported on ARM target
4545     Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
4546 
4547     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
4548     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
4549   }
4550 
4551   if (BuiltinID == ARM::BI__builtin_arm_rbit) {
4552     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4553     return Builder.CreateCall(
4554         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
4555   }
4556 
4557   if (BuiltinID == ARM::BI__clear_cache) {
4558     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4559     const FunctionDecl *FD = E->getDirectCallee();
4560     Value *Ops[2];
4561     for (unsigned i = 0; i < 2; i++)
4562       Ops[i] = EmitScalarExpr(E->getArg(i));
4563     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4564     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4565     StringRef Name = FD->getName();
4566     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4567   }
4568 
4569   if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
4570       BuiltinID == ARM::BI__builtin_arm_mcrr2) {
4571     Function *F;
4572 
4573     switch (BuiltinID) {
4574     default: llvm_unreachable("unexpected builtin");
4575     case ARM::BI__builtin_arm_mcrr:
4576       F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
4577       break;
4578     case ARM::BI__builtin_arm_mcrr2:
4579       F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
4580       break;
4581     }
4582 
4583     // MCRR{2} instruction has 5 operands but
4584     // the intrinsic has 4 because Rt and Rt2
4585     // are represented as a single unsigned 64
4586     // bit integer in the intrinsic definition
4587     // but internally it's represented as 2 32
4588     // bit integers.
4589 
4590     Value *Coproc = EmitScalarExpr(E->getArg(0));
4591     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4592     Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
4593     Value *CRm = EmitScalarExpr(E->getArg(3));
4594 
4595     Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4596     Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
4597     Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
4598     Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
4599 
4600     return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
4601   }
4602 
4603   if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
4604       BuiltinID == ARM::BI__builtin_arm_mrrc2) {
4605     Function *F;
4606 
4607     switch (BuiltinID) {
4608     default: llvm_unreachable("unexpected builtin");
4609     case ARM::BI__builtin_arm_mrrc:
4610       F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
4611       break;
4612     case ARM::BI__builtin_arm_mrrc2:
4613       F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
4614       break;
4615     }
4616 
4617     Value *Coproc = EmitScalarExpr(E->getArg(0));
4618     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4619     Value *CRm  = EmitScalarExpr(E->getArg(2));
4620     Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
4621 
4622     // Returns an unsigned 64 bit integer, represented
4623     // as two 32 bit integers.
4624 
4625     Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
4626     Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
4627     Rt = Builder.CreateZExt(Rt, Int64Ty);
4628     Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
4629 
4630     Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
4631     RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
4632     RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
4633 
4634     return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
4635   }
4636 
4637   if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
4638       ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
4639         BuiltinID == ARM::BI__builtin_arm_ldaex) &&
4640        getContext().getTypeSize(E->getType()) == 64) ||
4641       BuiltinID == ARM::BI__ldrexd) {
4642     Function *F;
4643 
4644     switch (BuiltinID) {
4645     default: llvm_unreachable("unexpected builtin");
4646     case ARM::BI__builtin_arm_ldaex:
4647       F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
4648       break;
4649     case ARM::BI__builtin_arm_ldrexd:
4650     case ARM::BI__builtin_arm_ldrex:
4651     case ARM::BI__ldrexd:
4652       F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
4653       break;
4654     }
4655 
4656     Value *LdPtr = EmitScalarExpr(E->getArg(0));
4657     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4658                                     "ldrexd");
4659 
4660     Value *Val0 = Builder.CreateExtractValue(Val, 1);
4661     Value *Val1 = Builder.CreateExtractValue(Val, 0);
4662     Val0 = Builder.CreateZExt(Val0, Int64Ty);
4663     Val1 = Builder.CreateZExt(Val1, Int64Ty);
4664 
4665     Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
4666     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4667     Val = Builder.CreateOr(Val, Val1);
4668     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4669   }
4670 
4671   if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
4672       BuiltinID == ARM::BI__builtin_arm_ldaex) {
4673     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4674 
4675     QualType Ty = E->getType();
4676     llvm::Type *RealResTy = ConvertType(Ty);
4677     llvm::Type *PtrTy = llvm::IntegerType::get(
4678         getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
4679     LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
4680 
4681     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
4682                                        ? Intrinsic::arm_ldaex
4683                                        : Intrinsic::arm_ldrex,
4684                                    PtrTy);
4685     Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
4686 
4687     if (RealResTy->isPointerTy())
4688       return Builder.CreateIntToPtr(Val, RealResTy);
4689     else {
4690       llvm::Type *IntResTy = llvm::IntegerType::get(
4691           getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
4692       Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4693       return Builder.CreateBitCast(Val, RealResTy);
4694     }
4695   }
4696 
4697   if (BuiltinID == ARM::BI__builtin_arm_strexd ||
4698       ((BuiltinID == ARM::BI__builtin_arm_stlex ||
4699         BuiltinID == ARM::BI__builtin_arm_strex) &&
4700        getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
4701     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4702                                        ? Intrinsic::arm_stlexd
4703                                        : Intrinsic::arm_strexd);
4704     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
4705 
4706     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4707     Value *Val = EmitScalarExpr(E->getArg(0));
4708     Builder.CreateStore(Val, Tmp);
4709 
4710     Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
4711     Val = Builder.CreateLoad(LdPtr);
4712 
4713     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4714     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4715     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
4716     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
4717   }
4718 
4719   if (BuiltinID == ARM::BI__builtin_arm_strex ||
4720       BuiltinID == ARM::BI__builtin_arm_stlex) {
4721     Value *StoreVal = EmitScalarExpr(E->getArg(0));
4722     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4723 
4724     QualType Ty = E->getArg(0)->getType();
4725     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4726                                                  getContext().getTypeSize(Ty));
4727     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4728 
4729     if (StoreVal->getType()->isPointerTy())
4730       StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
4731     else {
4732       llvm::Type *IntTy = llvm::IntegerType::get(
4733           getLLVMContext(),
4734           CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
4735       StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
4736       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
4737     }
4738 
4739     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4740                                        ? Intrinsic::arm_stlex
4741                                        : Intrinsic::arm_strex,
4742                                    StoreAddr->getType());
4743     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
4744   }
4745 
4746   switch (BuiltinID) {
4747   case ARM::BI__iso_volatile_load8:
4748   case ARM::BI__iso_volatile_load16:
4749   case ARM::BI__iso_volatile_load32:
4750   case ARM::BI__iso_volatile_load64: {
4751     Value *Ptr = EmitScalarExpr(E->getArg(0));
4752     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4753     CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
4754     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4755                                              LoadSize.getQuantity() * 8);
4756     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4757     llvm::LoadInst *Load =
4758       Builder.CreateAlignedLoad(Ptr, LoadSize);
4759     Load->setVolatile(true);
4760     return Load;
4761   }
4762   case ARM::BI__iso_volatile_store8:
4763   case ARM::BI__iso_volatile_store16:
4764   case ARM::BI__iso_volatile_store32:
4765   case ARM::BI__iso_volatile_store64: {
4766     Value *Ptr = EmitScalarExpr(E->getArg(0));
4767     Value *Value = EmitScalarExpr(E->getArg(1));
4768     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4769     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
4770     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4771                                              StoreSize.getQuantity() * 8);
4772     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4773     llvm::StoreInst *Store =
4774       Builder.CreateAlignedStore(Value, Ptr,
4775                                  StoreSize);
4776     Store->setVolatile(true);
4777     return Store;
4778   }
4779   }
4780 
4781   if (BuiltinID == ARM::BI__builtin_arm_clrex) {
4782     Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
4783     return Builder.CreateCall(F);
4784   }
4785 
4786   // CRC32
4787   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4788   switch (BuiltinID) {
4789   case ARM::BI__builtin_arm_crc32b:
4790     CRCIntrinsicID = Intrinsic::arm_crc32b; break;
4791   case ARM::BI__builtin_arm_crc32cb:
4792     CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
4793   case ARM::BI__builtin_arm_crc32h:
4794     CRCIntrinsicID = Intrinsic::arm_crc32h; break;
4795   case ARM::BI__builtin_arm_crc32ch:
4796     CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
4797   case ARM::BI__builtin_arm_crc32w:
4798   case ARM::BI__builtin_arm_crc32d:
4799     CRCIntrinsicID = Intrinsic::arm_crc32w; break;
4800   case ARM::BI__builtin_arm_crc32cw:
4801   case ARM::BI__builtin_arm_crc32cd:
4802     CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
4803   }
4804 
4805   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4806     Value *Arg0 = EmitScalarExpr(E->getArg(0));
4807     Value *Arg1 = EmitScalarExpr(E->getArg(1));
4808 
    // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
    // intrinsics, hence we need different codegen for these cases.
4811     if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
4812         BuiltinID == ARM::BI__builtin_arm_crc32cd) {
4813       Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4814       Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
4815       Value *Arg1b = Builder.CreateLShr(Arg1, C1);
4816       Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
4817 
4818       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4819       Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
4820       return Builder.CreateCall(F, {Res, Arg1b});
4821     } else {
4822       Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
4823 
4824       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4825       return Builder.CreateCall(F, {Arg0, Arg1});
4826     }
4827   }
4828 
4829   if (BuiltinID == ARM::BI__builtin_arm_rsr ||
4830       BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4831       BuiltinID == ARM::BI__builtin_arm_rsrp ||
4832       BuiltinID == ARM::BI__builtin_arm_wsr ||
4833       BuiltinID == ARM::BI__builtin_arm_wsr64 ||
4834       BuiltinID == ARM::BI__builtin_arm_wsrp) {
4835 
4836     bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
4837                   BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4838                   BuiltinID == ARM::BI__builtin_arm_rsrp;
4839 
4840     bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
4841                             BuiltinID == ARM::BI__builtin_arm_wsrp;
4842 
4843     bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4844                    BuiltinID == ARM::BI__builtin_arm_wsr64;
4845 
4846     llvm::Type *ValueType;
4847     llvm::Type *RegisterType;
4848     if (IsPointerBuiltin) {
4849       ValueType = VoidPtrTy;
4850       RegisterType = Int32Ty;
4851     } else if (Is64Bit) {
4852       ValueType = RegisterType = Int64Ty;
4853     } else {
4854       ValueType = RegisterType = Int32Ty;
4855     }
4856 
4857     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
4858   }
4859 
4860   // Find out if any arguments are required to be integer constant
4861   // expressions.
4862   unsigned ICEArguments = 0;
4863   ASTContext::GetBuiltinTypeError Error;
4864   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4865   assert(Error == ASTContext::GE_None && "Should not codegen an error");
4866 
4867   auto getAlignmentValue32 = [&](Address addr) -> Value* {
4868     return Builder.getInt32(addr.getAlignment().getQuantity());
4869   };
4870 
4871   Address PtrOp0 = Address::invalid();
4872   Address PtrOp1 = Address::invalid();
4873   SmallVector<Value*, 4> Ops;
4874   bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
4875   unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
4876   for (unsigned i = 0, e = NumArgs; i != e; i++) {
4877     if (i == 0) {
4878       switch (BuiltinID) {
4879       case NEON::BI__builtin_neon_vld1_v:
4880       case NEON::BI__builtin_neon_vld1q_v:
4881       case NEON::BI__builtin_neon_vld1q_lane_v:
4882       case NEON::BI__builtin_neon_vld1_lane_v:
4883       case NEON::BI__builtin_neon_vld1_dup_v:
4884       case NEON::BI__builtin_neon_vld1q_dup_v:
4885       case NEON::BI__builtin_neon_vst1_v:
4886       case NEON::BI__builtin_neon_vst1q_v:
4887       case NEON::BI__builtin_neon_vst1q_lane_v:
4888       case NEON::BI__builtin_neon_vst1_lane_v:
4889       case NEON::BI__builtin_neon_vst2_v:
4890       case NEON::BI__builtin_neon_vst2q_v:
4891       case NEON::BI__builtin_neon_vst2_lane_v:
4892       case NEON::BI__builtin_neon_vst2q_lane_v:
4893       case NEON::BI__builtin_neon_vst3_v:
4894       case NEON::BI__builtin_neon_vst3q_v:
4895       case NEON::BI__builtin_neon_vst3_lane_v:
4896       case NEON::BI__builtin_neon_vst3q_lane_v:
4897       case NEON::BI__builtin_neon_vst4_v:
4898       case NEON::BI__builtin_neon_vst4q_v:
4899       case NEON::BI__builtin_neon_vst4_lane_v:
4900       case NEON::BI__builtin_neon_vst4q_lane_v:
4901         // Get the alignment for the argument in addition to the value;
4902         // we'll use it later.
4903         PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
4904         Ops.push_back(PtrOp0.getPointer());
4905         continue;
4906       }
4907     }
4908     if (i == 1) {
4909       switch (BuiltinID) {
4910       case NEON::BI__builtin_neon_vld2_v:
4911       case NEON::BI__builtin_neon_vld2q_v:
4912       case NEON::BI__builtin_neon_vld3_v:
4913       case NEON::BI__builtin_neon_vld3q_v:
4914       case NEON::BI__builtin_neon_vld4_v:
4915       case NEON::BI__builtin_neon_vld4q_v:
4916       case NEON::BI__builtin_neon_vld2_lane_v:
4917       case NEON::BI__builtin_neon_vld2q_lane_v:
4918       case NEON::BI__builtin_neon_vld3_lane_v:
4919       case NEON::BI__builtin_neon_vld3q_lane_v:
4920       case NEON::BI__builtin_neon_vld4_lane_v:
4921       case NEON::BI__builtin_neon_vld4q_lane_v:
4922       case NEON::BI__builtin_neon_vld2_dup_v:
4923       case NEON::BI__builtin_neon_vld3_dup_v:
4924       case NEON::BI__builtin_neon_vld4_dup_v:
4925         // Get the alignment for the argument in addition to the value;
4926         // we'll use it later.
4927         PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
4928         Ops.push_back(PtrOp1.getPointer());
4929         continue;
4930       }
4931     }
4932 
4933     if ((ICEArguments & (1 << i)) == 0) {
4934       Ops.push_back(EmitScalarExpr(E->getArg(i)));
4935     } else {
4936       // If this is required to be a constant, constant fold it so that we know
4937       // that the generated intrinsic gets a ConstantInt.
4938       llvm::APSInt Result;
4939       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
4940       assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
4941       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
4942     }
4943   }
4944 
4945   switch (BuiltinID) {
4946   default: break;
4947 
4948   case NEON::BI__builtin_neon_vget_lane_i8:
4949   case NEON::BI__builtin_neon_vget_lane_i16:
4950   case NEON::BI__builtin_neon_vget_lane_i32:
4951   case NEON::BI__builtin_neon_vget_lane_i64:
4952   case NEON::BI__builtin_neon_vget_lane_f32:
4953   case NEON::BI__builtin_neon_vgetq_lane_i8:
4954   case NEON::BI__builtin_neon_vgetq_lane_i16:
4955   case NEON::BI__builtin_neon_vgetq_lane_i32:
4956   case NEON::BI__builtin_neon_vgetq_lane_i64:
4957   case NEON::BI__builtin_neon_vgetq_lane_f32:
4958     return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
4959 
4960   case NEON::BI__builtin_neon_vset_lane_i8:
4961   case NEON::BI__builtin_neon_vset_lane_i16:
4962   case NEON::BI__builtin_neon_vset_lane_i32:
4963   case NEON::BI__builtin_neon_vset_lane_i64:
4964   case NEON::BI__builtin_neon_vset_lane_f32:
4965   case NEON::BI__builtin_neon_vsetq_lane_i8:
4966   case NEON::BI__builtin_neon_vsetq_lane_i16:
4967   case NEON::BI__builtin_neon_vsetq_lane_i32:
4968   case NEON::BI__builtin_neon_vsetq_lane_i64:
4969   case NEON::BI__builtin_neon_vsetq_lane_f32:
4970     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4971 
4972   case NEON::BI__builtin_neon_vsha1h_u32:
4973     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
4974                         "vsha1h");
4975   case NEON::BI__builtin_neon_vsha1cq_u32:
4976     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
4977                         "vsha1h");
4978   case NEON::BI__builtin_neon_vsha1pq_u32:
4979     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
4980                         "vsha1h");
4981   case NEON::BI__builtin_neon_vsha1mq_u32:
4982     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
4983                         "vsha1h");
4984 
4985   // The ARM _MoveToCoprocessor builtins put the input register value as
4986   // the first argument, but the LLVM intrinsic expects it as the third one.
4987   case ARM::BI_MoveToCoprocessor:
4988   case ARM::BI_MoveToCoprocessor2: {
4989     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
4990                                    Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
4991     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
4992                                   Ops[3], Ops[4], Ops[5]});
4993   }
4994   case ARM::BI_BitScanForward:
4995   case ARM::BI_BitScanForward64:
4996     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
4997   case ARM::BI_BitScanReverse:
4998   case ARM::BI_BitScanReverse64:
4999     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
5000 
5001   case ARM::BI_InterlockedAnd64:
5002     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
5003   case ARM::BI_InterlockedExchange64:
5004     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
5005   case ARM::BI_InterlockedExchangeAdd64:
5006     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
5007   case ARM::BI_InterlockedExchangeSub64:
5008     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
5009   case ARM::BI_InterlockedOr64:
5010     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
5011   case ARM::BI_InterlockedXor64:
5012     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
5013   case ARM::BI_InterlockedDecrement64:
5014     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
5015   case ARM::BI_InterlockedIncrement64:
5016     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
5017   }
5018 
5019   // Get the last argument, which specifies the vector type.
5020   assert(HasExtraArg);
5021   llvm::APSInt Result;
5022   const Expr *Arg = E->getArg(E->getNumArgs()-1);
5023   if (!Arg->isIntegerConstantExpr(Result, getContext()))
5024     return nullptr;
5025 
5026   if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
5027       BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
5028     // Determine the overloaded type of this builtin.
5029     llvm::Type *Ty;
5030     if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
5031       Ty = FloatTy;
5032     else
5033       Ty = DoubleTy;
5034 
5035     // Determine whether this is an unsigned conversion or not.
5036     bool usgn = Result.getZExtValue() == 1;
5037     unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
5038 
5039     // Call the appropriate intrinsic.
5040     Function *F = CGM.getIntrinsic(Int, Ty);
5041     return Builder.CreateCall(F, Ops, "vcvtr");
5042   }
5043 
5044   // Determine the type of this overloaded NEON intrinsic.
5045   NeonTypeFlags Type(Result.getZExtValue());
5046   bool usgn = Type.isUnsigned();
5047   bool rightShift = false;
5048 
5049   llvm::VectorType *VTy = GetNeonType(this, Type);
5050   llvm::Type *Ty = VTy;
5051   if (!Ty)
5052     return nullptr;
5053 
5054   // Many NEON builtins have identical semantics and uses in ARM and
5055   // AArch64. Emit these in a single function.
5056   auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
5057   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
5058       IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
5059   if (Builtin)
5060     return EmitCommonNeonBuiltinExpr(
5061         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
5062         Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);
5063 
5064   unsigned Int;
5065   switch (BuiltinID) {
5066   default: return nullptr;
5067   case NEON::BI__builtin_neon_vld1q_lane_v:
5068     // Handle 64-bit integer elements as a special case.  Use shuffles of
5069     // one-element vectors to avoid poor code for i64 in the backend.
5070     if (VTy->getElementType()->isIntegerTy(64)) {
5071       // Extract the other lane.
5072       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5073       uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
5074       Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
5075       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
5076       // Load the value as a one-element vector.
5077       Ty = llvm::VectorType::get(VTy->getElementType(), 1);
5078       llvm::Type *Tys[] = {Ty, Int8PtrTy};
5079       Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
5080       Value *Align = getAlignmentValue32(PtrOp0);
5081       Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
5082       // Combine them.
5083       uint32_t Indices[] = {1 - Lane, Lane};
5084       SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
5085       return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
5086     }
5087     // fall through
5088   case NEON::BI__builtin_neon_vld1_lane_v: {
5089     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5090     PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
5091     Value *Ld = Builder.CreateLoad(PtrOp0);
5092     return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
5093   }
5094   case NEON::BI__builtin_neon_vld2_dup_v:
5095   case NEON::BI__builtin_neon_vld3_dup_v:
5096   case NEON::BI__builtin_neon_vld4_dup_v: {
5097     // Handle 64-bit elements as a special-case.  There is no "dup" needed.
5098     if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
5099       switch (BuiltinID) {
5100       case NEON::BI__builtin_neon_vld2_dup_v:
5101         Int = Intrinsic::arm_neon_vld2;
5102         break;
5103       case NEON::BI__builtin_neon_vld3_dup_v:
5104         Int = Intrinsic::arm_neon_vld3;
5105         break;
5106       case NEON::BI__builtin_neon_vld4_dup_v:
5107         Int = Intrinsic::arm_neon_vld4;
5108         break;
5109       default: llvm_unreachable("unknown vld_dup intrinsic?");
5110       }
5111       llvm::Type *Tys[] = {Ty, Int8PtrTy};
5112       Function *F = CGM.getIntrinsic(Int, Tys);
5113       llvm::Value *Align = getAlignmentValue32(PtrOp1);
5114       Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
5115       Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5116       Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5117       return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5118     }
5119     switch (BuiltinID) {
5120     case NEON::BI__builtin_neon_vld2_dup_v:
5121       Int = Intrinsic::arm_neon_vld2lane;
5122       break;
5123     case NEON::BI__builtin_neon_vld3_dup_v:
5124       Int = Intrinsic::arm_neon_vld3lane;
5125       break;
5126     case NEON::BI__builtin_neon_vld4_dup_v:
5127       Int = Intrinsic::arm_neon_vld4lane;
5128       break;
5129     default: llvm_unreachable("unknown vld_dup intrinsic?");
5130     }
5131     llvm::Type *Tys[] = {Ty, Int8PtrTy};
5132     Function *F = CGM.getIntrinsic(Int, Tys);
5133     llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
5134 
5135     SmallVector<Value*, 6> Args;
5136     Args.push_back(Ops[1]);
5137     Args.append(STy->getNumElements(), UndefValue::get(Ty));
5138 
5139     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
5140     Args.push_back(CI);
5141     Args.push_back(getAlignmentValue32(PtrOp1));
5142 
5143     Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
5144     // splat lane 0 to all elts in each vector of the result.
5145     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
5146       Value *Val = Builder.CreateExtractValue(Ops[1], i);
5147       Value *Elt = Builder.CreateBitCast(Val, Ty);
5148       Elt = EmitNeonSplat(Elt, CI);
5149       Elt = Builder.CreateBitCast(Elt, Val->getType());
5150       Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
5151     }
5152     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5153     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5154     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5155   }
5156   case NEON::BI__builtin_neon_vqrshrn_n_v:
5157     Int =
5158       usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
5159     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
5160                         1, true);
5161   case NEON::BI__builtin_neon_vqrshrun_n_v:
5162     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
5163                         Ops, "vqrshrun_n", 1, true);
5164   case NEON::BI__builtin_neon_vqshrn_n_v:
5165     Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
5166     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
5167                         1, true);
5168   case NEON::BI__builtin_neon_vqshrun_n_v:
5169     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
5170                         Ops, "vqshrun_n", 1, true);
5171   case NEON::BI__builtin_neon_vrecpe_v:
5172   case NEON::BI__builtin_neon_vrecpeq_v:
5173     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
5174                         Ops, "vrecpe");
5175   case NEON::BI__builtin_neon_vrshrn_n_v:
5176     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
5177                         Ops, "vrshrn_n", 1, true);
5178   case NEON::BI__builtin_neon_vrsra_n_v:
5179   case NEON::BI__builtin_neon_vrsraq_n_v:
5180     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5181     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5182     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
5183     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
5184     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
5185     return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
5186   case NEON::BI__builtin_neon_vsri_n_v:
5187   case NEON::BI__builtin_neon_vsriq_n_v:
5188     rightShift = true;
5189     LLVM_FALLTHROUGH;
5190   case NEON::BI__builtin_neon_vsli_n_v:
5191   case NEON::BI__builtin_neon_vsliq_n_v:
5192     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
5193     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
5194                         Ops, "vsli_n");
5195   case NEON::BI__builtin_neon_vsra_n_v:
5196   case NEON::BI__builtin_neon_vsraq_n_v:
5197     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5198     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
5199     return Builder.CreateAdd(Ops[0], Ops[1]);
5200   case NEON::BI__builtin_neon_vst1q_lane_v:
5201     // Handle 64-bit integer elements as a special case.  Use a shuffle to get
5202     // a one-element vector and avoid poor code for i64 in the backend.
5203     if (VTy->getElementType()->isIntegerTy(64)) {
5204       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5205       Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
5206       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
5207       Ops[2] = getAlignmentValue32(PtrOp0);
5208       llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
5209       return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
5210                                                  Tys), Ops);
5211     }
5212     // fall through
5213   case NEON::BI__builtin_neon_vst1_lane_v: {
5214     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5215     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
5216     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5217     auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
5218     return St;
5219   }
5220   case NEON::BI__builtin_neon_vtbl1_v:
5221     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
5222                         Ops, "vtbl1");
5223   case NEON::BI__builtin_neon_vtbl2_v:
5224     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
5225                         Ops, "vtbl2");
5226   case NEON::BI__builtin_neon_vtbl3_v:
5227     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
5228                         Ops, "vtbl3");
5229   case NEON::BI__builtin_neon_vtbl4_v:
5230     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
5231                         Ops, "vtbl4");
5232   case NEON::BI__builtin_neon_vtbx1_v:
5233     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
5234                         Ops, "vtbx1");
5235   case NEON::BI__builtin_neon_vtbx2_v:
5236     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
5237                         Ops, "vtbx2");
5238   case NEON::BI__builtin_neon_vtbx3_v:
5239     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
5240                         Ops, "vtbx3");
5241   case NEON::BI__builtin_neon_vtbx4_v:
5242     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
5243                         Ops, "vtbx4");
5244   }
5245 }
5246 
5247 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
5248                                       const CallExpr *E,
5249                                       SmallVectorImpl<Value *> &Ops) {
5250   unsigned int Int = 0;
5251   const char *s = nullptr;
5252 
5253   switch (BuiltinID) {
5254   default:
5255     return nullptr;
5256   case NEON::BI__builtin_neon_vtbl1_v:
5257   case NEON::BI__builtin_neon_vqtbl1_v:
5258   case NEON::BI__builtin_neon_vqtbl1q_v:
5259   case NEON::BI__builtin_neon_vtbl2_v:
5260   case NEON::BI__builtin_neon_vqtbl2_v:
5261   case NEON::BI__builtin_neon_vqtbl2q_v:
5262   case NEON::BI__builtin_neon_vtbl3_v:
5263   case NEON::BI__builtin_neon_vqtbl3_v:
5264   case NEON::BI__builtin_neon_vqtbl3q_v:
5265   case NEON::BI__builtin_neon_vtbl4_v:
5266   case NEON::BI__builtin_neon_vqtbl4_v:
5267   case NEON::BI__builtin_neon_vqtbl4q_v:
5268     break;
5269   case NEON::BI__builtin_neon_vtbx1_v:
5270   case NEON::BI__builtin_neon_vqtbx1_v:
5271   case NEON::BI__builtin_neon_vqtbx1q_v:
5272   case NEON::BI__builtin_neon_vtbx2_v:
5273   case NEON::BI__builtin_neon_vqtbx2_v:
5274   case NEON::BI__builtin_neon_vqtbx2q_v:
5275   case NEON::BI__builtin_neon_vtbx3_v:
5276   case NEON::BI__builtin_neon_vqtbx3_v:
5277   case NEON::BI__builtin_neon_vqtbx3q_v:
5278   case NEON::BI__builtin_neon_vtbx4_v:
5279   case NEON::BI__builtin_neon_vqtbx4_v:
5280   case NEON::BI__builtin_neon_vqtbx4q_v:
5281     break;
5282   }
5283 
5284   assert(E->getNumArgs() >= 3);
5285 
5286   // Get the last argument, which specifies the vector type.
5287   llvm::APSInt Result;
5288   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
5289   if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
5290     return nullptr;
5291 
5292   // Determine the type of this overloaded NEON intrinsic.
5293   NeonTypeFlags Type(Result.getZExtValue());
5294   llvm::VectorType *Ty = GetNeonType(&CGF, Type);
5295   if (!Ty)
5296     return nullptr;
5297 
5298   CodeGen::CGBuilderTy &Builder = CGF.Builder;
5299 
5300   // AArch64 scalar builtins are not overloaded, they do not have an extra
5301   // argument that specifies the vector type, need to handle each case.
5302   switch (BuiltinID) {
5303   case NEON::BI__builtin_neon_vtbl1_v: {
5304     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
5305                               Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
5306                               "vtbl1");
5307   }
5308   case NEON::BI__builtin_neon_vtbl2_v: {
5309     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
5310                               Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
5311                               "vtbl1");
5312   }
5313   case NEON::BI__builtin_neon_vtbl3_v: {
5314     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
5315                               Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
5316                               "vtbl2");
5317   }
5318   case NEON::BI__builtin_neon_vtbl4_v: {
5319     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
5320                               Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
5321                               "vtbl2");
5322   }
5323   case NEON::BI__builtin_neon_vtbx1_v: {
5324     Value *TblRes =
5325         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
5326                            Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
5327 
5328     llvm::Constant *EightV = ConstantInt::get(Ty, 8);
5329     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
5330     CmpRes = Builder.CreateSExt(CmpRes, Ty);
5331 
5332     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5333     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5334     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5335   }
5336   case NEON::BI__builtin_neon_vtbx2_v: {
5337     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
5338                               Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
5339                               "vtbx1");
5340   }
5341   case NEON::BI__builtin_neon_vtbx3_v: {
5342     Value *TblRes =
5343         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
5344                            Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
5345 
5346     llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
5347     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
5348                                            TwentyFourV);
5349     CmpRes = Builder.CreateSExt(CmpRes, Ty);
5350 
5351     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5352     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5353     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5354   }
5355   case NEON::BI__builtin_neon_vtbx4_v: {
5356     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
5357                               Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
5358                               "vtbx2");
5359   }
5360   case NEON::BI__builtin_neon_vqtbl1_v:
5361   case NEON::BI__builtin_neon_vqtbl1q_v:
5362     Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
5363   case NEON::BI__builtin_neon_vqtbl2_v:
5364   case NEON::BI__builtin_neon_vqtbl2q_v: {
5365     Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
5366   case NEON::BI__builtin_neon_vqtbl3_v:
5367   case NEON::BI__builtin_neon_vqtbl3q_v:
5368     Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
5369   case NEON::BI__builtin_neon_vqtbl4_v:
5370   case NEON::BI__builtin_neon_vqtbl4q_v:
5371     Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
5372   case NEON::BI__builtin_neon_vqtbx1_v:
5373   case NEON::BI__builtin_neon_vqtbx1q_v:
5374     Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
5375   case NEON::BI__builtin_neon_vqtbx2_v:
5376   case NEON::BI__builtin_neon_vqtbx2q_v:
5377     Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
5378   case NEON::BI__builtin_neon_vqtbx3_v:
5379   case NEON::BI__builtin_neon_vqtbx3q_v:
5380     Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
5381   case NEON::BI__builtin_neon_vqtbx4_v:
5382   case NEON::BI__builtin_neon_vqtbx4q_v:
5383     Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
5384   }
5385   }
5386 
5387   if (!Int)
5388     return nullptr;
5389 
5390   Function *F = CGF.CGM.getIntrinsic(Int, Ty);
5391   return CGF.EmitNeonCall(F, Ops, s);
5392 }
5393 
5394 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
5395   llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
5396   Op = Builder.CreateBitCast(Op, Int16Ty);
5397   Value *V = UndefValue::get(VTy);
5398   llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
5399   Op = Builder.CreateInsertElement(V, Op, CI);
5400   return Op;
5401 }
5402 
5403 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
5404                                                const CallExpr *E) {
5405   unsigned HintID = static_cast<unsigned>(-1);
5406   switch (BuiltinID) {
5407   default: break;
5408   case AArch64::BI__builtin_arm_nop:
5409     HintID = 0;
5410     break;
5411   case AArch64::BI__builtin_arm_yield:
5412     HintID = 1;
5413     break;
5414   case AArch64::BI__builtin_arm_wfe:
5415     HintID = 2;
5416     break;
5417   case AArch64::BI__builtin_arm_wfi:
5418     HintID = 3;
5419     break;
5420   case AArch64::BI__builtin_arm_sev:
5421     HintID = 4;
5422     break;
5423   case AArch64::BI__builtin_arm_sevl:
5424     HintID = 5;
5425     break;
5426   }
5427 
5428   if (HintID != static_cast<unsigned>(-1)) {
5429     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
5430     return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
5431   }
5432 
5433   if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
5434     Value *Address         = EmitScalarExpr(E->getArg(0));
5435     Value *RW              = EmitScalarExpr(E->getArg(1));
5436     Value *CacheLevel      = EmitScalarExpr(E->getArg(2));
5437     Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
5438     Value *IsData          = EmitScalarExpr(E->getArg(4));
5439 
5440     Value *Locality = nullptr;
5441     if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
5442       // Temporal fetch, needs to convert cache level to locality.
5443       Locality = llvm::ConstantInt::get(Int32Ty,
5444         -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
5445     } else {
5446       // Streaming fetch.
5447       Locality = llvm::ConstantInt::get(Int32Ty, 0);
5448     }
5449 
5450     // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
5451     // PLDL3STRM or PLDL2STRM.
5452     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
5453     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
5454   }
5455 
5456   if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
5457     assert((getContext().getTypeSize(E->getType()) == 32) &&
5458            "rbit of unusual size!");
5459     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5460     return Builder.CreateCall(
5461         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5462   }
5463   if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
5464     assert((getContext().getTypeSize(E->getType()) == 64) &&
5465            "rbit of unusual size!");
5466     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5467     return Builder.CreateCall(
5468         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5469   }
5470 
5471   if (BuiltinID == AArch64::BI__clear_cache) {
5472     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
5473     const FunctionDecl *FD = E->getDirectCallee();
5474     Value *Ops[2];
5475     for (unsigned i = 0; i < 2; i++)
5476       Ops[i] = EmitScalarExpr(E->getArg(i));
5477     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
5478     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
5479     StringRef Name = FD->getName();
5480     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
5481   }
5482 
5483   if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5484       BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
5485       getContext().getTypeSize(E->getType()) == 128) {
5486     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5487                                        ? Intrinsic::aarch64_ldaxp
5488                                        : Intrinsic::aarch64_ldxp);
5489 
5490     Value *LdPtr = EmitScalarExpr(E->getArg(0));
5491     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
5492                                     "ldxp");
5493 
5494     Value *Val0 = Builder.CreateExtractValue(Val, 1);
5495     Value *Val1 = Builder.CreateExtractValue(Val, 0);
5496     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5497     Val0 = Builder.CreateZExt(Val0, Int128Ty);
5498     Val1 = Builder.CreateZExt(Val1, Int128Ty);
5499 
5500     Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
5501     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
5502     Val = Builder.CreateOr(Val, Val1);
5503     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
5504   } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5505              BuiltinID == AArch64::BI__builtin_arm_ldaex) {
5506     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
5507 
5508     QualType Ty = E->getType();
5509     llvm::Type *RealResTy = ConvertType(Ty);
5510     llvm::Type *PtrTy = llvm::IntegerType::get(
5511         getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
5512     LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
5513 
5514     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5515                                        ? Intrinsic::aarch64_ldaxr
5516                                        : Intrinsic::aarch64_ldxr,
5517                                    PtrTy);
5518     Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
5519 
5520     if (RealResTy->isPointerTy())
5521       return Builder.CreateIntToPtr(Val, RealResTy);
5522 
5523     llvm::Type *IntResTy = llvm::IntegerType::get(
5524         getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
5525     Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
5526     return Builder.CreateBitCast(Val, RealResTy);
5527   }
5528 
5529   if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
5530        BuiltinID == AArch64::BI__builtin_arm_stlex) &&
5531       getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
5532     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5533                                        ? Intrinsic::aarch64_stlxp
5534                                        : Intrinsic::aarch64_stxp);
5535     llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
5536 
5537     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
5538     EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
5539 
5540     Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
5541     llvm::Value *Val = Builder.CreateLoad(Tmp);
5542 
5543     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
5544     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
5545     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
5546                                          Int8PtrTy);
5547     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
5548   }
5549 
5550   if (BuiltinID == AArch64::BI__builtin_arm_strex ||
5551       BuiltinID == AArch64::BI__builtin_arm_stlex) {
5552     Value *StoreVal = EmitScalarExpr(E->getArg(0));
5553     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
5554 
5555     QualType Ty = E->getArg(0)->getType();
5556     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
5557                                                  getContext().getTypeSize(Ty));
5558     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
5559 
5560     if (StoreVal->getType()->isPointerTy())
5561       StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
5562     else {
5563       llvm::Type *IntTy = llvm::IntegerType::get(
5564           getLLVMContext(),
5565           CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
5566       StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
5567       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
5568     }
5569 
5570     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5571                                        ? Intrinsic::aarch64_stlxr
5572                                        : Intrinsic::aarch64_stxr,
5573                                    StoreAddr->getType());
5574     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
5575   }
5576 
5577   if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
5578     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
5579     return Builder.CreateCall(F);
5580   }
5581 
5582   // CRC32
5583   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5584   switch (BuiltinID) {
5585   case AArch64::BI__builtin_arm_crc32b:
5586     CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
5587   case AArch64::BI__builtin_arm_crc32cb:
5588     CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
5589   case AArch64::BI__builtin_arm_crc32h:
5590     CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
5591   case AArch64::BI__builtin_arm_crc32ch:
5592     CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
5593   case AArch64::BI__builtin_arm_crc32w:
5594     CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
5595   case AArch64::BI__builtin_arm_crc32cw:
5596     CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
5597   case AArch64::BI__builtin_arm_crc32d:
5598     CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
5599   case AArch64::BI__builtin_arm_crc32cd:
5600     CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
5601   }
5602 
5603   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5604     Value *Arg0 = EmitScalarExpr(E->getArg(0));
5605     Value *Arg1 = EmitScalarExpr(E->getArg(1));
5606     Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5607 
5608     llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
5609     Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
5610 
5611     return Builder.CreateCall(F, {Arg0, Arg1});
5612   }
5613 
5614   if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
5615       BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5616       BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5617       BuiltinID == AArch64::BI__builtin_arm_wsr ||
5618       BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
5619       BuiltinID == AArch64::BI__builtin_arm_wsrp) {
5620 
5621     bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
5622                   BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5623                   BuiltinID == AArch64::BI__builtin_arm_rsrp;
5624 
5625     bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5626                             BuiltinID == AArch64::BI__builtin_arm_wsrp;
5627 
5628     bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
5629                    BuiltinID != AArch64::BI__builtin_arm_wsr;
5630 
5631     llvm::Type *ValueType;
5632     llvm::Type *RegisterType = Int64Ty;
5633     if (IsPointerBuiltin) {
5634       ValueType = VoidPtrTy;
5635     } else if (Is64Bit) {
5636       ValueType = Int64Ty;
5637     } else {
5638       ValueType = Int32Ty;
5639     }
5640 
5641     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
5642   }
5643 
5644   // Find out if any arguments are required to be integer constant
5645   // expressions.
5646   unsigned ICEArguments = 0;
5647   ASTContext::GetBuiltinTypeError Error;
5648   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5649   assert(Error == ASTContext::GE_None && "Should not codegen an error");
5650 
5651   llvm::SmallVector<Value*, 4> Ops;
5652   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
5653     if ((ICEArguments & (1 << i)) == 0) {
5654       Ops.push_back(EmitScalarExpr(E->getArg(i)));
5655     } else {
5656       // If this is required to be a constant, constant fold it so that we know
5657       // that the generated intrinsic gets a ConstantInt.
5658       llvm::APSInt Result;
5659       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
5660       assert(IsConst && "Constant arg isn't actually constant?");
5661       (void)IsConst;
5662       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
5663     }
5664   }
5665 
5666   auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
5667   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
5668       SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5669 
5670   if (Builtin) {
5671     Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
5672     Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
5673     assert(Result && "SISD intrinsic should have been handled");
5674     return Result;
5675   }
5676 
5677   llvm::APSInt Result;
5678   const Expr *Arg = E->getArg(E->getNumArgs()-1);
5679   NeonTypeFlags Type(0);
5680   if (Arg->isIntegerConstantExpr(Result, getContext()))
5681     // Determine the type of this overloaded NEON intrinsic.
5682     Type = NeonTypeFlags(Result.getZExtValue());
5683 
5684   bool usgn = Type.isUnsigned();
5685   bool quad = Type.isQuad();
5686 
5687   // Handle non-overloaded intrinsics first.
5688   switch (BuiltinID) {
5689   default: break;
5690   case NEON::BI__builtin_neon_vldrq_p128: {
5691     llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5692     llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
5693     Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
5694     return Builder.CreateAlignedLoad(Int128Ty, Ptr,
5695                                      CharUnits::fromQuantity(16));
5696   }
5697   case NEON::BI__builtin_neon_vstrq_p128: {
5698     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
5699     Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
5700     return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
5701   }
5702   case NEON::BI__builtin_neon_vcvts_u32_f32:
5703   case NEON::BI__builtin_neon_vcvtd_u64_f64:
5704     usgn = true;
5705     // FALL THROUGH
5706   case NEON::BI__builtin_neon_vcvts_s32_f32:
5707   case NEON::BI__builtin_neon_vcvtd_s64_f64: {
5708     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5709     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5710     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5711     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5712     Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
5713     if (usgn)
5714       return Builder.CreateFPToUI(Ops[0], InTy);
5715     return Builder.CreateFPToSI(Ops[0], InTy);
5716   }
5717   case NEON::BI__builtin_neon_vcvts_f32_u32:
5718   case NEON::BI__builtin_neon_vcvtd_f64_u64:
5719     usgn = true;
5720     // FALL THROUGH
5721   case NEON::BI__builtin_neon_vcvts_f32_s32:
5722   case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5723     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5724     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5725     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5726     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5727     Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5728     if (usgn)
5729       return Builder.CreateUIToFP(Ops[0], FTy);
5730     return Builder.CreateSIToFP(Ops[0], FTy);
5731   }
5732   case NEON::BI__builtin_neon_vpaddd_s64: {
5733     llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
5734     Value *Vec = EmitScalarExpr(E->getArg(0));
    // The vector is v2i64, so make sure it's bitcast to that.
5736     Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
5737     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5738     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5739     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5740     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
    // Pairwise addition of a v2i64 into a scalar i64.
5742     return Builder.CreateAdd(Op0, Op1, "vpaddd");
5743   }
5744   case NEON::BI__builtin_neon_vpaddd_f64: {
5745     llvm::Type *Ty =
5746       llvm::VectorType::get(DoubleTy, 2);
5747     Value *Vec = EmitScalarExpr(E->getArg(0));
5748     // The vector is v2f64, so make sure it's bitcast to that.
5749     Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
5750     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5751     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5752     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5753     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5754     // Pairwise addition of a v2f64 into a scalar f64.
5755     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5756   }
5757   case NEON::BI__builtin_neon_vpadds_f32: {
5758     llvm::Type *Ty =
5759       llvm::VectorType::get(FloatTy, 2);
5760     Value *Vec = EmitScalarExpr(E->getArg(0));
5761     // The vector is v2f32, so make sure it's bitcast to that.
5762     Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
5763     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5764     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5765     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5766     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5767     // Pairwise addition of a v2f32 into a scalar f32.
5768     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5769   }
5770   case NEON::BI__builtin_neon_vceqzd_s64:
5771   case NEON::BI__builtin_neon_vceqzd_f64:
5772   case NEON::BI__builtin_neon_vceqzs_f32:
5773     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5774     return EmitAArch64CompareBuiltinExpr(
5775         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5776         ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
5777   case NEON::BI__builtin_neon_vcgezd_s64:
5778   case NEON::BI__builtin_neon_vcgezd_f64:
5779   case NEON::BI__builtin_neon_vcgezs_f32:
5780     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5781     return EmitAArch64CompareBuiltinExpr(
5782         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5783         ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
5784   case NEON::BI__builtin_neon_vclezd_s64:
5785   case NEON::BI__builtin_neon_vclezd_f64:
5786   case NEON::BI__builtin_neon_vclezs_f32:
5787     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5788     return EmitAArch64CompareBuiltinExpr(
5789         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5790         ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
5791   case NEON::BI__builtin_neon_vcgtzd_s64:
5792   case NEON::BI__builtin_neon_vcgtzd_f64:
5793   case NEON::BI__builtin_neon_vcgtzs_f32:
5794     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5795     return EmitAArch64CompareBuiltinExpr(
5796         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5797         ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
5798   case NEON::BI__builtin_neon_vcltzd_s64:
5799   case NEON::BI__builtin_neon_vcltzd_f64:
5800   case NEON::BI__builtin_neon_vcltzs_f32:
5801     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5802     return EmitAArch64CompareBuiltinExpr(
5803         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5804         ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
5805 
5806   case NEON::BI__builtin_neon_vceqzd_u64: {
5807     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5808     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5809     Ops[0] =
5810         Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
5811     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
5812   }
5813   case NEON::BI__builtin_neon_vceqd_f64:
5814   case NEON::BI__builtin_neon_vcled_f64:
5815   case NEON::BI__builtin_neon_vcltd_f64:
5816   case NEON::BI__builtin_neon_vcged_f64:
5817   case NEON::BI__builtin_neon_vcgtd_f64: {
5818     llvm::CmpInst::Predicate P;
5819     switch (BuiltinID) {
5820     default: llvm_unreachable("missing builtin ID in switch!");
5821     case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
5822     case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
5823     case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
5824     case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
5825     case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
5826     }
5827     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5828     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5829     Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5830     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5831     return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
5832   }
5833   case NEON::BI__builtin_neon_vceqs_f32:
5834   case NEON::BI__builtin_neon_vcles_f32:
5835   case NEON::BI__builtin_neon_vclts_f32:
5836   case NEON::BI__builtin_neon_vcges_f32:
5837   case NEON::BI__builtin_neon_vcgts_f32: {
5838     llvm::CmpInst::Predicate P;
5839     switch (BuiltinID) {
5840     default: llvm_unreachable("missing builtin ID in switch!");
5841     case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
5842     case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
5843     case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
5844     case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
5845     case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
5846     }
5847     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5848     Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
5849     Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
5850     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5851     return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
5852   }
5853   case NEON::BI__builtin_neon_vceqd_s64:
5854   case NEON::BI__builtin_neon_vceqd_u64:
5855   case NEON::BI__builtin_neon_vcgtd_s64:
5856   case NEON::BI__builtin_neon_vcgtd_u64:
5857   case NEON::BI__builtin_neon_vcltd_s64:
5858   case NEON::BI__builtin_neon_vcltd_u64:
5859   case NEON::BI__builtin_neon_vcged_u64:
5860   case NEON::BI__builtin_neon_vcged_s64:
5861   case NEON::BI__builtin_neon_vcled_u64:
5862   case NEON::BI__builtin_neon_vcled_s64: {
5863     llvm::CmpInst::Predicate P;
5864     switch (BuiltinID) {
5865     default: llvm_unreachable("missing builtin ID in switch!");
5866     case NEON::BI__builtin_neon_vceqd_s64:
5867     case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
5868     case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
5869     case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
5870     case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
5871     case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
5872     case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
5873     case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
5874     case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
5875     case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
5876     }
5877     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5878     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5879     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5880     Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
5881     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
5882   }
5883   case NEON::BI__builtin_neon_vtstd_s64:
5884   case NEON::BI__builtin_neon_vtstd_u64: {
5885     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5886     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5887     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5888     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
5889     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
5890                                 llvm::Constant::getNullValue(Int64Ty));
5891     return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
5892   }
5893   case NEON::BI__builtin_neon_vset_lane_i8:
5894   case NEON::BI__builtin_neon_vset_lane_i16:
5895   case NEON::BI__builtin_neon_vset_lane_i32:
5896   case NEON::BI__builtin_neon_vset_lane_i64:
5897   case NEON::BI__builtin_neon_vset_lane_f32:
5898   case NEON::BI__builtin_neon_vsetq_lane_i8:
5899   case NEON::BI__builtin_neon_vsetq_lane_i16:
5900   case NEON::BI__builtin_neon_vsetq_lane_i32:
5901   case NEON::BI__builtin_neon_vsetq_lane_i64:
5902   case NEON::BI__builtin_neon_vsetq_lane_f32:
5903     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5904     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5905   case NEON::BI__builtin_neon_vset_lane_f64:
5906     // The vector type needs a cast for the v1f64 variant.
5907     Ops[1] = Builder.CreateBitCast(Ops[1],
5908                                    llvm::VectorType::get(DoubleTy, 1));
5909     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5910     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5911   case NEON::BI__builtin_neon_vsetq_lane_f64:
5912     // The vector type needs a cast for the v2f64 variant.
5913     Ops[1] = Builder.CreateBitCast(Ops[1],
5914         llvm::VectorType::get(DoubleTy, 2));
5915     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5916     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5917 
5918   case NEON::BI__builtin_neon_vget_lane_i8:
5919   case NEON::BI__builtin_neon_vdupb_lane_i8:
5920     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
5921     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5922                                         "vget_lane");
5923   case NEON::BI__builtin_neon_vgetq_lane_i8:
5924   case NEON::BI__builtin_neon_vdupb_laneq_i8:
5925     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
5926     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5927                                         "vgetq_lane");
5928   case NEON::BI__builtin_neon_vget_lane_i16:
5929   case NEON::BI__builtin_neon_vduph_lane_i16:
5930     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
5931     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5932                                         "vget_lane");
5933   case NEON::BI__builtin_neon_vgetq_lane_i16:
5934   case NEON::BI__builtin_neon_vduph_laneq_i16:
5935     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
5936     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5937                                         "vgetq_lane");
5938   case NEON::BI__builtin_neon_vget_lane_i32:
5939   case NEON::BI__builtin_neon_vdups_lane_i32:
5940     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
5941     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5942                                         "vget_lane");
5943   case NEON::BI__builtin_neon_vdups_lane_f32:
5944     Ops[0] = Builder.CreateBitCast(Ops[0],
5945         llvm::VectorType::get(FloatTy, 2));
5946     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5947                                         "vdups_lane");
5948   case NEON::BI__builtin_neon_vgetq_lane_i32:
5949   case NEON::BI__builtin_neon_vdups_laneq_i32:
5950     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
5951     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5952                                         "vgetq_lane");
5953   case NEON::BI__builtin_neon_vget_lane_i64:
5954   case NEON::BI__builtin_neon_vdupd_lane_i64:
5955     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
5956     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5957                                         "vget_lane");
5958   case NEON::BI__builtin_neon_vdupd_lane_f64:
5959     Ops[0] = Builder.CreateBitCast(Ops[0],
5960         llvm::VectorType::get(DoubleTy, 1));
5961     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5962                                         "vdupd_lane");
5963   case NEON::BI__builtin_neon_vgetq_lane_i64:
5964   case NEON::BI__builtin_neon_vdupd_laneq_i64:
5965     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
5966     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5967                                         "vgetq_lane");
5968   case NEON::BI__builtin_neon_vget_lane_f32:
5969     Ops[0] = Builder.CreateBitCast(Ops[0],
5970         llvm::VectorType::get(FloatTy, 2));
5971     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5972                                         "vget_lane");
5973   case NEON::BI__builtin_neon_vget_lane_f64:
5974     Ops[0] = Builder.CreateBitCast(Ops[0],
5975         llvm::VectorType::get(DoubleTy, 1));
5976     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5977                                         "vget_lane");
5978   case NEON::BI__builtin_neon_vgetq_lane_f32:
5979   case NEON::BI__builtin_neon_vdups_laneq_f32:
5980     Ops[0] = Builder.CreateBitCast(Ops[0],
5981         llvm::VectorType::get(FloatTy, 4));
5982     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5983                                         "vgetq_lane");
5984   case NEON::BI__builtin_neon_vgetq_lane_f64:
5985   case NEON::BI__builtin_neon_vdupd_laneq_f64:
5986     Ops[0] = Builder.CreateBitCast(Ops[0],
5987         llvm::VectorType::get(DoubleTy, 2));
5988     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5989                                         "vgetq_lane");
5990   case NEON::BI__builtin_neon_vaddd_s64:
5991   case NEON::BI__builtin_neon_vaddd_u64:
5992     return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
5993   case NEON::BI__builtin_neon_vsubd_s64:
5994   case NEON::BI__builtin_neon_vsubd_u64:
5995     return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
5996   case NEON::BI__builtin_neon_vqdmlalh_s16:
5997   case NEON::BI__builtin_neon_vqdmlslh_s16: {
5998     SmallVector<Value *, 2> ProductOps;
5999     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
6000     ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
6001     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
6002     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6003                           ProductOps, "vqdmlXl");
6004     Constant *CI = ConstantInt::get(SizeTy, 0);
6005     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6006 
6007     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
6008                                         ? Intrinsic::aarch64_neon_sqadd
6009                                         : Intrinsic::aarch64_neon_sqsub;
6010     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
6011   }
6012   case NEON::BI__builtin_neon_vqshlud_n_s64: {
6013     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6014     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
6015     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
6016                         Ops, "vqshlu_n");
6017   }
6018   case NEON::BI__builtin_neon_vqshld_n_u64:
6019   case NEON::BI__builtin_neon_vqshld_n_s64: {
6020     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
6021                                    ? Intrinsic::aarch64_neon_uqshl
6022                                    : Intrinsic::aarch64_neon_sqshl;
6023     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6024     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
6025     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
6026   }
6027   case NEON::BI__builtin_neon_vrshrd_n_u64:
6028   case NEON::BI__builtin_neon_vrshrd_n_s64: {
6029     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
6030                                    ? Intrinsic::aarch64_neon_urshl
6031                                    : Intrinsic::aarch64_neon_srshl;
6032     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6033     int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
6034     Ops[1] = ConstantInt::get(Int64Ty, -SV);
6035     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
6036   }
6037   case NEON::BI__builtin_neon_vrsrad_n_u64:
6038   case NEON::BI__builtin_neon_vrsrad_n_s64: {
6039     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
6040                                    ? Intrinsic::aarch64_neon_urshl
6041                                    : Intrinsic::aarch64_neon_srshl;
6042     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6043     Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
6044     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
6045                                 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
6046     return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
6047   }
6048   case NEON::BI__builtin_neon_vshld_n_s64:
6049   case NEON::BI__builtin_neon_vshld_n_u64: {
6050     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6051     return Builder.CreateShl(
6052         Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
6053   }
6054   case NEON::BI__builtin_neon_vshrd_n_s64: {
6055     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6056     return Builder.CreateAShr(
6057         Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
6058                                                    Amt->getZExtValue())),
6059         "shrd_n");
6060   }
6061   case NEON::BI__builtin_neon_vshrd_n_u64: {
6062     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6063     uint64_t ShiftAmt = Amt->getZExtValue();
6064     // Right-shifting an unsigned value by its size yields 0.
6065     if (ShiftAmt == 64)
6066       return ConstantInt::get(Int64Ty, 0);
6067     return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
6068                               "shrd_n");
6069   }
6070   case NEON::BI__builtin_neon_vsrad_n_s64: {
6071     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
6072     Ops[1] = Builder.CreateAShr(
6073         Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
6074                                                    Amt->getZExtValue())),
6075         "shrd_n");
6076     return Builder.CreateAdd(Ops[0], Ops[1]);
6077   }
6078   case NEON::BI__builtin_neon_vsrad_n_u64: {
6079     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
6080     uint64_t ShiftAmt = Amt->getZExtValue();
6081     // Right-shifting an unsigned value by its size yields 0.
6082     // As Op + 0 = Op, return Ops[0] directly.
6083     if (ShiftAmt == 64)
6084       return Ops[0];
6085     Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
6086                                 "shrd_n");
6087     return Builder.CreateAdd(Ops[0], Ops[1]);
6088   }
6089   case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
6090   case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
6091   case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
6092   case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
6093     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
6094                                           "lane");
6095     SmallVector<Value *, 2> ProductOps;
6096     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
6097     ProductOps.push_back(vectorWrapScalar16(Ops[2]));
6098     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
6099     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6100                           ProductOps, "vqdmlXl");
6101     Constant *CI = ConstantInt::get(SizeTy, 0);
6102     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6103     Ops.pop_back();
6104 
6105     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
6106                        BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
6107                           ? Intrinsic::aarch64_neon_sqadd
6108                           : Intrinsic::aarch64_neon_sqsub;
6109     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
6110   }
6111   case NEON::BI__builtin_neon_vqdmlals_s32:
6112   case NEON::BI__builtin_neon_vqdmlsls_s32: {
6113     SmallVector<Value *, 2> ProductOps;
6114     ProductOps.push_back(Ops[1]);
6115     ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
6116     Ops[1] =
6117         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6118                      ProductOps, "vqdmlXl");
6119 
6120     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
6121                                         ? Intrinsic::aarch64_neon_sqadd
6122                                         : Intrinsic::aarch64_neon_sqsub;
6123     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
6124   }
6125   case NEON::BI__builtin_neon_vqdmlals_lane_s32:
6126   case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
6127   case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
6128   case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
6129     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
6130                                           "lane");
6131     SmallVector<Value *, 2> ProductOps;
6132     ProductOps.push_back(Ops[1]);
6133     ProductOps.push_back(Ops[2]);
6134     Ops[1] =
6135         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6136                      ProductOps, "vqdmlXl");
6137     Ops.pop_back();
6138 
6139     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
6140                        BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
6141                           ? Intrinsic::aarch64_neon_sqadd
6142                           : Intrinsic::aarch64_neon_sqsub;
6143     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
6144   }
6145   }
6146 
6147   llvm::VectorType *VTy = GetNeonType(this, Type);
6148   llvm::Type *Ty = VTy;
6149   if (!Ty)
6150     return nullptr;
6151 
6152   // Not all intrinsics handled by the common case work for AArch64 yet, so only
6153   // defer to common code if it's been added to our special map.
6154   Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
6155                                    AArch64SIMDIntrinsicsProvenSorted);
6156 
6157   if (Builtin)
6158     return EmitCommonNeonBuiltinExpr(
6159         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
6160         Builtin->NameHint, Builtin->TypeModifier, E, Ops,
6161         /*never use addresses*/ Address::invalid(), Address::invalid());
6162 
6163   if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops))
6164     return V;
6165 
6166   unsigned Int;
6167   switch (BuiltinID) {
6168   default: return nullptr;
6169   case NEON::BI__builtin_neon_vbsl_v:
6170   case NEON::BI__builtin_neon_vbslq_v: {
6171     llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6172     Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
6173     Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
6174     Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
6175 
6176     Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
6177     Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
6178     Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
6179     return Builder.CreateBitCast(Ops[0], Ty);
6180   }
6181   case NEON::BI__builtin_neon_vfma_lane_v:
6182   case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
6183     // The ARM builtins (and instructions) have the addend as the first
6184     // operand, but the 'fma' intrinsics have it last. Swap it around here.
6185     Value *Addend = Ops[0];
6186     Value *Multiplicand = Ops[1];
6187     Value *LaneSource = Ops[2];
6188     Ops[0] = Multiplicand;
6189     Ops[1] = LaneSource;
6190     Ops[2] = Addend;
6191 
6192     // Now adjust things to handle the lane access.
6193     llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
6194       llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
6195       VTy;
6196     llvm::Constant *cst = cast<Constant>(Ops[3]);
6197     Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
6198     Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
6199     Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
6200 
6201     Ops.pop_back();
6202     Int = Intrinsic::fma;
6203     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
6204   }
6205   case NEON::BI__builtin_neon_vfma_laneq_v: {
6206     llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6207     // v1f64 fma should be mapped to Neon scalar f64 fma
6208     if (VTy && VTy->getElementType() == DoubleTy) {
6209       Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6210       Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6211       llvm::Type *VTy = GetNeonType(this,
6212         NeonTypeFlags(NeonTypeFlags::Float64, false, true));
6213       Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
6214       Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6215       Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
6216       Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
6217       return Builder.CreateBitCast(Result, Ty);
6218     }
6219     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6220     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6221     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6222 
6223     llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
6224                                             VTy->getNumElements() * 2);
6225     Ops[2] = Builder.CreateBitCast(Ops[2], STy);
6226     Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
6227                                                cast<ConstantInt>(Ops[3]));
6228     Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
6229 
6230     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
6231   }
6232   case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6233     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6234     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6235     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6236 
6237     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6238     Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
6239     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
6240   }
6241   case NEON::BI__builtin_neon_vfmas_lane_f32:
6242   case NEON::BI__builtin_neon_vfmas_laneq_f32:
6243   case NEON::BI__builtin_neon_vfmad_lane_f64:
6244   case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6245     Ops.push_back(EmitScalarExpr(E->getArg(3)));
6246     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
6247     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6248     Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6249     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
6250   }
6251   case NEON::BI__builtin_neon_vmull_v:
6252     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6253     Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6254     if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6255     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
6256   case NEON::BI__builtin_neon_vmax_v:
6257   case NEON::BI__builtin_neon_vmaxq_v:
6258     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6259     Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6260     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6261     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
6262   case NEON::BI__builtin_neon_vmin_v:
6263   case NEON::BI__builtin_neon_vminq_v:
6264     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6265     Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6266     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6267     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
6268   case NEON::BI__builtin_neon_vabd_v:
6269   case NEON::BI__builtin_neon_vabdq_v:
6270     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6271     Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6272     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6273     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
6274   case NEON::BI__builtin_neon_vpadal_v:
6275   case NEON::BI__builtin_neon_vpadalq_v: {
6276     unsigned ArgElts = VTy->getNumElements();
6277     llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
6278     unsigned BitWidth = EltTy->getBitWidth();
6279     llvm::Type *ArgTy = llvm::VectorType::get(
6280         llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
6281     llvm::Type* Tys[2] = { VTy, ArgTy };
6282     Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6283     SmallVector<llvm::Value*, 1> TmpOps;
6284     TmpOps.push_back(Ops[1]);
6285     Function *F = CGM.getIntrinsic(Int, Tys);
6286     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
6287     llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
6288     return Builder.CreateAdd(tmp, addend);
6289   }
6290   case NEON::BI__builtin_neon_vpmin_v:
6291   case NEON::BI__builtin_neon_vpminq_v:
6292     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6293     Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6294     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6295     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
6296   case NEON::BI__builtin_neon_vpmax_v:
6297   case NEON::BI__builtin_neon_vpmaxq_v:
6298     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6299     Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6300     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6301     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
6302   case NEON::BI__builtin_neon_vminnm_v:
6303   case NEON::BI__builtin_neon_vminnmq_v:
6304     Int = Intrinsic::aarch64_neon_fminnm;
6305     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
6306   case NEON::BI__builtin_neon_vmaxnm_v:
6307   case NEON::BI__builtin_neon_vmaxnmq_v:
6308     Int = Intrinsic::aarch64_neon_fmaxnm;
6309     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
6310   case NEON::BI__builtin_neon_vrecpss_f32: {
6311     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6312     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
6313                         Ops, "vrecps");
6314   }
6315   case NEON::BI__builtin_neon_vrecpsd_f64: {
6316     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6317     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
6318                         Ops, "vrecps");
6319   }
6320   case NEON::BI__builtin_neon_vqshrun_n_v:
6321     Int = Intrinsic::aarch64_neon_sqshrun;
6322     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
6323   case NEON::BI__builtin_neon_vqrshrun_n_v:
6324     Int = Intrinsic::aarch64_neon_sqrshrun;
6325     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
6326   case NEON::BI__builtin_neon_vqshrn_n_v:
6327     Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6328     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
6329   case NEON::BI__builtin_neon_vrshrn_n_v:
6330     Int = Intrinsic::aarch64_neon_rshrn;
6331     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
6332   case NEON::BI__builtin_neon_vqrshrn_n_v:
6333     Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6334     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
6335   case NEON::BI__builtin_neon_vrnda_v:
6336   case NEON::BI__builtin_neon_vrndaq_v: {
6337     Int = Intrinsic::round;
6338     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
6339   }
6340   case NEON::BI__builtin_neon_vrndi_v:
6341   case NEON::BI__builtin_neon_vrndiq_v: {
6342     Int = Intrinsic::nearbyint;
6343     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
6344   }
6345   case NEON::BI__builtin_neon_vrndm_v:
6346   case NEON::BI__builtin_neon_vrndmq_v: {
6347     Int = Intrinsic::floor;
6348     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
6349   }
6350   case NEON::BI__builtin_neon_vrndn_v:
6351   case NEON::BI__builtin_neon_vrndnq_v: {
6352     Int = Intrinsic::aarch64_neon_frintn;
6353     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
6354   }
6355   case NEON::BI__builtin_neon_vrndp_v:
6356   case NEON::BI__builtin_neon_vrndpq_v: {
6357     Int = Intrinsic::ceil;
6358     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
6359   }
6360   case NEON::BI__builtin_neon_vrndx_v:
6361   case NEON::BI__builtin_neon_vrndxq_v: {
6362     Int = Intrinsic::rint;
6363     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
6364   }
6365   case NEON::BI__builtin_neon_vrnd_v:
6366   case NEON::BI__builtin_neon_vrndq_v: {
6367     Int = Intrinsic::trunc;
6368     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
6369   }
6370   case NEON::BI__builtin_neon_vceqz_v:
6371   case NEON::BI__builtin_neon_vceqzq_v:
6372     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
6373                                          ICmpInst::ICMP_EQ, "vceqz");
6374   case NEON::BI__builtin_neon_vcgez_v:
6375   case NEON::BI__builtin_neon_vcgezq_v:
6376     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
6377                                          ICmpInst::ICMP_SGE, "vcgez");
6378   case NEON::BI__builtin_neon_vclez_v:
6379   case NEON::BI__builtin_neon_vclezq_v:
6380     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
6381                                          ICmpInst::ICMP_SLE, "vclez");
6382   case NEON::BI__builtin_neon_vcgtz_v:
6383   case NEON::BI__builtin_neon_vcgtzq_v:
6384     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
6385                                          ICmpInst::ICMP_SGT, "vcgtz");
6386   case NEON::BI__builtin_neon_vcltz_v:
6387   case NEON::BI__builtin_neon_vcltzq_v:
6388     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
6389                                          ICmpInst::ICMP_SLT, "vcltz");
6390   case NEON::BI__builtin_neon_vcvt_f64_v:
6391   case NEON::BI__builtin_neon_vcvtq_f64_v:
6392     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6393     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
6394     return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
6395                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
6396   case NEON::BI__builtin_neon_vcvt_f64_f32: {
6397     assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
6398            "unexpected vcvt_f64_f32 builtin");
6399     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
6400     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6401 
6402     return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
6403   }
6404   case NEON::BI__builtin_neon_vcvt_f32_f64: {
6405     assert(Type.getEltType() == NeonTypeFlags::Float32 &&
6406            "unexpected vcvt_f32_f64 builtin");
6407     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
6408     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6409 
6410     return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
6411   }
6412   case NEON::BI__builtin_neon_vcvt_s32_v:
6413   case NEON::BI__builtin_neon_vcvt_u32_v:
6414   case NEON::BI__builtin_neon_vcvt_s64_v:
6415   case NEON::BI__builtin_neon_vcvt_u64_v:
6416   case NEON::BI__builtin_neon_vcvtq_s32_v:
6417   case NEON::BI__builtin_neon_vcvtq_u32_v:
6418   case NEON::BI__builtin_neon_vcvtq_s64_v:
6419   case NEON::BI__builtin_neon_vcvtq_u64_v: {
6420     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
6421     if (usgn)
6422       return Builder.CreateFPToUI(Ops[0], Ty);
6423     return Builder.CreateFPToSI(Ops[0], Ty);
6424   }
6425   case NEON::BI__builtin_neon_vcvta_s32_v:
6426   case NEON::BI__builtin_neon_vcvtaq_s32_v:
6427   case NEON::BI__builtin_neon_vcvta_u32_v:
6428   case NEON::BI__builtin_neon_vcvtaq_u32_v:
6429   case NEON::BI__builtin_neon_vcvta_s64_v:
6430   case NEON::BI__builtin_neon_vcvtaq_s64_v:
6431   case NEON::BI__builtin_neon_vcvta_u64_v:
6432   case NEON::BI__builtin_neon_vcvtaq_u64_v: {
6433     Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6434     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6435     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
6436   }
6437   case NEON::BI__builtin_neon_vcvtm_s32_v:
6438   case NEON::BI__builtin_neon_vcvtmq_s32_v:
6439   case NEON::BI__builtin_neon_vcvtm_u32_v:
6440   case NEON::BI__builtin_neon_vcvtmq_u32_v:
6441   case NEON::BI__builtin_neon_vcvtm_s64_v:
6442   case NEON::BI__builtin_neon_vcvtmq_s64_v:
6443   case NEON::BI__builtin_neon_vcvtm_u64_v:
6444   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6445     Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6446     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6447     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
6448   }
6449   case NEON::BI__builtin_neon_vcvtn_s32_v:
6450   case NEON::BI__builtin_neon_vcvtnq_s32_v:
6451   case NEON::BI__builtin_neon_vcvtn_u32_v:
6452   case NEON::BI__builtin_neon_vcvtnq_u32_v:
6453   case NEON::BI__builtin_neon_vcvtn_s64_v:
6454   case NEON::BI__builtin_neon_vcvtnq_s64_v:
6455   case NEON::BI__builtin_neon_vcvtn_u64_v:
6456   case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6457     Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6458     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6459     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
6460   }
6461   case NEON::BI__builtin_neon_vcvtp_s32_v:
6462   case NEON::BI__builtin_neon_vcvtpq_s32_v:
6463   case NEON::BI__builtin_neon_vcvtp_u32_v:
6464   case NEON::BI__builtin_neon_vcvtpq_u32_v:
6465   case NEON::BI__builtin_neon_vcvtp_s64_v:
6466   case NEON::BI__builtin_neon_vcvtpq_s64_v:
6467   case NEON::BI__builtin_neon_vcvtp_u64_v:
6468   case NEON::BI__builtin_neon_vcvtpq_u64_v: {
6469     Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
6470     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6471     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
6472   }
6473   case NEON::BI__builtin_neon_vmulx_v:
6474   case NEON::BI__builtin_neon_vmulxq_v: {
6475     Int = Intrinsic::aarch64_neon_fmulx;
6476     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
6477   }
6478   case NEON::BI__builtin_neon_vmul_lane_v:
6479   case NEON::BI__builtin_neon_vmul_laneq_v: {
6480     // v1f64 vmul_lane should be mapped to Neon scalar mul lane
6481     bool Quad = false;
6482     if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
6483       Quad = true;
6484     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6485     llvm::Type *VTy = GetNeonType(this,
6486       NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
6487     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6488     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
6489     Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
6490     return Builder.CreateBitCast(Result, Ty);
6491   }
6492   case NEON::BI__builtin_neon_vnegd_s64:
6493     return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
6494   case NEON::BI__builtin_neon_vpmaxnm_v:
6495   case NEON::BI__builtin_neon_vpmaxnmq_v: {
6496     Int = Intrinsic::aarch64_neon_fmaxnmp;
6497     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
6498   }
6499   case NEON::BI__builtin_neon_vpminnm_v:
6500   case NEON::BI__builtin_neon_vpminnmq_v: {
6501     Int = Intrinsic::aarch64_neon_fminnmp;
6502     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
6503   }
6504   case NEON::BI__builtin_neon_vsqrt_v:
6505   case NEON::BI__builtin_neon_vsqrtq_v: {
6506     Int = Intrinsic::sqrt;
6507     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6508     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
6509   }
6510   case NEON::BI__builtin_neon_vrbit_v:
6511   case NEON::BI__builtin_neon_vrbitq_v: {
6512     Int = Intrinsic::aarch64_neon_rbit;
6513     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
6514   }
6515   case NEON::BI__builtin_neon_vaddv_u8:
6516     // FIXME: These are handled by the AArch64 scalar code.
6517     usgn = true;
6518     // FALLTHROUGH
6519   case NEON::BI__builtin_neon_vaddv_s8: {
6520     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6521     Ty = Int32Ty;
6522     VTy = llvm::VectorType::get(Int8Ty, 8);
6523     llvm::Type *Tys[2] = { Ty, VTy };
6524     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6525     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6526     return Builder.CreateTrunc(Ops[0], Int8Ty);
6527   }
6528   case NEON::BI__builtin_neon_vaddv_u16:
6529     usgn = true;
6530     // FALLTHROUGH
6531   case NEON::BI__builtin_neon_vaddv_s16: {
6532     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6533     Ty = Int32Ty;
6534     VTy = llvm::VectorType::get(Int16Ty, 4);
6535     llvm::Type *Tys[2] = { Ty, VTy };
6536     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6537     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6538     return Builder.CreateTrunc(Ops[0], Int16Ty);
6539   }
6540   case NEON::BI__builtin_neon_vaddvq_u8:
6541     usgn = true;
6542     // FALLTHROUGH
6543   case NEON::BI__builtin_neon_vaddvq_s8: {
6544     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6545     Ty = Int32Ty;
6546     VTy = llvm::VectorType::get(Int8Ty, 16);
6547     llvm::Type *Tys[2] = { Ty, VTy };
6548     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6549     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6550     return Builder.CreateTrunc(Ops[0], Int8Ty);
6551   }
6552   case NEON::BI__builtin_neon_vaddvq_u16:
6553     usgn = true;
6554     // FALLTHROUGH
6555   case NEON::BI__builtin_neon_vaddvq_s16: {
6556     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6557     Ty = Int32Ty;
6558     VTy = llvm::VectorType::get(Int16Ty, 8);
6559     llvm::Type *Tys[2] = { Ty, VTy };
6560     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6561     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6562     return Builder.CreateTrunc(Ops[0], Int16Ty);
6563   }
6564   case NEON::BI__builtin_neon_vmaxv_u8: {
6565     Int = Intrinsic::aarch64_neon_umaxv;
6566     Ty = Int32Ty;
6567     VTy = llvm::VectorType::get(Int8Ty, 8);
6568     llvm::Type *Tys[2] = { Ty, VTy };
6569     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6570     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6571     return Builder.CreateTrunc(Ops[0], Int8Ty);
6572   }
6573   case NEON::BI__builtin_neon_vmaxv_u16: {
6574     Int = Intrinsic::aarch64_neon_umaxv;
6575     Ty = Int32Ty;
6576     VTy = llvm::VectorType::get(Int16Ty, 4);
6577     llvm::Type *Tys[2] = { Ty, VTy };
6578     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6579     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6580     return Builder.CreateTrunc(Ops[0], Int16Ty);
6581   }
6582   case NEON::BI__builtin_neon_vmaxvq_u8: {
6583     Int = Intrinsic::aarch64_neon_umaxv;
6584     Ty = Int32Ty;
6585     VTy = llvm::VectorType::get(Int8Ty, 16);
6586     llvm::Type *Tys[2] = { Ty, VTy };
6587     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6588     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6589     return Builder.CreateTrunc(Ops[0], Int8Ty);
6590   }
6591   case NEON::BI__builtin_neon_vmaxvq_u16: {
6592     Int = Intrinsic::aarch64_neon_umaxv;
6593     Ty = Int32Ty;
6594     VTy = llvm::VectorType::get(Int16Ty, 8);
6595     llvm::Type *Tys[2] = { Ty, VTy };
6596     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6597     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6598     return Builder.CreateTrunc(Ops[0], Int16Ty);
6599   }
6600   case NEON::BI__builtin_neon_vmaxv_s8: {
6601     Int = Intrinsic::aarch64_neon_smaxv;
6602     Ty = Int32Ty;
6603     VTy = llvm::VectorType::get(Int8Ty, 8);
6604     llvm::Type *Tys[2] = { Ty, VTy };
6605     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6606     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6607     return Builder.CreateTrunc(Ops[0], Int8Ty);
6608   }
6609   case NEON::BI__builtin_neon_vmaxv_s16: {
6610     Int = Intrinsic::aarch64_neon_smaxv;
6611     Ty = Int32Ty;
6612     VTy = llvm::VectorType::get(Int16Ty, 4);
6613     llvm::Type *Tys[2] = { Ty, VTy };
6614     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6615     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6616     return Builder.CreateTrunc(Ops[0], Int16Ty);
6617   }
6618   case NEON::BI__builtin_neon_vmaxvq_s8: {
6619     Int = Intrinsic::aarch64_neon_smaxv;
6620     Ty = Int32Ty;
6621     VTy = llvm::VectorType::get(Int8Ty, 16);
6622     llvm::Type *Tys[2] = { Ty, VTy };
6623     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6624     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6625     return Builder.CreateTrunc(Ops[0], Int8Ty);
6626   }
6627   case NEON::BI__builtin_neon_vmaxvq_s16: {
6628     Int = Intrinsic::aarch64_neon_smaxv;
6629     Ty = Int32Ty;
6630     VTy = llvm::VectorType::get(Int16Ty, 8);
6631     llvm::Type *Tys[2] = { Ty, VTy };
6632     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6633     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6634     return Builder.CreateTrunc(Ops[0], Int16Ty);
6635   }
6636   case NEON::BI__builtin_neon_vminv_u8: {
6637     Int = Intrinsic::aarch64_neon_uminv;
6638     Ty = Int32Ty;
6639     VTy = llvm::VectorType::get(Int8Ty, 8);
6640     llvm::Type *Tys[2] = { Ty, VTy };
6641     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6642     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6643     return Builder.CreateTrunc(Ops[0], Int8Ty);
6644   }
6645   case NEON::BI__builtin_neon_vminv_u16: {
6646     Int = Intrinsic::aarch64_neon_uminv;
6647     Ty = Int32Ty;
6648     VTy = llvm::VectorType::get(Int16Ty, 4);
6649     llvm::Type *Tys[2] = { Ty, VTy };
6650     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6651     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6652     return Builder.CreateTrunc(Ops[0], Int16Ty);
6653   }
6654   case NEON::BI__builtin_neon_vminvq_u8: {
6655     Int = Intrinsic::aarch64_neon_uminv;
6656     Ty = Int32Ty;
6657     VTy = llvm::VectorType::get(Int8Ty, 16);
6658     llvm::Type *Tys[2] = { Ty, VTy };
6659     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6660     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6661     return Builder.CreateTrunc(Ops[0], Int8Ty);
6662   }
6663   case NEON::BI__builtin_neon_vminvq_u16: {
6664     Int = Intrinsic::aarch64_neon_uminv;
6665     Ty = Int32Ty;
6666     VTy = llvm::VectorType::get(Int16Ty, 8);
6667     llvm::Type *Tys[2] = { Ty, VTy };
6668     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6669     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6670     return Builder.CreateTrunc(Ops[0], Int16Ty);
6671   }
6672   case NEON::BI__builtin_neon_vminv_s8: {
6673     Int = Intrinsic::aarch64_neon_sminv;
6674     Ty = Int32Ty;
6675     VTy = llvm::VectorType::get(Int8Ty, 8);
6676     llvm::Type *Tys[2] = { Ty, VTy };
6677     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6678     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6679     return Builder.CreateTrunc(Ops[0], Int8Ty);
6680   }
6681   case NEON::BI__builtin_neon_vminv_s16: {
6682     Int = Intrinsic::aarch64_neon_sminv;
6683     Ty = Int32Ty;
6684     VTy = llvm::VectorType::get(Int16Ty, 4);
6685     llvm::Type *Tys[2] = { Ty, VTy };
6686     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6687     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6688     return Builder.CreateTrunc(Ops[0], Int16Ty);
6689   }
6690   case NEON::BI__builtin_neon_vminvq_s8: {
6691     Int = Intrinsic::aarch64_neon_sminv;
6692     Ty = Int32Ty;
6693     VTy = llvm::VectorType::get(Int8Ty, 16);
6694     llvm::Type *Tys[2] = { Ty, VTy };
6695     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6696     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6697     return Builder.CreateTrunc(Ops[0], Int8Ty);
6698   }
6699   case NEON::BI__builtin_neon_vminvq_s16: {
6700     Int = Intrinsic::aarch64_neon_sminv;
6701     Ty = Int32Ty;
6702     VTy = llvm::VectorType::get(Int16Ty, 8);
6703     llvm::Type *Tys[2] = { Ty, VTy };
6704     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6705     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6706     return Builder.CreateTrunc(Ops[0], Int16Ty);
6707   }
6708   case NEON::BI__builtin_neon_vmul_n_f64: {
6709     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6710     Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
6711     return Builder.CreateFMul(Ops[0], RHS);
6712   }
6713   case NEON::BI__builtin_neon_vaddlv_u8: {
6714     Int = Intrinsic::aarch64_neon_uaddlv;
6715     Ty = Int32Ty;
6716     VTy = llvm::VectorType::get(Int8Ty, 8);
6717     llvm::Type *Tys[2] = { Ty, VTy };
6718     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6719     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6720     return Builder.CreateTrunc(Ops[0], Int16Ty);
6721   }
6722   case NEON::BI__builtin_neon_vaddlv_u16: {
6723     Int = Intrinsic::aarch64_neon_uaddlv;
6724     Ty = Int32Ty;
6725     VTy = llvm::VectorType::get(Int16Ty, 4);
6726     llvm::Type *Tys[2] = { Ty, VTy };
6727     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6728     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6729   }
6730   case NEON::BI__builtin_neon_vaddlvq_u8: {
6731     Int = Intrinsic::aarch64_neon_uaddlv;
6732     Ty = Int32Ty;
6733     VTy = llvm::VectorType::get(Int8Ty, 16);
6734     llvm::Type *Tys[2] = { Ty, VTy };
6735     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6736     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6737     return Builder.CreateTrunc(Ops[0], Int16Ty);
6738   }
6739   case NEON::BI__builtin_neon_vaddlvq_u16: {
6740     Int = Intrinsic::aarch64_neon_uaddlv;
6741     Ty = Int32Ty;
6742     VTy = llvm::VectorType::get(Int16Ty, 8);
6743     llvm::Type *Tys[2] = { Ty, VTy };
6744     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6745     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6746   }
6747   case NEON::BI__builtin_neon_vaddlv_s8: {
6748     Int = Intrinsic::aarch64_neon_saddlv;
6749     Ty = Int32Ty;
6750     VTy = llvm::VectorType::get(Int8Ty, 8);
6751     llvm::Type *Tys[2] = { Ty, VTy };
6752     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6753     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6754     return Builder.CreateTrunc(Ops[0], Int16Ty);
6755   }
6756   case NEON::BI__builtin_neon_vaddlv_s16: {
6757     Int = Intrinsic::aarch64_neon_saddlv;
6758     Ty = Int32Ty;
6759     VTy = llvm::VectorType::get(Int16Ty, 4);
6760     llvm::Type *Tys[2] = { Ty, VTy };
6761     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6762     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6763   }
6764   case NEON::BI__builtin_neon_vaddlvq_s8: {
6765     Int = Intrinsic::aarch64_neon_saddlv;
6766     Ty = Int32Ty;
6767     VTy = llvm::VectorType::get(Int8Ty, 16);
6768     llvm::Type *Tys[2] = { Ty, VTy };
6769     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6770     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6771     return Builder.CreateTrunc(Ops[0], Int16Ty);
6772   }
6773   case NEON::BI__builtin_neon_vaddlvq_s16: {
6774     Int = Intrinsic::aarch64_neon_saddlv;
6775     Ty = Int32Ty;
6776     VTy = llvm::VectorType::get(Int16Ty, 8);
6777     llvm::Type *Tys[2] = { Ty, VTy };
6778     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6779     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6780   }
6781   case NEON::BI__builtin_neon_vsri_n_v:
6782   case NEON::BI__builtin_neon_vsriq_n_v: {
6783     Int = Intrinsic::aarch64_neon_vsri;
6784     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6785     return EmitNeonCall(Intrin, Ops, "vsri_n");
6786   }
6787   case NEON::BI__builtin_neon_vsli_n_v:
6788   case NEON::BI__builtin_neon_vsliq_n_v: {
6789     Int = Intrinsic::aarch64_neon_vsli;
6790     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6791     return EmitNeonCall(Intrin, Ops, "vsli_n");
6792   }
6793   case NEON::BI__builtin_neon_vsra_n_v:
6794   case NEON::BI__builtin_neon_vsraq_n_v:
6795     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6796     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
6797     return Builder.CreateAdd(Ops[0], Ops[1]);
6798   case NEON::BI__builtin_neon_vrsra_n_v:
6799   case NEON::BI__builtin_neon_vrsraq_n_v: {
6800     Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
6801     SmallVector<llvm::Value*,2> TmpOps;
6802     TmpOps.push_back(Ops[1]);
6803     TmpOps.push_back(Ops[2]);
6804     Function* F = CGM.getIntrinsic(Int, Ty);
6805     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
6806     Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
6807     return Builder.CreateAdd(Ops[0], tmp);
6808   }
6809     // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
6810     // of an Align parameter here.
6811   case NEON::BI__builtin_neon_vld1_x2_v:
6812   case NEON::BI__builtin_neon_vld1q_x2_v:
6813   case NEON::BI__builtin_neon_vld1_x3_v:
6814   case NEON::BI__builtin_neon_vld1q_x3_v:
6815   case NEON::BI__builtin_neon_vld1_x4_v:
6816   case NEON::BI__builtin_neon_vld1q_x4_v: {
6817     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6818     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6819     llvm::Type *Tys[2] = { VTy, PTy };
6820     unsigned Int;
6821     switch (BuiltinID) {
6822     case NEON::BI__builtin_neon_vld1_x2_v:
6823     case NEON::BI__builtin_neon_vld1q_x2_v:
6824       Int = Intrinsic::aarch64_neon_ld1x2;
6825       break;
6826     case NEON::BI__builtin_neon_vld1_x3_v:
6827     case NEON::BI__builtin_neon_vld1q_x3_v:
6828       Int = Intrinsic::aarch64_neon_ld1x3;
6829       break;
6830     case NEON::BI__builtin_neon_vld1_x4_v:
6831     case NEON::BI__builtin_neon_vld1q_x4_v:
6832       Int = Intrinsic::aarch64_neon_ld1x4;
6833       break;
6834     }
6835     Function *F = CGM.getIntrinsic(Int, Tys);
6836     Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
6837     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6838     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6839     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6840   }
6841   case NEON::BI__builtin_neon_vst1_x2_v:
6842   case NEON::BI__builtin_neon_vst1q_x2_v:
6843   case NEON::BI__builtin_neon_vst1_x3_v:
6844   case NEON::BI__builtin_neon_vst1q_x3_v:
6845   case NEON::BI__builtin_neon_vst1_x4_v:
6846   case NEON::BI__builtin_neon_vst1q_x4_v: {
6847     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6848     llvm::Type *Tys[2] = { VTy, PTy };
6849     unsigned Int;
6850     switch (BuiltinID) {
6851     case NEON::BI__builtin_neon_vst1_x2_v:
6852     case NEON::BI__builtin_neon_vst1q_x2_v:
6853       Int = Intrinsic::aarch64_neon_st1x2;
6854       break;
6855     case NEON::BI__builtin_neon_vst1_x3_v:
6856     case NEON::BI__builtin_neon_vst1q_x3_v:
6857       Int = Intrinsic::aarch64_neon_st1x3;
6858       break;
6859     case NEON::BI__builtin_neon_vst1_x4_v:
6860     case NEON::BI__builtin_neon_vst1q_x4_v:
6861       Int = Intrinsic::aarch64_neon_st1x4;
6862       break;
6863     }
6864     std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6865     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
6866   }
6867   case NEON::BI__builtin_neon_vld1_v:
6868   case NEON::BI__builtin_neon_vld1q_v: {
6869     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6870     auto Alignment = CharUnits::fromQuantity(
6871         BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16);
6872     return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment);
6873   }
6874   case NEON::BI__builtin_neon_vst1_v:
6875   case NEON::BI__builtin_neon_vst1q_v:
6876     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6877     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6878     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6879   case NEON::BI__builtin_neon_vld1_lane_v:
6880   case NEON::BI__builtin_neon_vld1q_lane_v: {
6881     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6882     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6883     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6884     auto Alignment = CharUnits::fromQuantity(
6885         BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16);
6886     Ops[0] =
6887         Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
6888     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
6889   }
6890   case NEON::BI__builtin_neon_vld1_dup_v:
6891   case NEON::BI__builtin_neon_vld1q_dup_v: {
6892     Value *V = UndefValue::get(Ty);
6893     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6894     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6895     auto Alignment = CharUnits::fromQuantity(
6896         BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 8 : 16);
6897     Ops[0] =
6898         Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
6899     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
6900     Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
6901     return EmitNeonSplat(Ops[0], CI);
6902   }
6903   case NEON::BI__builtin_neon_vst1_lane_v:
6904   case NEON::BI__builtin_neon_vst1q_lane_v:
6905     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6906     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6907     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6908     return Builder.CreateDefaultAlignedStore(Ops[1],
6909                                              Builder.CreateBitCast(Ops[0], Ty));
6910   case NEON::BI__builtin_neon_vld2_v:
6911   case NEON::BI__builtin_neon_vld2q_v: {
6912     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6913     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6914     llvm::Type *Tys[2] = { VTy, PTy };
6915     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
6916     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6917     Ops[0] = Builder.CreateBitCast(Ops[0],
6918                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6919     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6920   }
6921   case NEON::BI__builtin_neon_vld3_v:
6922   case NEON::BI__builtin_neon_vld3q_v: {
6923     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6924     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6925     llvm::Type *Tys[2] = { VTy, PTy };
6926     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
6927     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6928     Ops[0] = Builder.CreateBitCast(Ops[0],
6929                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6930     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6931   }
6932   case NEON::BI__builtin_neon_vld4_v:
6933   case NEON::BI__builtin_neon_vld4q_v: {
6934     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6935     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6936     llvm::Type *Tys[2] = { VTy, PTy };
6937     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
6938     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6939     Ops[0] = Builder.CreateBitCast(Ops[0],
6940                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6941     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6942   }
6943   case NEON::BI__builtin_neon_vld2_dup_v:
6944   case NEON::BI__builtin_neon_vld2q_dup_v: {
6945     llvm::Type *PTy =
6946       llvm::PointerType::getUnqual(VTy->getElementType());
6947     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6948     llvm::Type *Tys[2] = { VTy, PTy };
6949     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
6950     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6951     Ops[0] = Builder.CreateBitCast(Ops[0],
6952                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6953     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6954   }
6955   case NEON::BI__builtin_neon_vld3_dup_v:
6956   case NEON::BI__builtin_neon_vld3q_dup_v: {
6957     llvm::Type *PTy =
6958       llvm::PointerType::getUnqual(VTy->getElementType());
6959     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6960     llvm::Type *Tys[2] = { VTy, PTy };
6961     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
6962     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6963     Ops[0] = Builder.CreateBitCast(Ops[0],
6964                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6965     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6966   }
6967   case NEON::BI__builtin_neon_vld4_dup_v:
6968   case NEON::BI__builtin_neon_vld4q_dup_v: {
6969     llvm::Type *PTy =
6970       llvm::PointerType::getUnqual(VTy->getElementType());
6971     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6972     llvm::Type *Tys[2] = { VTy, PTy };
6973     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
6974     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6975     Ops[0] = Builder.CreateBitCast(Ops[0],
6976                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6977     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6978   }
6979   case NEON::BI__builtin_neon_vld2_lane_v:
6980   case NEON::BI__builtin_neon_vld2q_lane_v: {
6981     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6982     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
6983     Ops.push_back(Ops[1]);
6984     Ops.erase(Ops.begin()+1);
6985     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6986     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6987     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6988     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
6989     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6990     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6991     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6992   }
6993   case NEON::BI__builtin_neon_vld3_lane_v:
6994   case NEON::BI__builtin_neon_vld3q_lane_v: {
6995     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6996     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
6997     Ops.push_back(Ops[1]);
6998     Ops.erase(Ops.begin()+1);
6999     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7000     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7001     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
7002     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
7003     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
7004     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
7005     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7006     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7007   }
7008   case NEON::BI__builtin_neon_vld4_lane_v:
7009   case NEON::BI__builtin_neon_vld4q_lane_v: {
7010     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7011     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
7012     Ops.push_back(Ops[1]);
7013     Ops.erase(Ops.begin()+1);
7014     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7015     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7016     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
7017     Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
7018     Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
7019     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
7020     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
7021     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7022     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7023   }
7024   case NEON::BI__builtin_neon_vst2_v:
7025   case NEON::BI__builtin_neon_vst2q_v: {
7026     Ops.push_back(Ops[0]);
7027     Ops.erase(Ops.begin());
7028     llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
7029     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
7030                         Ops, "");
7031   }
7032   case NEON::BI__builtin_neon_vst2_lane_v:
7033   case NEON::BI__builtin_neon_vst2q_lane_v: {
7034     Ops.push_back(Ops[0]);
7035     Ops.erase(Ops.begin());
7036     Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
7037     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7038     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
7039                         Ops, "");
7040   }
7041   case NEON::BI__builtin_neon_vst3_v:
7042   case NEON::BI__builtin_neon_vst3q_v: {
7043     Ops.push_back(Ops[0]);
7044     Ops.erase(Ops.begin());
7045     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7046     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
7047                         Ops, "");
7048   }
7049   case NEON::BI__builtin_neon_vst3_lane_v:
7050   case NEON::BI__builtin_neon_vst3q_lane_v: {
7051     Ops.push_back(Ops[0]);
7052     Ops.erase(Ops.begin());
7053     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
7054     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7055     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
7056                         Ops, "");
7057   }
7058   case NEON::BI__builtin_neon_vst4_v:
7059   case NEON::BI__builtin_neon_vst4q_v: {
7060     Ops.push_back(Ops[0]);
7061     Ops.erase(Ops.begin());
7062     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7063     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
7064                         Ops, "");
7065   }
7066   case NEON::BI__builtin_neon_vst4_lane_v:
7067   case NEON::BI__builtin_neon_vst4q_lane_v: {
7068     Ops.push_back(Ops[0]);
7069     Ops.erase(Ops.begin());
7070     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
7071     llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
7072     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
7073                         Ops, "");
7074   }
7075   case NEON::BI__builtin_neon_vtrn_v:
7076   case NEON::BI__builtin_neon_vtrnq_v: {
7077     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
7078     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7079     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7080     Value *SV = nullptr;
7081 
7082     for (unsigned vi = 0; vi != 2; ++vi) {
7083       SmallVector<uint32_t, 16> Indices;
7084       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7085         Indices.push_back(i+vi);
7086         Indices.push_back(i+e+vi);
7087       }
7088       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7089       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
7090       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7091     }
7092     return SV;
7093   }
7094   case NEON::BI__builtin_neon_vuzp_v:
7095   case NEON::BI__builtin_neon_vuzpq_v: {
7096     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
7097     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7098     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7099     Value *SV = nullptr;
7100 
7101     for (unsigned vi = 0; vi != 2; ++vi) {
7102       SmallVector<uint32_t, 16> Indices;
7103       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7104         Indices.push_back(2*i+vi);
7105 
7106       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7107       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
7108       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7109     }
7110     return SV;
7111   }
7112   case NEON::BI__builtin_neon_vzip_v:
7113   case NEON::BI__builtin_neon_vzipq_v: {
7114     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
7115     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7116     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7117     Value *SV = nullptr;
7118 
7119     for (unsigned vi = 0; vi != 2; ++vi) {
7120       SmallVector<uint32_t, 16> Indices;
7121       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7122         Indices.push_back((i + vi*e) >> 1);
7123         Indices.push_back(((i + vi*e) >> 1)+e);
7124       }
7125       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7126       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
7127       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7128     }
7129     return SV;
7130   }
7131   case NEON::BI__builtin_neon_vqtbl1q_v: {
7132     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
7133                         Ops, "vtbl1");
7134   }
7135   case NEON::BI__builtin_neon_vqtbl2q_v: {
7136     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
7137                         Ops, "vtbl2");
7138   }
7139   case NEON::BI__builtin_neon_vqtbl3q_v: {
7140     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
7141                         Ops, "vtbl3");
7142   }
7143   case NEON::BI__builtin_neon_vqtbl4q_v: {
7144     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
7145                         Ops, "vtbl4");
7146   }
7147   case NEON::BI__builtin_neon_vqtbx1q_v: {
7148     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
7149                         Ops, "vtbx1");
7150   }
7151   case NEON::BI__builtin_neon_vqtbx2q_v: {
7152     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
7153                         Ops, "vtbx2");
7154   }
7155   case NEON::BI__builtin_neon_vqtbx3q_v: {
7156     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
7157                         Ops, "vtbx3");
7158   }
7159   case NEON::BI__builtin_neon_vqtbx4q_v: {
7160     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
7161                         Ops, "vtbx4");
7162   }
7163   case NEON::BI__builtin_neon_vsqadd_v:
7164   case NEON::BI__builtin_neon_vsqaddq_v: {
7165     Int = Intrinsic::aarch64_neon_usqadd;
7166     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
7167   }
7168   case NEON::BI__builtin_neon_vuqadd_v:
7169   case NEON::BI__builtin_neon_vuqaddq_v: {
7170     Int = Intrinsic::aarch64_neon_suqadd;
7171     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
7172   }
7173   }
7174 }
7175 
7176 llvm::Value *CodeGenFunction::
7177 BuildVector(ArrayRef<llvm::Value*> Ops) {
7178   assert((Ops.size() & (Ops.size() - 1)) == 0 &&
7179          "Not a power-of-two sized vector!");
7180   bool AllConstants = true;
7181   for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
7182     AllConstants &= isa<Constant>(Ops[i]);
7183 
7184   // If this is a constant vector, create a ConstantVector.
7185   if (AllConstants) {
7186     SmallVector<llvm::Constant*, 16> CstOps;
7187     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7188       CstOps.push_back(cast<Constant>(Ops[i]));
7189     return llvm::ConstantVector::get(CstOps);
7190   }
7191 
7192   // Otherwise, insertelement the values to build the vector.
7193   Value *Result =
7194     llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
7195 
7196   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7197     Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
7198 
7199   return Result;
7200 }
7201 
7202 // Convert the mask from an integer type to a vector of i1.
7203 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
7204                               unsigned NumElts) {
7205 
7206   llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
7207                          cast<IntegerType>(Mask->getType())->getBitWidth());
7208   Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
7209 
7210   // If we have less than 8 elements, then the starting mask was an i8 and
7211   // we need to extract down to the right number of elements.
7212   if (NumElts < 8) {
7213     uint32_t Indices[4];
7214     for (unsigned i = 0; i != NumElts; ++i)
7215       Indices[i] = i;
7216     MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
7217                                              makeArrayRef(Indices, NumElts),
7218                                              "extract");
7219   }
7220   return MaskVec;
7221 }
7222 
7223 static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
7224                                  SmallVectorImpl<Value *> &Ops,
7225                                  unsigned Align) {
7226   // Cast the pointer to right type.
7227   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
7228                                llvm::PointerType::getUnqual(Ops[1]->getType()));
7229 
7230   // If the mask is all ones just emit a regular store.
7231   if (const auto *C = dyn_cast<Constant>(Ops[2]))
7232     if (C->isAllOnesValue())
7233       return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align);
7234 
7235   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
7236                                    Ops[1]->getType()->getVectorNumElements());
7237 
7238   return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec);
7239 }
7240 
7241 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
7242                                 SmallVectorImpl<Value *> &Ops, unsigned Align) {
7243   // Cast the pointer to right type.
7244   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
7245                                llvm::PointerType::getUnqual(Ops[1]->getType()));
7246 
7247   // If the mask is all ones just emit a regular store.
7248   if (const auto *C = dyn_cast<Constant>(Ops[2]))
7249     if (C->isAllOnesValue())
7250       return CGF.Builder.CreateAlignedLoad(Ops[0], Align);
7251 
7252   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
7253                                    Ops[1]->getType()->getVectorNumElements());
7254 
7255   return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
7256 }
7257 
7258 static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF,
7259                                         SmallVectorImpl<Value *> &Ops,
7260                                         llvm::Type *DstTy,
7261                                         unsigned SrcSizeInBits,
7262                                         unsigned Align) {
7263   // Load the subvector.
7264   Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align);
7265 
7266   // Create broadcast mask.
7267   unsigned NumDstElts = DstTy->getVectorNumElements();
7268   unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits();
7269 
7270   SmallVector<uint32_t, 8> Mask;
7271   for (unsigned i = 0; i != NumDstElts; i += NumSrcElts)
7272     for (unsigned j = 0; j != NumSrcElts; ++j)
7273       Mask.push_back(j);
7274 
7275   return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst");
7276 }
7277 
7278 static Value *EmitX86Select(CodeGenFunction &CGF,
7279                             Value *Mask, Value *Op0, Value *Op1) {
7280 
7281   // If the mask is all ones just return first argument.
7282   if (const auto *C = dyn_cast<Constant>(Mask))
7283     if (C->isAllOnesValue())
7284       return Op0;
7285 
7286   Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());
7287 
7288   return CGF.Builder.CreateSelect(Mask, Op0, Op1);
7289 }
7290 
7291 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
7292                                    bool Signed, SmallVectorImpl<Value *> &Ops) {
7293   unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7294   Value *Cmp;
7295 
7296   if (CC == 3) {
7297     Cmp = Constant::getNullValue(
7298                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
7299   } else if (CC == 7) {
7300     Cmp = Constant::getAllOnesValue(
7301                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
7302   } else {
7303     ICmpInst::Predicate Pred;
7304     switch (CC) {
7305     default: llvm_unreachable("Unknown condition code");
7306     case 0: Pred = ICmpInst::ICMP_EQ;  break;
7307     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
7308     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
7309     case 4: Pred = ICmpInst::ICMP_NE;  break;
7310     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
7311     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
7312     }
7313     Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
7314   }
7315 
7316   const auto *C = dyn_cast<Constant>(Ops.back());
7317   if (!C || !C->isAllOnesValue())
7318     Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts));
7319 
7320   if (NumElts < 8) {
7321     uint32_t Indices[8];
7322     for (unsigned i = 0; i != NumElts; ++i)
7323       Indices[i] = i;
7324     for (unsigned i = NumElts; i != 8; ++i)
7325       Indices[i] = i % NumElts + NumElts;
7326     Cmp = CGF.Builder.CreateShuffleVector(
7327         Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
7328   }
7329   return CGF.Builder.CreateBitCast(Cmp,
7330                                    IntegerType::get(CGF.getLLVMContext(),
7331                                                     std::max(NumElts, 8U)));
7332 }
7333 
7334 static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) {
7335 
7336   llvm::Type *Ty = Ops[0]->getType();
7337   Value *Zero = llvm::Constant::getNullValue(Ty);
7338   Value *Sub = CGF.Builder.CreateSub(Zero, Ops[0]);
7339   Value *Cmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Zero);
7340   Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Sub);
7341   if (Ops.size() == 1)
7342     return Res;
7343   return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
7344 }
7345 
7346 static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
7347                             ArrayRef<Value *> Ops) {
7348   Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
7349   Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7350 
7351   if (Ops.size() == 2)
7352     return Res;
7353 
7354   assert(Ops.size() == 4);
7355   return EmitX86Select(CGF, Ops[3], Res, Ops[2]);
7356 }
7357 
7358 static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
7359                               llvm::Type *DstTy) {
7360   unsigned NumberOfElements = DstTy->getVectorNumElements();
7361   Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
7362   return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
7363 }
7364 
7365 Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
7366   const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
7367   StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
7368   return EmitX86CpuIs(CPUStr);
7369 }
7370 
7371 Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
7372 
7373   // This enum contains the vendor, type, and subtype enums from the
7374   // runtime library concatenated together. The _START labels mark
7375   // the start and are used to adjust the value into the correct
7376   // encoding space.
7377   enum X86CPUs {
7378     INTEL = 1,
7379     AMD,
7380     CPU_TYPE_START,
7381     INTEL_BONNELL,
7382     INTEL_CORE2,
7383     INTEL_COREI7,
7384     AMDFAM10H,
7385     AMDFAM15H,
7386     INTEL_SILVERMONT,
7387     INTEL_KNL,
7388     AMD_BTVER1,
7389     AMD_BTVER2,
7390     CPU_SUBTYPE_START,
7391     INTEL_COREI7_NEHALEM,
7392     INTEL_COREI7_WESTMERE,
7393     INTEL_COREI7_SANDYBRIDGE,
7394     AMDFAM10H_BARCELONA,
7395     AMDFAM10H_SHANGHAI,
7396     AMDFAM10H_ISTANBUL,
7397     AMDFAM15H_BDVER1,
7398     AMDFAM15H_BDVER2,
7399     AMDFAM15H_BDVER3,
7400     AMDFAM15H_BDVER4,
7401     AMDFAM17H_ZNVER1,
7402     INTEL_COREI7_IVYBRIDGE,
7403     INTEL_COREI7_HASWELL,
7404     INTEL_COREI7_BROADWELL,
7405     INTEL_COREI7_SKYLAKE,
7406     INTEL_COREI7_SKYLAKE_AVX512,
7407   };
7408 
7409   X86CPUs CPU =
7410     StringSwitch<X86CPUs>(CPUStr)
7411       .Case("amd", AMD)
7412       .Case("amdfam10h", AMDFAM10H)
7413       .Case("amdfam10", AMDFAM10H)
7414       .Case("amdfam15h", AMDFAM15H)
7415       .Case("amdfam15", AMDFAM15H)
7416       .Case("atom", INTEL_BONNELL)
7417       .Case("barcelona", AMDFAM10H_BARCELONA)
7418       .Case("bdver1", AMDFAM15H_BDVER1)
7419       .Case("bdver2", AMDFAM15H_BDVER2)
7420       .Case("bdver3", AMDFAM15H_BDVER3)
7421       .Case("bdver4", AMDFAM15H_BDVER4)
7422       .Case("bonnell", INTEL_BONNELL)
7423       .Case("broadwell", INTEL_COREI7_BROADWELL)
7424       .Case("btver1", AMD_BTVER1)
7425       .Case("btver2", AMD_BTVER2)
7426       .Case("core2", INTEL_CORE2)
7427       .Case("corei7", INTEL_COREI7)
7428       .Case("haswell", INTEL_COREI7_HASWELL)
7429       .Case("intel", INTEL)
7430       .Case("istanbul", AMDFAM10H_ISTANBUL)
7431       .Case("ivybridge", INTEL_COREI7_IVYBRIDGE)
7432       .Case("knl", INTEL_KNL)
7433       .Case("nehalem", INTEL_COREI7_NEHALEM)
7434       .Case("sandybridge", INTEL_COREI7_SANDYBRIDGE)
7435       .Case("shanghai", AMDFAM10H_SHANGHAI)
7436       .Case("silvermont", INTEL_SILVERMONT)
7437       .Case("skylake", INTEL_COREI7_SKYLAKE)
7438       .Case("skylake-avx512", INTEL_COREI7_SKYLAKE_AVX512)
7439       .Case("slm", INTEL_SILVERMONT)
7440       .Case("westmere", INTEL_COREI7_WESTMERE)
7441       .Case("znver1", AMDFAM17H_ZNVER1);
7442 
7443   llvm::Type *Int32Ty = Builder.getInt32Ty();
7444 
7445   // Matching the struct layout from the compiler-rt/libgcc structure that is
7446   // filled in:
7447   // unsigned int __cpu_vendor;
7448   // unsigned int __cpu_type;
7449   // unsigned int __cpu_subtype;
7450   // unsigned int __cpu_features[1];
7451   llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
7452                                           llvm::ArrayType::get(Int32Ty, 1));
7453 
7454   // Grab the global __cpu_model.
7455   llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
7456 
7457   // Calculate the index needed to access the correct field based on the
7458   // range. Also adjust the expected value.
7459   unsigned Index;
7460   unsigned Value;
7461   if (CPU > CPU_SUBTYPE_START) {
7462     Index = 2;
7463     Value = CPU - CPU_SUBTYPE_START;
7464   } else if (CPU > CPU_TYPE_START) {
7465     Index = 1;
7466     Value = CPU - CPU_TYPE_START;
7467   } else {
7468     Index = 0;
7469     Value = CPU;
7470   }
7471 
7472   // Grab the appropriate field from __cpu_model.
7473   llvm::Value *Idxs[] = {
7474     ConstantInt::get(Int32Ty, 0),
7475     ConstantInt::get(Int32Ty, Index)
7476   };
7477   llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs);
7478   CpuValue = Builder.CreateAlignedLoad(CpuValue, CharUnits::fromQuantity(4));
7479 
7480   // Check the value of the field against the requested value.
7481   return Builder.CreateICmpEQ(CpuValue,
7482                                   llvm::ConstantInt::get(Int32Ty, Value));
7483 }
7484 
7485 Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
7486   const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
7487   StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
7488   return EmitX86CpuSupports(FeatureStr);
7489 }
7490 
7491 Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
7492   // TODO: When/if this becomes more than x86 specific then use a TargetInfo
7493   // based mapping.
7494   // Processor features and mapping to processor feature value.
7495   enum X86Features {
7496     CMOV = 0,
7497     MMX,
7498     POPCNT,
7499     SSE,
7500     SSE2,
7501     SSE3,
7502     SSSE3,
7503     SSE4_1,
7504     SSE4_2,
7505     AVX,
7506     AVX2,
7507     SSE4_A,
7508     FMA4,
7509     XOP,
7510     FMA,
7511     AVX512F,
7512     BMI,
7513     BMI2,
7514     AES,
7515     PCLMUL,
7516     AVX512VL,
7517     AVX512BW,
7518     AVX512DQ,
7519     AVX512CD,
7520     AVX512ER,
7521     AVX512PF,
7522     AVX512VBMI,
7523     AVX512IFMA,
7524     AVX5124VNNIW,
7525     AVX5124FMAPS,
7526     AVX512VPOPCNTDQ,
7527     MAX
7528   };
7529 
7530   uint32_t FeaturesMask = 0;
7531 
7532   for (const StringRef &FeatureStr : FeatureStrs) {
7533     X86Features Feature =
7534         StringSwitch<X86Features>(FeatureStr)
7535             .Case("cmov", X86Features::CMOV)
7536             .Case("mmx", X86Features::MMX)
7537             .Case("popcnt", X86Features::POPCNT)
7538             .Case("sse", X86Features::SSE)
7539             .Case("sse2", X86Features::SSE2)
7540             .Case("sse3", X86Features::SSE3)
7541             .Case("ssse3", X86Features::SSSE3)
7542             .Case("sse4.1", X86Features::SSE4_1)
7543             .Case("sse4.2", X86Features::SSE4_2)
7544             .Case("avx", X86Features::AVX)
7545             .Case("avx2", X86Features::AVX2)
7546             .Case("sse4a", X86Features::SSE4_A)
7547             .Case("fma4", X86Features::FMA4)
7548             .Case("xop", X86Features::XOP)
7549             .Case("fma", X86Features::FMA)
7550             .Case("avx512f", X86Features::AVX512F)
7551             .Case("bmi", X86Features::BMI)
7552             .Case("bmi2", X86Features::BMI2)
7553             .Case("aes", X86Features::AES)
7554             .Case("pclmul", X86Features::PCLMUL)
7555             .Case("avx512vl", X86Features::AVX512VL)
7556             .Case("avx512bw", X86Features::AVX512BW)
7557             .Case("avx512dq", X86Features::AVX512DQ)
7558             .Case("avx512cd", X86Features::AVX512CD)
7559             .Case("avx512er", X86Features::AVX512ER)
7560             .Case("avx512pf", X86Features::AVX512PF)
7561             .Case("avx512vbmi", X86Features::AVX512VBMI)
7562             .Case("avx512ifma", X86Features::AVX512IFMA)
7563             .Case("avx5124vnniw", X86Features::AVX5124VNNIW)
7564             .Case("avx5124fmaps", X86Features::AVX5124FMAPS)
7565             .Case("avx512vpopcntdq", X86Features::AVX512VPOPCNTDQ)
7566             .Default(X86Features::MAX);
7567     assert(Feature != X86Features::MAX && "Invalid feature!");
7568     FeaturesMask |= (1U << Feature);
7569   }
7570 
7571   // Matching the struct layout from the compiler-rt/libgcc structure that is
7572   // filled in:
7573   // unsigned int __cpu_vendor;
7574   // unsigned int __cpu_type;
7575   // unsigned int __cpu_subtype;
7576   // unsigned int __cpu_features[1];
7577   llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
7578                                           llvm::ArrayType::get(Int32Ty, 1));
7579 
7580   // Grab the global __cpu_model.
7581   llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
7582 
7583   // Grab the first (0th) element from the field __cpu_features off of the
7584   // global in the struct STy.
7585   Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 3),
7586                    ConstantInt::get(Int32Ty, 0)};
7587   Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
7588   Value *Features =
7589       Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4));
7590 
7591   // Check the value of the bit corresponding to the feature requested.
7592   Value *Bitset = Builder.CreateAnd(
7593       Features, llvm::ConstantInt::get(Int32Ty, FeaturesMask));
7594   return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
7595 }
7596 
7597 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
7598                                            const CallExpr *E) {
7599   if (BuiltinID == X86::BI__builtin_cpu_is)
7600     return EmitX86CpuIs(E);
7601   if (BuiltinID == X86::BI__builtin_cpu_supports)
7602     return EmitX86CpuSupports(E);
7603 
7604   SmallVector<Value*, 4> Ops;
7605 
7606   // Find out if any arguments are required to be integer constant expressions.
7607   unsigned ICEArguments = 0;
7608   ASTContext::GetBuiltinTypeError Error;
7609   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
7610   assert(Error == ASTContext::GE_None && "Should not codegen an error");
7611 
7612   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
7613     // If this is a normal argument, just emit it as a scalar.
7614     if ((ICEArguments & (1 << i)) == 0) {
7615       Ops.push_back(EmitScalarExpr(E->getArg(i)));
7616       continue;
7617     }
7618 
7619     // If this is required to be a constant, constant fold it so that we know
7620     // that the generated intrinsic gets a ConstantInt.
7621     llvm::APSInt Result;
7622     bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
7623     assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
7624     Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
7625   }
7626 
7627   // These exist so that the builtin that takes an immediate can be bounds
7628   // checked by clang to avoid passing bad immediates to the backend. Since
7629   // AVX has a larger immediate than SSE we would need separate builtins to
7630   // do the different bounds checking. Rather than create a clang specific
7631   // SSE only builtin, this implements eight separate builtins to match gcc
7632   // implementation.
7633   auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
7634     Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
7635     llvm::Function *F = CGM.getIntrinsic(ID);
7636     return Builder.CreateCall(F, Ops);
7637   };
7638 
7639   // For the vector forms of FP comparisons, translate the builtins directly to
7640   // IR.
7641   // TODO: The builtins could be removed if the SSE header files used vector
7642   // extension comparisons directly (vector ordered/unordered may need
7643   // additional support via __builtin_isnan()).
7644   auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
7645     Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
7646     llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
7647     llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
7648     Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
7649     return Builder.CreateBitCast(Sext, FPVecTy);
7650   };
7651 
7652   switch (BuiltinID) {
7653   default: return nullptr;
7654   case X86::BI__builtin_cpu_init: {
7655     llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
7656                                                       /*Variadic*/false);
7657     llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy,
7658                                                      "__cpu_indicator_init");
7659     return Builder.CreateCall(Func);
7660   }
7661   case X86::BI_mm_prefetch: {
7662     Value *Address = Ops[0];
7663     Value *RW = ConstantInt::get(Int32Ty, 0);
7664     Value *Locality = Ops[1];
7665     Value *Data = ConstantInt::get(Int32Ty, 1);
7666     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
7667     return Builder.CreateCall(F, {Address, RW, Locality, Data});
7668   }
7669   case X86::BI_mm_clflush: {
7670     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
7671                               Ops[0]);
7672   }
7673   case X86::BI_mm_lfence: {
7674     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
7675   }
7676   case X86::BI_mm_mfence: {
7677     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
7678   }
7679   case X86::BI_mm_sfence: {
7680     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
7681   }
7682   case X86::BI_mm_pause: {
7683     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
7684   }
7685   case X86::BI__rdtsc: {
7686     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
7687   }
7688   case X86::BI__builtin_ia32_undef128:
7689   case X86::BI__builtin_ia32_undef256:
7690   case X86::BI__builtin_ia32_undef512:
7691     // The x86 definition of "undef" is not the same as the LLVM definition
7692     // (PR32176). We leave optimizing away an unnecessary zero constant to the
7693     // IR optimizer and backend.
7694     // TODO: If we had a "freeze" IR instruction to generate a fixed undef
7695     // value, we should use that here instead of a zero.
7696     return llvm::Constant::getNullValue(ConvertType(E->getType()));
7697   case X86::BI__builtin_ia32_vec_init_v8qi:
7698   case X86::BI__builtin_ia32_vec_init_v4hi:
7699   case X86::BI__builtin_ia32_vec_init_v2si:
7700     return Builder.CreateBitCast(BuildVector(Ops),
7701                                  llvm::Type::getX86_MMXTy(getLLVMContext()));
7702   case X86::BI__builtin_ia32_vec_ext_v2si:
7703     return Builder.CreateExtractElement(Ops[0],
7704                                   llvm::ConstantInt::get(Ops[1]->getType(), 0));
7705   case X86::BI_mm_setcsr:
7706   case X86::BI__builtin_ia32_ldmxcsr: {
7707     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
7708     Builder.CreateStore(Ops[0], Tmp);
7709     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
7710                           Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
7711   }
7712   case X86::BI_mm_getcsr:
7713   case X86::BI__builtin_ia32_stmxcsr: {
7714     Address Tmp = CreateMemTemp(E->getType());
7715     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
7716                        Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
7717     return Builder.CreateLoad(Tmp, "stmxcsr");
7718   }
7719   case X86::BI__builtin_ia32_xsave:
7720   case X86::BI__builtin_ia32_xsave64:
7721   case X86::BI__builtin_ia32_xrstor:
7722   case X86::BI__builtin_ia32_xrstor64:
7723   case X86::BI__builtin_ia32_xsaveopt:
7724   case X86::BI__builtin_ia32_xsaveopt64:
7725   case X86::BI__builtin_ia32_xrstors:
7726   case X86::BI__builtin_ia32_xrstors64:
7727   case X86::BI__builtin_ia32_xsavec:
7728   case X86::BI__builtin_ia32_xsavec64:
7729   case X86::BI__builtin_ia32_xsaves:
7730   case X86::BI__builtin_ia32_xsaves64: {
7731     Intrinsic::ID ID;
7732 #define INTRINSIC_X86_XSAVE_ID(NAME) \
7733     case X86::BI__builtin_ia32_##NAME: \
7734       ID = Intrinsic::x86_##NAME; \
7735       break
7736     switch (BuiltinID) {
7737     default: llvm_unreachable("Unsupported intrinsic!");
7738     INTRINSIC_X86_XSAVE_ID(xsave);
7739     INTRINSIC_X86_XSAVE_ID(xsave64);
7740     INTRINSIC_X86_XSAVE_ID(xrstor);
7741     INTRINSIC_X86_XSAVE_ID(xrstor64);
7742     INTRINSIC_X86_XSAVE_ID(xsaveopt);
7743     INTRINSIC_X86_XSAVE_ID(xsaveopt64);
7744     INTRINSIC_X86_XSAVE_ID(xrstors);
7745     INTRINSIC_X86_XSAVE_ID(xrstors64);
7746     INTRINSIC_X86_XSAVE_ID(xsavec);
7747     INTRINSIC_X86_XSAVE_ID(xsavec64);
7748     INTRINSIC_X86_XSAVE_ID(xsaves);
7749     INTRINSIC_X86_XSAVE_ID(xsaves64);
7750     }
7751 #undef INTRINSIC_X86_XSAVE_ID
7752     Value *Mhi = Builder.CreateTrunc(
7753       Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
7754     Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
7755     Ops[1] = Mhi;
7756     Ops.push_back(Mlo);
7757     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
7758   }
7759   case X86::BI__builtin_ia32_storedqudi128_mask:
7760   case X86::BI__builtin_ia32_storedqusi128_mask:
7761   case X86::BI__builtin_ia32_storedquhi128_mask:
7762   case X86::BI__builtin_ia32_storedquqi128_mask:
7763   case X86::BI__builtin_ia32_storeupd128_mask:
7764   case X86::BI__builtin_ia32_storeups128_mask:
7765   case X86::BI__builtin_ia32_storedqudi256_mask:
7766   case X86::BI__builtin_ia32_storedqusi256_mask:
7767   case X86::BI__builtin_ia32_storedquhi256_mask:
7768   case X86::BI__builtin_ia32_storedquqi256_mask:
7769   case X86::BI__builtin_ia32_storeupd256_mask:
7770   case X86::BI__builtin_ia32_storeups256_mask:
7771   case X86::BI__builtin_ia32_storedqudi512_mask:
7772   case X86::BI__builtin_ia32_storedqusi512_mask:
7773   case X86::BI__builtin_ia32_storedquhi512_mask:
7774   case X86::BI__builtin_ia32_storedquqi512_mask:
7775   case X86::BI__builtin_ia32_storeupd512_mask:
7776   case X86::BI__builtin_ia32_storeups512_mask:
7777     return EmitX86MaskedStore(*this, Ops, 1);
7778 
7779   case X86::BI__builtin_ia32_storess128_mask:
7780   case X86::BI__builtin_ia32_storesd128_mask: {
7781     return EmitX86MaskedStore(*this, Ops, 16);
7782   }
7783   case X86::BI__builtin_ia32_vpopcntd_512:
7784   case X86::BI__builtin_ia32_vpopcntq_512: {
7785     llvm::Type *ResultType = ConvertType(E->getType());
7786     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
7787     return Builder.CreateCall(F, Ops);
7788   }
7789   case X86::BI__builtin_ia32_cvtmask2b128:
7790   case X86::BI__builtin_ia32_cvtmask2b256:
7791   case X86::BI__builtin_ia32_cvtmask2b512:
7792   case X86::BI__builtin_ia32_cvtmask2w128:
7793   case X86::BI__builtin_ia32_cvtmask2w256:
7794   case X86::BI__builtin_ia32_cvtmask2w512:
7795   case X86::BI__builtin_ia32_cvtmask2d128:
7796   case X86::BI__builtin_ia32_cvtmask2d256:
7797   case X86::BI__builtin_ia32_cvtmask2d512:
7798   case X86::BI__builtin_ia32_cvtmask2q128:
7799   case X86::BI__builtin_ia32_cvtmask2q256:
7800   case X86::BI__builtin_ia32_cvtmask2q512:
7801     return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
7802 
7803   case X86::BI__builtin_ia32_movdqa32store128_mask:
7804   case X86::BI__builtin_ia32_movdqa64store128_mask:
7805   case X86::BI__builtin_ia32_storeaps128_mask:
7806   case X86::BI__builtin_ia32_storeapd128_mask:
7807   case X86::BI__builtin_ia32_movdqa32store256_mask:
7808   case X86::BI__builtin_ia32_movdqa64store256_mask:
7809   case X86::BI__builtin_ia32_storeaps256_mask:
7810   case X86::BI__builtin_ia32_storeapd256_mask:
7811   case X86::BI__builtin_ia32_movdqa32store512_mask:
7812   case X86::BI__builtin_ia32_movdqa64store512_mask:
7813   case X86::BI__builtin_ia32_storeaps512_mask:
7814   case X86::BI__builtin_ia32_storeapd512_mask: {
7815     unsigned Align =
7816       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
7817     return EmitX86MaskedStore(*this, Ops, Align);
7818   }
7819   case X86::BI__builtin_ia32_loadups128_mask:
7820   case X86::BI__builtin_ia32_loadups256_mask:
7821   case X86::BI__builtin_ia32_loadups512_mask:
7822   case X86::BI__builtin_ia32_loadupd128_mask:
7823   case X86::BI__builtin_ia32_loadupd256_mask:
7824   case X86::BI__builtin_ia32_loadupd512_mask:
7825   case X86::BI__builtin_ia32_loaddquqi128_mask:
7826   case X86::BI__builtin_ia32_loaddquqi256_mask:
7827   case X86::BI__builtin_ia32_loaddquqi512_mask:
7828   case X86::BI__builtin_ia32_loaddquhi128_mask:
7829   case X86::BI__builtin_ia32_loaddquhi256_mask:
7830   case X86::BI__builtin_ia32_loaddquhi512_mask:
7831   case X86::BI__builtin_ia32_loaddqusi128_mask:
7832   case X86::BI__builtin_ia32_loaddqusi256_mask:
7833   case X86::BI__builtin_ia32_loaddqusi512_mask:
7834   case X86::BI__builtin_ia32_loaddqudi128_mask:
7835   case X86::BI__builtin_ia32_loaddqudi256_mask:
7836   case X86::BI__builtin_ia32_loaddqudi512_mask:
7837     return EmitX86MaskedLoad(*this, Ops, 1);
7838 
7839   case X86::BI__builtin_ia32_loadss128_mask:
7840   case X86::BI__builtin_ia32_loadsd128_mask:
7841     return EmitX86MaskedLoad(*this, Ops, 16);
7842 
7843   case X86::BI__builtin_ia32_loadaps128_mask:
7844   case X86::BI__builtin_ia32_loadaps256_mask:
7845   case X86::BI__builtin_ia32_loadaps512_mask:
7846   case X86::BI__builtin_ia32_loadapd128_mask:
7847   case X86::BI__builtin_ia32_loadapd256_mask:
7848   case X86::BI__builtin_ia32_loadapd512_mask:
7849   case X86::BI__builtin_ia32_movdqa32load128_mask:
7850   case X86::BI__builtin_ia32_movdqa32load256_mask:
7851   case X86::BI__builtin_ia32_movdqa32load512_mask:
7852   case X86::BI__builtin_ia32_movdqa64load128_mask:
7853   case X86::BI__builtin_ia32_movdqa64load256_mask:
7854   case X86::BI__builtin_ia32_movdqa64load512_mask: {
7855     unsigned Align =
7856       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
7857     return EmitX86MaskedLoad(*this, Ops, Align);
7858   }
7859 
7860   case X86::BI__builtin_ia32_vbroadcastf128_pd256:
7861   case X86::BI__builtin_ia32_vbroadcastf128_ps256: {
7862     llvm::Type *DstTy = ConvertType(E->getType());
7863     return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1);
7864   }
7865 
7866   case X86::BI__builtin_ia32_storehps:
7867   case X86::BI__builtin_ia32_storelps: {
7868     llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
7869     llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
7870 
7871     // cast val v2i64
7872     Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
7873 
7874     // extract (0, 1)
7875     unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
7876     llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
7877     Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
7878 
7879     // cast pointer to i64 & store
7880     Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
7881     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7882   }
7883   case X86::BI__builtin_ia32_palignr128:
7884   case X86::BI__builtin_ia32_palignr256:
7885   case X86::BI__builtin_ia32_palignr512_mask: {
7886     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7887 
7888     unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7889     assert(NumElts % 16 == 0);
7890 
7891     // If palignr is shifting the pair of vectors more than the size of two
7892     // lanes, emit zero.
7893     if (ShiftVal >= 32)
7894       return llvm::Constant::getNullValue(ConvertType(E->getType()));
7895 
7896     // If palignr is shifting the pair of input vectors more than one lane,
7897     // but less than two lanes, convert to shifting in zeroes.
7898     if (ShiftVal > 16) {
7899       ShiftVal -= 16;
7900       Ops[1] = Ops[0];
7901       Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
7902     }
7903 
7904     uint32_t Indices[64];
7905     // 256-bit palignr operates on 128-bit lanes so we need to handle that
7906     for (unsigned l = 0; l != NumElts; l += 16) {
7907       for (unsigned i = 0; i != 16; ++i) {
7908         unsigned Idx = ShiftVal + i;
7909         if (Idx >= 16)
7910           Idx += NumElts - 16; // End of lane, switch operand.
7911         Indices[l + i] = Idx + l;
7912       }
7913     }
7914 
7915     Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
7916                                                makeArrayRef(Indices, NumElts),
7917                                                "palignr");
7918 
7919     // If this isn't a masked builtin, just return the align operation.
7920     if (Ops.size() == 3)
7921       return Align;
7922 
7923     return EmitX86Select(*this, Ops[4], Align, Ops[3]);
7924   }
7925 
7926   case X86::BI__builtin_ia32_vperm2f128_pd256:
7927   case X86::BI__builtin_ia32_vperm2f128_ps256:
7928   case X86::BI__builtin_ia32_vperm2f128_si256:
7929   case X86::BI__builtin_ia32_permti256: {
7930     unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7931     unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7932 
7933     // This takes a very simple approach since there are two lanes and a
7934     // shuffle can have 2 inputs. So we reserve the first input for the first
7935     // lane and the second input for the second lane. This may result in
7936     // duplicate sources, but this can be dealt with in the backend.
7937 
7938     Value *OutOps[2];
7939     uint32_t Indices[8];
7940     for (unsigned l = 0; l != 2; ++l) {
7941       // Determine the source for this lane.
7942       if (Imm & (1 << ((l * 4) + 3)))
7943         OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
7944       else if (Imm & (1 << ((l * 4) + 1)))
7945         OutOps[l] = Ops[1];
7946       else
7947         OutOps[l] = Ops[0];
7948 
7949       for (unsigned i = 0; i != NumElts/2; ++i) {
7950         // Start with ith element of the source for this lane.
7951         unsigned Idx = (l * NumElts) + i;
7952         // If bit 0 of the immediate half is set, switch to the high half of
7953         // the source.
7954         if (Imm & (1 << (l * 4)))
7955           Idx += NumElts/2;
7956         Indices[(l * (NumElts/2)) + i] = Idx;
7957       }
7958     }
7959 
7960     return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
7961                                        makeArrayRef(Indices, NumElts),
7962                                        "vperm");
7963   }
7964 
7965   case X86::BI__builtin_ia32_movnti:
7966   case X86::BI__builtin_ia32_movnti64:
7967   case X86::BI__builtin_ia32_movntsd:
7968   case X86::BI__builtin_ia32_movntss: {
7969     llvm::MDNode *Node = llvm::MDNode::get(
7970         getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
7971 
7972     Value *Ptr = Ops[0];
7973     Value *Src = Ops[1];
7974 
7975     // Extract the 0'th element of the source vector.
7976     if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
7977         BuiltinID == X86::BI__builtin_ia32_movntss)
7978       Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
7979 
7980     // Convert the type of the pointer to a pointer to the stored type.
7981     Value *BC = Builder.CreateBitCast(
7982         Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");
7983 
7984     // Unaligned nontemporal store of the scalar value.
7985     StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
7986     SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
7987     SI->setAlignment(1);
7988     return SI;
7989   }
7990 
7991   case X86::BI__builtin_ia32_selectb_128:
7992   case X86::BI__builtin_ia32_selectb_256:
7993   case X86::BI__builtin_ia32_selectb_512:
7994   case X86::BI__builtin_ia32_selectw_128:
7995   case X86::BI__builtin_ia32_selectw_256:
7996   case X86::BI__builtin_ia32_selectw_512:
7997   case X86::BI__builtin_ia32_selectd_128:
7998   case X86::BI__builtin_ia32_selectd_256:
7999   case X86::BI__builtin_ia32_selectd_512:
8000   case X86::BI__builtin_ia32_selectq_128:
8001   case X86::BI__builtin_ia32_selectq_256:
8002   case X86::BI__builtin_ia32_selectq_512:
8003   case X86::BI__builtin_ia32_selectps_128:
8004   case X86::BI__builtin_ia32_selectps_256:
8005   case X86::BI__builtin_ia32_selectps_512:
8006   case X86::BI__builtin_ia32_selectpd_128:
8007   case X86::BI__builtin_ia32_selectpd_256:
8008   case X86::BI__builtin_ia32_selectpd_512:
8009     return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
8010   case X86::BI__builtin_ia32_pcmpeqb128_mask:
8011   case X86::BI__builtin_ia32_pcmpeqb256_mask:
8012   case X86::BI__builtin_ia32_pcmpeqb512_mask:
8013   case X86::BI__builtin_ia32_pcmpeqw128_mask:
8014   case X86::BI__builtin_ia32_pcmpeqw256_mask:
8015   case X86::BI__builtin_ia32_pcmpeqw512_mask:
8016   case X86::BI__builtin_ia32_pcmpeqd128_mask:
8017   case X86::BI__builtin_ia32_pcmpeqd256_mask:
8018   case X86::BI__builtin_ia32_pcmpeqd512_mask:
8019   case X86::BI__builtin_ia32_pcmpeqq128_mask:
8020   case X86::BI__builtin_ia32_pcmpeqq256_mask:
8021   case X86::BI__builtin_ia32_pcmpeqq512_mask:
8022     return EmitX86MaskedCompare(*this, 0, false, Ops);
8023   case X86::BI__builtin_ia32_pcmpgtb128_mask:
8024   case X86::BI__builtin_ia32_pcmpgtb256_mask:
8025   case X86::BI__builtin_ia32_pcmpgtb512_mask:
8026   case X86::BI__builtin_ia32_pcmpgtw128_mask:
8027   case X86::BI__builtin_ia32_pcmpgtw256_mask:
8028   case X86::BI__builtin_ia32_pcmpgtw512_mask:
8029   case X86::BI__builtin_ia32_pcmpgtd128_mask:
8030   case X86::BI__builtin_ia32_pcmpgtd256_mask:
8031   case X86::BI__builtin_ia32_pcmpgtd512_mask:
8032   case X86::BI__builtin_ia32_pcmpgtq128_mask:
8033   case X86::BI__builtin_ia32_pcmpgtq256_mask:
8034   case X86::BI__builtin_ia32_pcmpgtq512_mask:
8035     return EmitX86MaskedCompare(*this, 6, true, Ops);
8036   case X86::BI__builtin_ia32_cmpb128_mask:
8037   case X86::BI__builtin_ia32_cmpb256_mask:
8038   case X86::BI__builtin_ia32_cmpb512_mask:
8039   case X86::BI__builtin_ia32_cmpw128_mask:
8040   case X86::BI__builtin_ia32_cmpw256_mask:
8041   case X86::BI__builtin_ia32_cmpw512_mask:
8042   case X86::BI__builtin_ia32_cmpd128_mask:
8043   case X86::BI__builtin_ia32_cmpd256_mask:
8044   case X86::BI__builtin_ia32_cmpd512_mask:
8045   case X86::BI__builtin_ia32_cmpq128_mask:
8046   case X86::BI__builtin_ia32_cmpq256_mask:
8047   case X86::BI__builtin_ia32_cmpq512_mask: {
8048     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
8049     return EmitX86MaskedCompare(*this, CC, true, Ops);
8050   }
8051   case X86::BI__builtin_ia32_ucmpb128_mask:
8052   case X86::BI__builtin_ia32_ucmpb256_mask:
8053   case X86::BI__builtin_ia32_ucmpb512_mask:
8054   case X86::BI__builtin_ia32_ucmpw128_mask:
8055   case X86::BI__builtin_ia32_ucmpw256_mask:
8056   case X86::BI__builtin_ia32_ucmpw512_mask:
8057   case X86::BI__builtin_ia32_ucmpd128_mask:
8058   case X86::BI__builtin_ia32_ucmpd256_mask:
8059   case X86::BI__builtin_ia32_ucmpd512_mask:
8060   case X86::BI__builtin_ia32_ucmpq128_mask:
8061   case X86::BI__builtin_ia32_ucmpq256_mask:
8062   case X86::BI__builtin_ia32_ucmpq512_mask: {
8063     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
8064     return EmitX86MaskedCompare(*this, CC, false, Ops);
8065   }
8066 
8067   case X86::BI__builtin_ia32_vplzcntd_128_mask:
8068   case X86::BI__builtin_ia32_vplzcntd_256_mask:
8069   case X86::BI__builtin_ia32_vplzcntd_512_mask:
8070   case X86::BI__builtin_ia32_vplzcntq_128_mask:
8071   case X86::BI__builtin_ia32_vplzcntq_256_mask:
8072   case X86::BI__builtin_ia32_vplzcntq_512_mask: {
8073     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
8074     return EmitX86Select(*this, Ops[2],
8075                          Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}),
8076                          Ops[1]);
8077   }
8078 
8079   case X86::BI__builtin_ia32_pabsb128:
8080   case X86::BI__builtin_ia32_pabsw128:
8081   case X86::BI__builtin_ia32_pabsd128:
8082   case X86::BI__builtin_ia32_pabsb256:
8083   case X86::BI__builtin_ia32_pabsw256:
8084   case X86::BI__builtin_ia32_pabsd256:
8085   case X86::BI__builtin_ia32_pabsq128_mask:
8086   case X86::BI__builtin_ia32_pabsq256_mask:
8087   case X86::BI__builtin_ia32_pabsb512_mask:
8088   case X86::BI__builtin_ia32_pabsw512_mask:
8089   case X86::BI__builtin_ia32_pabsd512_mask:
8090   case X86::BI__builtin_ia32_pabsq512_mask:
8091     return EmitX86Abs(*this, Ops);
8092 
8093   case X86::BI__builtin_ia32_pmaxsb128:
8094   case X86::BI__builtin_ia32_pmaxsw128:
8095   case X86::BI__builtin_ia32_pmaxsd128:
8096   case X86::BI__builtin_ia32_pmaxsq128_mask:
8097   case X86::BI__builtin_ia32_pmaxsb256:
8098   case X86::BI__builtin_ia32_pmaxsw256:
8099   case X86::BI__builtin_ia32_pmaxsd256:
8100   case X86::BI__builtin_ia32_pmaxsq256_mask:
8101   case X86::BI__builtin_ia32_pmaxsb512_mask:
8102   case X86::BI__builtin_ia32_pmaxsw512_mask:
8103   case X86::BI__builtin_ia32_pmaxsd512_mask:
8104   case X86::BI__builtin_ia32_pmaxsq512_mask:
8105     return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops);
8106   case X86::BI__builtin_ia32_pmaxub128:
8107   case X86::BI__builtin_ia32_pmaxuw128:
8108   case X86::BI__builtin_ia32_pmaxud128:
8109   case X86::BI__builtin_ia32_pmaxuq128_mask:
8110   case X86::BI__builtin_ia32_pmaxub256:
8111   case X86::BI__builtin_ia32_pmaxuw256:
8112   case X86::BI__builtin_ia32_pmaxud256:
8113   case X86::BI__builtin_ia32_pmaxuq256_mask:
8114   case X86::BI__builtin_ia32_pmaxub512_mask:
8115   case X86::BI__builtin_ia32_pmaxuw512_mask:
8116   case X86::BI__builtin_ia32_pmaxud512_mask:
8117   case X86::BI__builtin_ia32_pmaxuq512_mask:
8118     return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops);
8119   case X86::BI__builtin_ia32_pminsb128:
8120   case X86::BI__builtin_ia32_pminsw128:
8121   case X86::BI__builtin_ia32_pminsd128:
8122   case X86::BI__builtin_ia32_pminsq128_mask:
8123   case X86::BI__builtin_ia32_pminsb256:
8124   case X86::BI__builtin_ia32_pminsw256:
8125   case X86::BI__builtin_ia32_pminsd256:
8126   case X86::BI__builtin_ia32_pminsq256_mask:
8127   case X86::BI__builtin_ia32_pminsb512_mask:
8128   case X86::BI__builtin_ia32_pminsw512_mask:
8129   case X86::BI__builtin_ia32_pminsd512_mask:
8130   case X86::BI__builtin_ia32_pminsq512_mask:
8131     return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops);
8132   case X86::BI__builtin_ia32_pminub128:
8133   case X86::BI__builtin_ia32_pminuw128:
8134   case X86::BI__builtin_ia32_pminud128:
8135   case X86::BI__builtin_ia32_pminuq128_mask:
8136   case X86::BI__builtin_ia32_pminub256:
8137   case X86::BI__builtin_ia32_pminuw256:
8138   case X86::BI__builtin_ia32_pminud256:
8139   case X86::BI__builtin_ia32_pminuq256_mask:
8140   case X86::BI__builtin_ia32_pminub512_mask:
8141   case X86::BI__builtin_ia32_pminuw512_mask:
8142   case X86::BI__builtin_ia32_pminud512_mask:
8143   case X86::BI__builtin_ia32_pminuq512_mask:
8144     return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops);
8145 
8146   // 3DNow!
8147   case X86::BI__builtin_ia32_pswapdsf:
8148   case X86::BI__builtin_ia32_pswapdsi: {
8149     llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
8150     Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
8151     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
8152     return Builder.CreateCall(F, Ops, "pswapd");
8153   }
8154   case X86::BI__builtin_ia32_rdrand16_step:
8155   case X86::BI__builtin_ia32_rdrand32_step:
8156   case X86::BI__builtin_ia32_rdrand64_step:
8157   case X86::BI__builtin_ia32_rdseed16_step:
8158   case X86::BI__builtin_ia32_rdseed32_step:
8159   case X86::BI__builtin_ia32_rdseed64_step: {
8160     Intrinsic::ID ID;
8161     switch (BuiltinID) {
8162     default: llvm_unreachable("Unsupported intrinsic!");
8163     case X86::BI__builtin_ia32_rdrand16_step:
8164       ID = Intrinsic::x86_rdrand_16;
8165       break;
8166     case X86::BI__builtin_ia32_rdrand32_step:
8167       ID = Intrinsic::x86_rdrand_32;
8168       break;
8169     case X86::BI__builtin_ia32_rdrand64_step:
8170       ID = Intrinsic::x86_rdrand_64;
8171       break;
8172     case X86::BI__builtin_ia32_rdseed16_step:
8173       ID = Intrinsic::x86_rdseed_16;
8174       break;
8175     case X86::BI__builtin_ia32_rdseed32_step:
8176       ID = Intrinsic::x86_rdseed_32;
8177       break;
8178     case X86::BI__builtin_ia32_rdseed64_step:
8179       ID = Intrinsic::x86_rdseed_64;
8180       break;
8181     }
8182 
8183     Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
8184     Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
8185                                       Ops[0]);
8186     return Builder.CreateExtractValue(Call, 1);
8187   }
8188 
8189   // SSE packed comparison intrinsics
8190   case X86::BI__builtin_ia32_cmpeqps:
8191   case X86::BI__builtin_ia32_cmpeqpd:
8192     return getVectorFCmpIR(CmpInst::FCMP_OEQ);
8193   case X86::BI__builtin_ia32_cmpltps:
8194   case X86::BI__builtin_ia32_cmpltpd:
8195     return getVectorFCmpIR(CmpInst::FCMP_OLT);
8196   case X86::BI__builtin_ia32_cmpleps:
8197   case X86::BI__builtin_ia32_cmplepd:
8198     return getVectorFCmpIR(CmpInst::FCMP_OLE);
8199   case X86::BI__builtin_ia32_cmpunordps:
8200   case X86::BI__builtin_ia32_cmpunordpd:
8201     return getVectorFCmpIR(CmpInst::FCMP_UNO);
8202   case X86::BI__builtin_ia32_cmpneqps:
8203   case X86::BI__builtin_ia32_cmpneqpd:
8204     return getVectorFCmpIR(CmpInst::FCMP_UNE);
8205   case X86::BI__builtin_ia32_cmpnltps:
8206   case X86::BI__builtin_ia32_cmpnltpd:
8207     return getVectorFCmpIR(CmpInst::FCMP_UGE);
8208   case X86::BI__builtin_ia32_cmpnleps:
8209   case X86::BI__builtin_ia32_cmpnlepd:
8210     return getVectorFCmpIR(CmpInst::FCMP_UGT);
8211   case X86::BI__builtin_ia32_cmpordps:
8212   case X86::BI__builtin_ia32_cmpordpd:
8213     return getVectorFCmpIR(CmpInst::FCMP_ORD);
8214   case X86::BI__builtin_ia32_cmpps:
8215   case X86::BI__builtin_ia32_cmpps256:
8216   case X86::BI__builtin_ia32_cmppd:
8217   case X86::BI__builtin_ia32_cmppd256: {
8218     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
8219     // If this is one of the SSE immediates, we can use native IR.
8220     if (CC < 8) {
8221       FCmpInst::Predicate Pred;
8222       switch (CC) {
8223       case 0: Pred = FCmpInst::FCMP_OEQ; break;
8224       case 1: Pred = FCmpInst::FCMP_OLT; break;
8225       case 2: Pred = FCmpInst::FCMP_OLE; break;
8226       case 3: Pred = FCmpInst::FCMP_UNO; break;
8227       case 4: Pred = FCmpInst::FCMP_UNE; break;
8228       case 5: Pred = FCmpInst::FCMP_UGE; break;
8229       case 6: Pred = FCmpInst::FCMP_UGT; break;
8230       case 7: Pred = FCmpInst::FCMP_ORD; break;
8231       }
8232       return getVectorFCmpIR(Pred);
8233     }
8234 
8235     // We can't handle 8-31 immediates with native IR, so use the intrinsic,
8236     // except for predicates that fold to a constant (256-bit forms below).
8237     Intrinsic::ID ID;
8238     switch (BuiltinID) {
8239     default: llvm_unreachable("Unsupported intrinsic!");
8240     case X86::BI__builtin_ia32_cmpps:
8241       ID = Intrinsic::x86_sse_cmp_ps;
8242       break;
8243     case X86::BI__builtin_ia32_cmpps256:
8244       // _CMP_TRUE_UQ and _CMP_TRUE_US produce an all-ones vector on any
8245       // input; _CMP_FALSE_OQ and _CMP_FALSE_OS produce an all-zeros vector.
8246       if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
8247          Value *Constant = (CC == 0xf || CC == 0x1f) ?
8248                 llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) :
8249                 llvm::Constant::getNullValue(Builder.getInt32Ty());
8250          Value *Vec = Builder.CreateVectorSplat(
8251                         Ops[0]->getType()->getVectorNumElements(), Constant);
8252          return Builder.CreateBitCast(Vec, Ops[0]->getType());
8253       }
8254       ID = Intrinsic::x86_avx_cmp_ps_256;
8255       break;
8256     case X86::BI__builtin_ia32_cmppd:
8257       ID = Intrinsic::x86_sse2_cmp_pd;
8258       break;
8259     case X86::BI__builtin_ia32_cmppd256:
8260       // _CMP_TRUE_UQ and _CMP_TRUE_US produce an all-ones vector on any
8261       // input; _CMP_FALSE_OQ and _CMP_FALSE_OS produce an all-zeros vector.
8262       if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
8263          Value *Constant = (CC == 0xf || CC == 0x1f) ?
8264                 llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) :
8265                 llvm::Constant::getNullValue(Builder.getInt64Ty());
8266          Value *Vec = Builder.CreateVectorSplat(
8267                         Ops[0]->getType()->getVectorNumElements(), Constant);
8268          return Builder.CreateBitCast(Vec, Ops[0]->getType());
8269       }
8270       ID = Intrinsic::x86_avx_cmp_pd_256;
8271       break;
8272     }
8273 
8274     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
8275   }
8276 
8277   // SSE scalar comparison intrinsics
8278   case X86::BI__builtin_ia32_cmpeqss:
8279     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
8280   case X86::BI__builtin_ia32_cmpltss:
8281     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
8282   case X86::BI__builtin_ia32_cmpless:
8283     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
8284   case X86::BI__builtin_ia32_cmpunordss:
8285     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
8286   case X86::BI__builtin_ia32_cmpneqss:
8287     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
8288   case X86::BI__builtin_ia32_cmpnltss:
8289     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
8290   case X86::BI__builtin_ia32_cmpnless:
8291     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
8292   case X86::BI__builtin_ia32_cmpordss:
8293     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
8294   case X86::BI__builtin_ia32_cmpeqsd:
8295     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
8296   case X86::BI__builtin_ia32_cmpltsd:
8297     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
8298   case X86::BI__builtin_ia32_cmplesd:
8299     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
8300   case X86::BI__builtin_ia32_cmpunordsd:
8301     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
8302   case X86::BI__builtin_ia32_cmpneqsd:
8303     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
8304   case X86::BI__builtin_ia32_cmpnltsd:
8305     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
8306   case X86::BI__builtin_ia32_cmpnlesd:
8307     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
8308   case X86::BI__builtin_ia32_cmpordsd:
8309     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
8310 
8311   case X86::BI__emul:
8312   case X86::BI__emulu: {
8313     llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
8314     bool isSigned = (BuiltinID == X86::BI__emul);
8315     Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
8316     Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
8317     return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
8318   }
8319   case X86::BI__mulh:
8320   case X86::BI__umulh:
8321   case X86::BI_mul128:
8322   case X86::BI_umul128: {
8323     llvm::Type *ResType = ConvertType(E->getType());
8324     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
8325 
8326     bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
8327     Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
8328     Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
8329 
8330     Value *MulResult, *HigherBits;
8331     if (IsSigned) {
8332       MulResult = Builder.CreateNSWMul(LHS, RHS);
8333       HigherBits = Builder.CreateAShr(MulResult, 64);
8334     } else {
8335       MulResult = Builder.CreateNUWMul(LHS, RHS);
8336       HigherBits = Builder.CreateLShr(MulResult, 64);
8337     }
8338     HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
8339 
8340     if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
8341       return HigherBits;
8342 
8343     Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
8344     Builder.CreateStore(HigherBits, HighBitsAddress);
8345     return Builder.CreateIntCast(MulResult, ResType, IsSigned);
8346   }
8347 
8348   case X86::BI__faststorefence: {
8349     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
8350                                llvm::SyncScope::System);
8351   }
8352   case X86::BI_ReadWriteBarrier:
8353   case X86::BI_ReadBarrier:
8354   case X86::BI_WriteBarrier: {
8355     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
8356                                llvm::SyncScope::SingleThread);
8357   }
8358   case X86::BI_BitScanForward:
8359   case X86::BI_BitScanForward64:
8360     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
8361   case X86::BI_BitScanReverse:
8362   case X86::BI_BitScanReverse64:
8363     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
8364 
8365   case X86::BI_InterlockedAnd64:
8366     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
8367   case X86::BI_InterlockedExchange64:
8368     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
8369   case X86::BI_InterlockedExchangeAdd64:
8370     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
8371   case X86::BI_InterlockedExchangeSub64:
8372     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
8373   case X86::BI_InterlockedOr64:
8374     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
8375   case X86::BI_InterlockedXor64:
8376     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
8377   case X86::BI_InterlockedDecrement64:
8378     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
8379   case X86::BI_InterlockedIncrement64:
8380     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
8381 
8382   case X86::BI_AddressOfReturnAddress: {
8383     Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
8384     return Builder.CreateCall(F);
8385   }
8386   case X86::BI__stosb: {
8387     // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
8388     // instruction, but it will create a memset that won't be optimized away.
8389     return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true);
8390   }
8391   case X86::BI__ud2:
8392     // llvm.trap makes a ud2a instruction on x86.
8393     return EmitTrapCall(Intrinsic::trap);
8394   case X86::BI__int2c: {
8395     // This syscall signals a driver assertion failure in x86 NT kernels.
8396     llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
8397     llvm::InlineAsm *IA =
8398         llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true);
8399     llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
8400         getLLVMContext(), llvm::AttributeList::FunctionIndex,
8401         llvm::Attribute::NoReturn);
8402     CallSite CS = Builder.CreateCall(IA);
8403     CS.setAttributes(NoReturnAttr);
8404     return CS.getInstruction();
8405   }
8406   case X86::BI__readfsbyte:
8407   case X86::BI__readfsword:
8408   case X86::BI__readfsdword:
8409   case X86::BI__readfsqword: {
8410     llvm::Type *IntTy = ConvertType(E->getType());
8411     Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
8412                                         llvm::PointerType::get(IntTy, 257));
8413     LoadInst *Load = Builder.CreateAlignedLoad(
8414         IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
8415     Load->setVolatile(true);
8416     return Load;
8417   }
8418   case X86::BI__readgsbyte:
8419   case X86::BI__readgsword:
8420   case X86::BI__readgsdword:
8421   case X86::BI__readgsqword: {
8422     llvm::Type *IntTy = ConvertType(E->getType());
8423     Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
8424                                         llvm::PointerType::get(IntTy, 256));
8425     LoadInst *Load = Builder.CreateAlignedLoad(
8426         IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
8427     Load->setVolatile(true);
8428     return Load;
8429   }
8430   }
8431 }
8432 
8433 
8434 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
8435                                            const CallExpr *E) {
8436   SmallVector<Value*, 4> Ops;
8437 
8438   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
8439     Ops.push_back(EmitScalarExpr(E->getArg(i)));
8440 
8441   Intrinsic::ID ID = Intrinsic::not_intrinsic;
8442 
8443   switch (BuiltinID) {
8444   default: return nullptr;
8445 
8446   // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
8447   // call __builtin_readcyclecounter.
8448   case PPC::BI__builtin_ppc_get_timebase:
8449     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
8450 
8451   // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
8452   case PPC::BI__builtin_altivec_lvx:
8453   case PPC::BI__builtin_altivec_lvxl:
8454   case PPC::BI__builtin_altivec_lvebx:
8455   case PPC::BI__builtin_altivec_lvehx:
8456   case PPC::BI__builtin_altivec_lvewx:
8457   case PPC::BI__builtin_altivec_lvsl:
8458   case PPC::BI__builtin_altivec_lvsr:
8459   case PPC::BI__builtin_vsx_lxvd2x:
8460   case PPC::BI__builtin_vsx_lxvw4x:
8461   case PPC::BI__builtin_vsx_lxvd2x_be:
8462   case PPC::BI__builtin_vsx_lxvw4x_be:
8463   case PPC::BI__builtin_vsx_lxvl:
8464   case PPC::BI__builtin_vsx_lxvll:
8465   {
8466     if(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
8467        BuiltinID == PPC::BI__builtin_vsx_lxvll){
8468       Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
8469     }else {
8470       Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
8471       Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
8472       Ops.pop_back();
8473     }
8474 
8475     switch (BuiltinID) {
8476     default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
8477     case PPC::BI__builtin_altivec_lvx:
8478       ID = Intrinsic::ppc_altivec_lvx;
8479       break;
8480     case PPC::BI__builtin_altivec_lvxl:
8481       ID = Intrinsic::ppc_altivec_lvxl;
8482       break;
8483     case PPC::BI__builtin_altivec_lvebx:
8484       ID = Intrinsic::ppc_altivec_lvebx;
8485       break;
8486     case PPC::BI__builtin_altivec_lvehx:
8487       ID = Intrinsic::ppc_altivec_lvehx;
8488       break;
8489     case PPC::BI__builtin_altivec_lvewx:
8490       ID = Intrinsic::ppc_altivec_lvewx;
8491       break;
8492     case PPC::BI__builtin_altivec_lvsl:
8493       ID = Intrinsic::ppc_altivec_lvsl;
8494       break;
8495     case PPC::BI__builtin_altivec_lvsr:
8496       ID = Intrinsic::ppc_altivec_lvsr;
8497       break;
8498     case PPC::BI__builtin_vsx_lxvd2x:
8499       ID = Intrinsic::ppc_vsx_lxvd2x;
8500       break;
8501     case PPC::BI__builtin_vsx_lxvw4x:
8502       ID = Intrinsic::ppc_vsx_lxvw4x;
8503       break;
8504     case PPC::BI__builtin_vsx_lxvd2x_be:
8505       ID = Intrinsic::ppc_vsx_lxvd2x_be;
8506       break;
8507     case PPC::BI__builtin_vsx_lxvw4x_be:
8508       ID = Intrinsic::ppc_vsx_lxvw4x_be;
8509       break;
8510     case PPC::BI__builtin_vsx_lxvl:
8511       ID = Intrinsic::ppc_vsx_lxvl;
8512       break;
8513     case PPC::BI__builtin_vsx_lxvll:
8514       ID = Intrinsic::ppc_vsx_lxvll;
8515       break;
8516     }
8517     llvm::Function *F = CGM.getIntrinsic(ID);
8518     return Builder.CreateCall(F, Ops, "");
8519   }
8520 
8521   // vec_st, vec_xst_be
8522   case PPC::BI__builtin_altivec_stvx:
8523   case PPC::BI__builtin_altivec_stvxl:
8524   case PPC::BI__builtin_altivec_stvebx:
8525   case PPC::BI__builtin_altivec_stvehx:
8526   case PPC::BI__builtin_altivec_stvewx:
8527   case PPC::BI__builtin_vsx_stxvd2x:
8528   case PPC::BI__builtin_vsx_stxvw4x:
8529   case PPC::BI__builtin_vsx_stxvd2x_be:
8530   case PPC::BI__builtin_vsx_stxvw4x_be:
8531   case PPC::BI__builtin_vsx_stxvl:
8532   case PPC::BI__builtin_vsx_stxvll:
8533   {
8534     if(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
8535       BuiltinID == PPC::BI__builtin_vsx_stxvll ){
8536       Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
8537     }else {
8538       Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
8539       Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
8540       Ops.pop_back();
8541     }
8542 
8543     switch (BuiltinID) {
8544     default: llvm_unreachable("Unsupported st intrinsic!");
8545     case PPC::BI__builtin_altivec_stvx:
8546       ID = Intrinsic::ppc_altivec_stvx;
8547       break;
8548     case PPC::BI__builtin_altivec_stvxl:
8549       ID = Intrinsic::ppc_altivec_stvxl;
8550       break;
8551     case PPC::BI__builtin_altivec_stvebx:
8552       ID = Intrinsic::ppc_altivec_stvebx;
8553       break;
8554     case PPC::BI__builtin_altivec_stvehx:
8555       ID = Intrinsic::ppc_altivec_stvehx;
8556       break;
8557     case PPC::BI__builtin_altivec_stvewx:
8558       ID = Intrinsic::ppc_altivec_stvewx;
8559       break;
8560     case PPC::BI__builtin_vsx_stxvd2x:
8561       ID = Intrinsic::ppc_vsx_stxvd2x;
8562       break;
8563     case PPC::BI__builtin_vsx_stxvw4x:
8564       ID = Intrinsic::ppc_vsx_stxvw4x;
8565       break;
8566     case PPC::BI__builtin_vsx_stxvd2x_be:
8567       ID = Intrinsic::ppc_vsx_stxvd2x_be;
8568       break;
8569     case PPC::BI__builtin_vsx_stxvw4x_be:
8570       ID = Intrinsic::ppc_vsx_stxvw4x_be;
8571       break;
8572     case PPC::BI__builtin_vsx_stxvl:
8573       ID = Intrinsic::ppc_vsx_stxvl;
8574       break;
8575     case PPC::BI__builtin_vsx_stxvll:
8576       ID = Intrinsic::ppc_vsx_stxvll;
8577       break;
8578     }
8579     llvm::Function *F = CGM.getIntrinsic(ID);
8580     return Builder.CreateCall(F, Ops, "");
8581   }
8582   // Square root
8583   case PPC::BI__builtin_vsx_xvsqrtsp:
8584   case PPC::BI__builtin_vsx_xvsqrtdp: {
8585     llvm::Type *ResultType = ConvertType(E->getType());
8586     Value *X = EmitScalarExpr(E->getArg(0));
8587     ID = Intrinsic::sqrt;
8588     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8589     return Builder.CreateCall(F, X);
8590   }
8591   // Count leading zeros
8592   case PPC::BI__builtin_altivec_vclzb:
8593   case PPC::BI__builtin_altivec_vclzh:
8594   case PPC::BI__builtin_altivec_vclzw:
8595   case PPC::BI__builtin_altivec_vclzd: {
8596     llvm::Type *ResultType = ConvertType(E->getType());
8597     Value *X = EmitScalarExpr(E->getArg(0));
8598     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8599     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
8600     return Builder.CreateCall(F, {X, Undef});
8601   }
8602   case PPC::BI__builtin_altivec_vctzb:
8603   case PPC::BI__builtin_altivec_vctzh:
8604   case PPC::BI__builtin_altivec_vctzw:
8605   case PPC::BI__builtin_altivec_vctzd: {
8606     llvm::Type *ResultType = ConvertType(E->getType());
8607     Value *X = EmitScalarExpr(E->getArg(0));
8608     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8609     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
8610     return Builder.CreateCall(F, {X, Undef});
8611   }
8612   case PPC::BI__builtin_altivec_vpopcntb:
8613   case PPC::BI__builtin_altivec_vpopcnth:
8614   case PPC::BI__builtin_altivec_vpopcntw:
8615   case PPC::BI__builtin_altivec_vpopcntd: {
8616     llvm::Type *ResultType = ConvertType(E->getType());
8617     Value *X = EmitScalarExpr(E->getArg(0));
8618     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
8619     return Builder.CreateCall(F, X);
8620   }
8621   // Copy sign
8622   case PPC::BI__builtin_vsx_xvcpsgnsp:
8623   case PPC::BI__builtin_vsx_xvcpsgndp: {
8624     llvm::Type *ResultType = ConvertType(E->getType());
8625     Value *X = EmitScalarExpr(E->getArg(0));
8626     Value *Y = EmitScalarExpr(E->getArg(1));
8627     ID = Intrinsic::copysign;
8628     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8629     return Builder.CreateCall(F, {X, Y});
8630   }
8631   // Rounding/truncation
8632   case PPC::BI__builtin_vsx_xvrspip:
8633   case PPC::BI__builtin_vsx_xvrdpip:
8634   case PPC::BI__builtin_vsx_xvrdpim:
8635   case PPC::BI__builtin_vsx_xvrspim:
8636   case PPC::BI__builtin_vsx_xvrdpi:
8637   case PPC::BI__builtin_vsx_xvrspi:
8638   case PPC::BI__builtin_vsx_xvrdpic:
8639   case PPC::BI__builtin_vsx_xvrspic:
8640   case PPC::BI__builtin_vsx_xvrdpiz:
8641   case PPC::BI__builtin_vsx_xvrspiz: {
8642     llvm::Type *ResultType = ConvertType(E->getType());
8643     Value *X = EmitScalarExpr(E->getArg(0));
8644     if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
8645         BuiltinID == PPC::BI__builtin_vsx_xvrspim)
8646       ID = Intrinsic::floor;
8647     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
8648              BuiltinID == PPC::BI__builtin_vsx_xvrspi)
8649       ID = Intrinsic::round;
8650     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
8651              BuiltinID == PPC::BI__builtin_vsx_xvrspic)
8652       ID = Intrinsic::nearbyint;
8653     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
8654              BuiltinID == PPC::BI__builtin_vsx_xvrspip)
8655       ID = Intrinsic::ceil;
8656     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
8657              BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
8658       ID = Intrinsic::trunc;
8659     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8660     return Builder.CreateCall(F, X);
8661   }
8662 
8663   // Absolute value
8664   case PPC::BI__builtin_vsx_xvabsdp:
8665   case PPC::BI__builtin_vsx_xvabssp: {
8666     llvm::Type *ResultType = ConvertType(E->getType());
8667     Value *X = EmitScalarExpr(E->getArg(0));
8668     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8669     return Builder.CreateCall(F, X);
8670   }
8671 
8672   // FMA variations
8673   case PPC::BI__builtin_vsx_xvmaddadp:
8674   case PPC::BI__builtin_vsx_xvmaddasp:
8675   case PPC::BI__builtin_vsx_xvnmaddadp:
8676   case PPC::BI__builtin_vsx_xvnmaddasp:
8677   case PPC::BI__builtin_vsx_xvmsubadp:
8678   case PPC::BI__builtin_vsx_xvmsubasp:
8679   case PPC::BI__builtin_vsx_xvnmsubadp:
8680   case PPC::BI__builtin_vsx_xvnmsubasp: {
8681     llvm::Type *ResultType = ConvertType(E->getType());
8682     Value *X = EmitScalarExpr(E->getArg(0));
8683     Value *Y = EmitScalarExpr(E->getArg(1));
8684     Value *Z = EmitScalarExpr(E->getArg(2));
8685     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8686     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8687     switch (BuiltinID) {
8688       case PPC::BI__builtin_vsx_xvmaddadp:
8689       case PPC::BI__builtin_vsx_xvmaddasp:
8690         return Builder.CreateCall(F, {X, Y, Z});
8691       case PPC::BI__builtin_vsx_xvnmaddadp:
8692       case PPC::BI__builtin_vsx_xvnmaddasp:
8693         return Builder.CreateFSub(Zero,
8694                                   Builder.CreateCall(F, {X, Y, Z}), "sub");
8695       case PPC::BI__builtin_vsx_xvmsubadp:
8696       case PPC::BI__builtin_vsx_xvmsubasp:
8697         return Builder.CreateCall(F,
8698                                   {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8699       case PPC::BI__builtin_vsx_xvnmsubadp:
8700       case PPC::BI__builtin_vsx_xvnmsubasp:
8701         Value *FsubRes =
8702           Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8703         return Builder.CreateFSub(Zero, FsubRes, "sub");
8704     }
8705     llvm_unreachable("Unknown FMA operation");
8706     return nullptr; // Suppress no-return warning
8707   }
8708 
8709   case PPC::BI__builtin_vsx_insertword: {
8710     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
8711 
8712     // Third argument is a compile time constant int. It must be clamped
8713     // to the range [0, 12].
8714     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8715     assert(ArgCI &&
8716            "Third arg to xxinsertw intrinsic must be constant integer");
8717     const int64_t MaxIndex = 12;
8718     int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
8719 
8720     // The builtin semantics don't exactly match the xxinsertw instruction's
8721     // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
8722     // word from the first argument, and inserts it in the second argument. The
8723     // instruction extracts the word from its second input register and inserts
8724     // it into its first input register, so swap the first and second arguments.
8725     std::swap(Ops[0], Ops[1]);
8726 
8727     // Need to cast the second argument from a vector of unsigned int to a
8728     // vector of long long.
8729     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
8730 
8731     if (getTarget().isLittleEndian()) {
8732       // Create a shuffle mask of (1, 0)
8733       Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
8734                                    ConstantInt::get(Int32Ty, 0)
8735                                  };
8736       Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8737 
8738       // Reverse the double words in the vector we will extract from.
8739       Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8740       Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask);
8741 
8742       // Reverse the index.
8743       Index = MaxIndex - Index;
8744     }
8745 
8746     // Intrinsic expects the first arg to be a vector of int.
8747     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
8748     Ops[2] = ConstantInt::getSigned(Int32Ty, Index);
8749     return Builder.CreateCall(F, Ops);
8750   }
8751 
8752   case PPC::BI__builtin_vsx_extractuword: {
8753     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
8754 
8755     // Intrinsic expects the first argument to be a vector of doublewords.
8756     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8757 
8758     // The second argument is a compile time constant int that needs to
8759     // be clamped to the range [0, 12].
8760     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]);
8761     assert(ArgCI &&
8762            "Second Arg to xxextractuw intrinsic must be a constant integer!");
8763     const int64_t MaxIndex = 12;
8764     int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
8765 
8766     if (getTarget().isLittleEndian()) {
8767       // Reverse the index.
8768       Index = MaxIndex - Index;
8769       Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
8770 
8771       // Emit the call, then reverse the double words of the results vector.
8772       Value *Call = Builder.CreateCall(F, Ops);
8773 
8774       // Create a shuffle mask of (1, 0)
8775       Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
8776                                    ConstantInt::get(Int32Ty, 0)
8777                                  };
8778       Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8779 
8780       Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask);
8781       return ShuffleCall;
8782     } else {
8783       Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
8784       return Builder.CreateCall(F, Ops);
8785     }
8786   }
8787 
8788   case PPC::BI__builtin_vsx_xxpermdi: {
8789     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8790     assert(ArgCI && "Third arg must be constant integer!");
8791 
8792     unsigned Index = ArgCI->getZExtValue();
8793     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8794     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
8795 
8796     // Element zero comes from the first input vector and element one comes from
8797     // the second. The element indices within each vector are numbered in big
8798     // endian order so the shuffle mask must be adjusted for this on little
8799     // endian platforms (i.e. index is complemented and source vector reversed).
8800     unsigned ElemIdx0;
8801     unsigned ElemIdx1;
8802     if (getTarget().isLittleEndian()) {
8803       ElemIdx0 = (~Index & 1) + 2;
8804       ElemIdx1 = (~Index & 2) >> 1;
8805     } else { // BigEndian
8806       ElemIdx0 = (Index & 2) >> 1;
8807       ElemIdx1 = 2 + (Index & 1);
8808     }
8809 
8810     Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0),
8811                                 ConstantInt::get(Int32Ty, ElemIdx1)};
8812     Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8813 
8814     Value *ShuffleCall =
8815         Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
8816     QualType BIRetType = E->getType();
8817     auto RetTy = ConvertType(BIRetType);
8818     return Builder.CreateBitCast(ShuffleCall, RetTy);
8819   }
8820 
8821   case PPC::BI__builtin_vsx_xxsldwi: {
8822     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8823     assert(ArgCI && "Third argument must be a compile time constant");
8824     unsigned Index = ArgCI->getZExtValue() & 0x3;
8825     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
8826     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4));
8827 
8828     // Create a shuffle mask
8829     unsigned ElemIdx0;
8830     unsigned ElemIdx1;
8831     unsigned ElemIdx2;
8832     unsigned ElemIdx3;
8833     if (getTarget().isLittleEndian()) {
8834       // Little endian element N comes from element 8+N-Index of the
8835       // concatenated wide vector (of course, using modulo arithmetic on
8836       // the total number of elements).
8837       ElemIdx0 = (8 - Index) % 8;
8838       ElemIdx1 = (9 - Index) % 8;
8839       ElemIdx2 = (10 - Index) % 8;
8840       ElemIdx3 = (11 - Index) % 8;
8841     } else {
8842       // Big endian ElemIdx<N> = Index + N
8843       ElemIdx0 = Index;
8844       ElemIdx1 = Index + 1;
8845       ElemIdx2 = Index + 2;
8846       ElemIdx3 = Index + 3;
8847     }
8848 
8849     Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0),
8850                                 ConstantInt::get(Int32Ty, ElemIdx1),
8851                                 ConstantInt::get(Int32Ty, ElemIdx2),
8852                                 ConstantInt::get(Int32Ty, ElemIdx3)};
8853 
8854     Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8855     Value *ShuffleCall =
8856         Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
8857     QualType BIRetType = E->getType();
8858     auto RetTy = ConvertType(BIRetType);
8859     return Builder.CreateBitCast(ShuffleCall, RetTy);
8860   }
8861   }
8862 }
8863 
8864 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
8865                                               const CallExpr *E) {
8866   switch (BuiltinID) {
8867   case AMDGPU::BI__builtin_amdgcn_div_scale:
8868   case AMDGPU::BI__builtin_amdgcn_div_scalef: {
8869     // Translate from the intrinsics's struct return to the builtin's out
8870     // argument.
8871 
8872     Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
8873 
8874     llvm::Value *X = EmitScalarExpr(E->getArg(0));
8875     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
8876     llvm::Value *Z = EmitScalarExpr(E->getArg(2));
8877 
8878     llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
8879                                            X->getType());
8880 
8881     llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
8882 
8883     llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
8884     llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
8885 
8886     llvm::Type *RealFlagType
8887       = FlagOutPtr.getPointer()->getType()->getPointerElementType();
8888 
8889     llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
8890     Builder.CreateStore(FlagExt, FlagOutPtr);
8891     return Result;
8892   }
8893   case AMDGPU::BI__builtin_amdgcn_div_fmas:
8894   case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
8895     llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
8896     llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
8897     llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
8898     llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
8899 
8900     llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
8901                                       Src0->getType());
8902     llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
8903     return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
8904   }
8905 
8906   case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
8907     return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
8908   case AMDGPU::BI__builtin_amdgcn_mov_dpp: {
8909     llvm::SmallVector<llvm::Value *, 5> Args;
8910     for (unsigned I = 0; I != 5; ++I)
8911       Args.push_back(EmitScalarExpr(E->getArg(I)));
8912     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp,
8913                                     Args[0]->getType());
8914     return Builder.CreateCall(F, Args);
8915   }
8916   case AMDGPU::BI__builtin_amdgcn_div_fixup:
8917   case AMDGPU::BI__builtin_amdgcn_div_fixupf:
8918   case AMDGPU::BI__builtin_amdgcn_div_fixuph:
8919     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
8920   case AMDGPU::BI__builtin_amdgcn_trig_preop:
8921   case AMDGPU::BI__builtin_amdgcn_trig_preopf:
8922     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
8923   case AMDGPU::BI__builtin_amdgcn_rcp:
8924   case AMDGPU::BI__builtin_amdgcn_rcpf:
8925   case AMDGPU::BI__builtin_amdgcn_rcph:
8926     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
8927   case AMDGPU::BI__builtin_amdgcn_rsq:
8928   case AMDGPU::BI__builtin_amdgcn_rsqf:
8929   case AMDGPU::BI__builtin_amdgcn_rsqh:
8930     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
8931   case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
8932   case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
8933     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
8934   case AMDGPU::BI__builtin_amdgcn_sinf:
8935   case AMDGPU::BI__builtin_amdgcn_sinh:
8936     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
8937   case AMDGPU::BI__builtin_amdgcn_cosf:
8938   case AMDGPU::BI__builtin_amdgcn_cosh:
8939     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
8940   case AMDGPU::BI__builtin_amdgcn_log_clampf:
8941     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
8942   case AMDGPU::BI__builtin_amdgcn_ldexp:
8943   case AMDGPU::BI__builtin_amdgcn_ldexpf:
8944   case AMDGPU::BI__builtin_amdgcn_ldexph:
8945     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
8946   case AMDGPU::BI__builtin_amdgcn_frexp_mant:
8947   case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
8948   case AMDGPU::BI__builtin_amdgcn_frexp_manth:
8949     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
8950   case AMDGPU::BI__builtin_amdgcn_frexp_exp:
8951   case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
8952     Value *Src0 = EmitScalarExpr(E->getArg(0));
8953     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
8954                                 { Builder.getInt32Ty(), Src0->getType() });
8955     return Builder.CreateCall(F, Src0);
8956   }
8957   case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
8958     Value *Src0 = EmitScalarExpr(E->getArg(0));
8959     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
8960                                 { Builder.getInt16Ty(), Src0->getType() });
8961     return Builder.CreateCall(F, Src0);
8962   }
8963   case AMDGPU::BI__builtin_amdgcn_fract:
8964   case AMDGPU::BI__builtin_amdgcn_fractf:
8965   case AMDGPU::BI__builtin_amdgcn_fracth:
8966     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
8967   case AMDGPU::BI__builtin_amdgcn_lerp:
8968     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
8969   case AMDGPU::BI__builtin_amdgcn_uicmp:
8970   case AMDGPU::BI__builtin_amdgcn_uicmpl:
8971   case AMDGPU::BI__builtin_amdgcn_sicmp:
8972   case AMDGPU::BI__builtin_amdgcn_sicmpl:
8973     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp);
8974   case AMDGPU::BI__builtin_amdgcn_fcmp:
8975   case AMDGPU::BI__builtin_amdgcn_fcmpf:
8976     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp);
8977   case AMDGPU::BI__builtin_amdgcn_class:
8978   case AMDGPU::BI__builtin_amdgcn_classf:
8979   case AMDGPU::BI__builtin_amdgcn_classh:
8980     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
8981   case AMDGPU::BI__builtin_amdgcn_fmed3f:
8982   case AMDGPU::BI__builtin_amdgcn_fmed3h:
8983     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
8984   case AMDGPU::BI__builtin_amdgcn_read_exec: {
8985     CallInst *CI = cast<CallInst>(
8986       EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
8987     CI->setConvergent();
8988     return CI;
8989   }
8990 
8991   // amdgcn workitem
8992   case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
8993     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
8994   case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
8995     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
8996   case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
8997     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
8998 
8999   // r600 intrinsics
9000   case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
9001   case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
9002     return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
9003   case AMDGPU::BI__builtin_r600_read_tidig_x:
9004     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
9005   case AMDGPU::BI__builtin_r600_read_tidig_y:
9006     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
9007   case AMDGPU::BI__builtin_r600_read_tidig_z:
9008     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
9009   default:
9010     return nullptr;
9011   }
9012 }
9013 
9014 /// Handle a SystemZ function in which the final argument is a pointer
9015 /// to an int that receives the post-instruction CC value.  At the LLVM level
9016 /// this is represented as a function that returns a {result, cc} pair.
9017 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
9018                                          unsigned IntrinsicID,
9019                                          const CallExpr *E) {
9020   unsigned NumArgs = E->getNumArgs() - 1;
9021   SmallVector<Value *, 8> Args(NumArgs);
9022   for (unsigned I = 0; I < NumArgs; ++I)
9023     Args[I] = CGF.EmitScalarExpr(E->getArg(I));
9024   Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
9025   Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
9026   Value *Call = CGF.Builder.CreateCall(F, Args);
9027   Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
9028   CGF.Builder.CreateStore(CC, CCPtr);
9029   return CGF.Builder.CreateExtractValue(Call, 0);
9030 }
9031 
9032 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
9033                                                const CallExpr *E) {
9034   switch (BuiltinID) {
9035   case SystemZ::BI__builtin_tbegin: {
9036     Value *TDB = EmitScalarExpr(E->getArg(0));
9037     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
9038     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
9039     return Builder.CreateCall(F, {TDB, Control});
9040   }
9041   case SystemZ::BI__builtin_tbegin_nofloat: {
9042     Value *TDB = EmitScalarExpr(E->getArg(0));
9043     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
9044     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
9045     return Builder.CreateCall(F, {TDB, Control});
9046   }
9047   case SystemZ::BI__builtin_tbeginc: {
9048     Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
9049     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
9050     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
9051     return Builder.CreateCall(F, {TDB, Control});
9052   }
9053   case SystemZ::BI__builtin_tabort: {
9054     Value *Data = EmitScalarExpr(E->getArg(0));
9055     Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
9056     return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
9057   }
9058   case SystemZ::BI__builtin_non_tx_store: {
9059     Value *Address = EmitScalarExpr(E->getArg(0));
9060     Value *Data = EmitScalarExpr(E->getArg(1));
9061     Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
9062     return Builder.CreateCall(F, {Data, Address});
9063   }
9064 
9065   // Vector builtins.  Note that most vector builtins are mapped automatically
9066   // to target-specific LLVM intrinsics.  The ones handled specially here can
9067   // be represented via standard LLVM IR, which is preferable to enable common
9068   // LLVM optimizations.
9069 
9070   case SystemZ::BI__builtin_s390_vpopctb:
9071   case SystemZ::BI__builtin_s390_vpopcth:
9072   case SystemZ::BI__builtin_s390_vpopctf:
9073   case SystemZ::BI__builtin_s390_vpopctg: {
9074     llvm::Type *ResultType = ConvertType(E->getType());
9075     Value *X = EmitScalarExpr(E->getArg(0));
9076     Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
9077     return Builder.CreateCall(F, X);
9078   }
9079 
9080   case SystemZ::BI__builtin_s390_vclzb:
9081   case SystemZ::BI__builtin_s390_vclzh:
9082   case SystemZ::BI__builtin_s390_vclzf:
9083   case SystemZ::BI__builtin_s390_vclzg: {
9084     llvm::Type *ResultType = ConvertType(E->getType());
9085     Value *X = EmitScalarExpr(E->getArg(0));
9086     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
9087     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
9088     return Builder.CreateCall(F, {X, Undef});
9089   }
9090 
9091   case SystemZ::BI__builtin_s390_vctzb:
9092   case SystemZ::BI__builtin_s390_vctzh:
9093   case SystemZ::BI__builtin_s390_vctzf:
9094   case SystemZ::BI__builtin_s390_vctzg: {
9095     llvm::Type *ResultType = ConvertType(E->getType());
9096     Value *X = EmitScalarExpr(E->getArg(0));
9097     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
9098     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
9099     return Builder.CreateCall(F, {X, Undef});
9100   }
9101 
9102   case SystemZ::BI__builtin_s390_vfsqsb:
9103   case SystemZ::BI__builtin_s390_vfsqdb: {
9104     llvm::Type *ResultType = ConvertType(E->getType());
9105     Value *X = EmitScalarExpr(E->getArg(0));
9106     Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
9107     return Builder.CreateCall(F, X);
9108   }
9109   case SystemZ::BI__builtin_s390_vfmasb:
9110   case SystemZ::BI__builtin_s390_vfmadb: {
9111     llvm::Type *ResultType = ConvertType(E->getType());
9112     Value *X = EmitScalarExpr(E->getArg(0));
9113     Value *Y = EmitScalarExpr(E->getArg(1));
9114     Value *Z = EmitScalarExpr(E->getArg(2));
9115     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
9116     return Builder.CreateCall(F, {X, Y, Z});
9117   }
9118   case SystemZ::BI__builtin_s390_vfmssb:
9119   case SystemZ::BI__builtin_s390_vfmsdb: {
9120     llvm::Type *ResultType = ConvertType(E->getType());
9121     Value *X = EmitScalarExpr(E->getArg(0));
9122     Value *Y = EmitScalarExpr(E->getArg(1));
9123     Value *Z = EmitScalarExpr(E->getArg(2));
9124     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
9125     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
9126     return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
9127   }
9128   case SystemZ::BI__builtin_s390_vfnmasb:
9129   case SystemZ::BI__builtin_s390_vfnmadb: {
9130     llvm::Type *ResultType = ConvertType(E->getType());
9131     Value *X = EmitScalarExpr(E->getArg(0));
9132     Value *Y = EmitScalarExpr(E->getArg(1));
9133     Value *Z = EmitScalarExpr(E->getArg(2));
9134     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
9135     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
9136     return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, Z}), "sub");
9137   }
9138   case SystemZ::BI__builtin_s390_vfnmssb:
9139   case SystemZ::BI__builtin_s390_vfnmsdb: {
9140     llvm::Type *ResultType = ConvertType(E->getType());
9141     Value *X = EmitScalarExpr(E->getArg(0));
9142     Value *Y = EmitScalarExpr(E->getArg(1));
9143     Value *Z = EmitScalarExpr(E->getArg(2));
9144     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
9145     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
9146     Value *NegZ = Builder.CreateFSub(Zero, Z, "sub");
9147     return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, NegZ}));
9148   }
9149   case SystemZ::BI__builtin_s390_vflpsb:
9150   case SystemZ::BI__builtin_s390_vflpdb: {
9151     llvm::Type *ResultType = ConvertType(E->getType());
9152     Value *X = EmitScalarExpr(E->getArg(0));
9153     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
9154     return Builder.CreateCall(F, X);
9155   }
9156   case SystemZ::BI__builtin_s390_vflnsb:
9157   case SystemZ::BI__builtin_s390_vflndb: {
9158     llvm::Type *ResultType = ConvertType(E->getType());
9159     Value *X = EmitScalarExpr(E->getArg(0));
9160     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
9161     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
9162     return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
9163   }
9164   case SystemZ::BI__builtin_s390_vfisb:
9165   case SystemZ::BI__builtin_s390_vfidb: {
9166     llvm::Type *ResultType = ConvertType(E->getType());
9167     Value *X = EmitScalarExpr(E->getArg(0));
9168     // Constant-fold the M4 and M5 mask arguments.
9169     llvm::APSInt M4, M5;
9170     bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
9171     bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
9172     assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
9173     (void)IsConstM4; (void)IsConstM5;
9174     // Check whether this instance can be represented via a LLVM standard
9175     // intrinsic.  We only support some combinations of M4 and M5.
9176     Intrinsic::ID ID = Intrinsic::not_intrinsic;
9177     switch (M4.getZExtValue()) {
9178     default: break;
9179     case 0:  // IEEE-inexact exception allowed
9180       switch (M5.getZExtValue()) {
9181       default: break;
9182       case 0: ID = Intrinsic::rint; break;
9183       }
9184       break;
9185     case 4:  // IEEE-inexact exception suppressed
9186       switch (M5.getZExtValue()) {
9187       default: break;
9188       case 0: ID = Intrinsic::nearbyint; break;
9189       case 1: ID = Intrinsic::round; break;
9190       case 5: ID = Intrinsic::trunc; break;
9191       case 6: ID = Intrinsic::ceil; break;
9192       case 7: ID = Intrinsic::floor; break;
9193       }
9194       break;
9195     }
9196     if (ID != Intrinsic::not_intrinsic) {
9197       Function *F = CGM.getIntrinsic(ID, ResultType);
9198       return Builder.CreateCall(F, X);
9199     }
9200     switch (BuiltinID) {
9201       case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
9202       case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
9203       default: llvm_unreachable("Unknown BuiltinID");
9204     }
9205     Function *F = CGM.getIntrinsic(ID);
9206     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
9207     Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
9208     return Builder.CreateCall(F, {X, M4Value, M5Value});
9209   }
9210   case SystemZ::BI__builtin_s390_vfmaxsb:
9211   case SystemZ::BI__builtin_s390_vfmaxdb: {
9212     llvm::Type *ResultType = ConvertType(E->getType());
9213     Value *X = EmitScalarExpr(E->getArg(0));
9214     Value *Y = EmitScalarExpr(E->getArg(1));
9215     // Constant-fold the M4 mask argument.
9216     llvm::APSInt M4;
9217     bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
9218     assert(IsConstM4 && "Constant arg isn't actually constant?");
9219     (void)IsConstM4;
9220     // Check whether this instance can be represented via a LLVM standard
9221     // intrinsic.  We only support some values of M4.
9222     Intrinsic::ID ID = Intrinsic::not_intrinsic;
9223     switch (M4.getZExtValue()) {
9224     default: break;
9225     case 4: ID = Intrinsic::maxnum; break;
9226     }
9227     if (ID != Intrinsic::not_intrinsic) {
9228       Function *F = CGM.getIntrinsic(ID, ResultType);
9229       return Builder.CreateCall(F, {X, Y});
9230     }
9231     switch (BuiltinID) {
9232       case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
9233       case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
9234       default: llvm_unreachable("Unknown BuiltinID");
9235     }
9236     Function *F = CGM.getIntrinsic(ID);
9237     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
9238     return Builder.CreateCall(F, {X, Y, M4Value});
9239   }
9240   case SystemZ::BI__builtin_s390_vfminsb:
9241   case SystemZ::BI__builtin_s390_vfmindb: {
9242     llvm::Type *ResultType = ConvertType(E->getType());
9243     Value *X = EmitScalarExpr(E->getArg(0));
9244     Value *Y = EmitScalarExpr(E->getArg(1));
9245     // Constant-fold the M4 mask argument.
9246     llvm::APSInt M4;
9247     bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
9248     assert(IsConstM4 && "Constant arg isn't actually constant?");
9249     (void)IsConstM4;
9250     // Check whether this instance can be represented via a LLVM standard
9251     // intrinsic.  We only support some values of M4.
9252     Intrinsic::ID ID = Intrinsic::not_intrinsic;
9253     switch (M4.getZExtValue()) {
9254     default: break;
9255     case 4: ID = Intrinsic::minnum; break;
9256     }
9257     if (ID != Intrinsic::not_intrinsic) {
9258       Function *F = CGM.getIntrinsic(ID, ResultType);
9259       return Builder.CreateCall(F, {X, Y});
9260     }
9261     switch (BuiltinID) {
9262       case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
9263       case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
9264       default: llvm_unreachable("Unknown BuiltinID");
9265     }
9266     Function *F = CGM.getIntrinsic(ID);
9267     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
9268     return Builder.CreateCall(F, {X, Y, M4Value});
9269   }
9270 
9271   // Vector intrisincs that output the post-instruction CC value.
9272 
9273 #define INTRINSIC_WITH_CC(NAME) \
9274     case SystemZ::BI__builtin_##NAME: \
9275       return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
9276 
9277   INTRINSIC_WITH_CC(s390_vpkshs);
9278   INTRINSIC_WITH_CC(s390_vpksfs);
9279   INTRINSIC_WITH_CC(s390_vpksgs);
9280 
9281   INTRINSIC_WITH_CC(s390_vpklshs);
9282   INTRINSIC_WITH_CC(s390_vpklsfs);
9283   INTRINSIC_WITH_CC(s390_vpklsgs);
9284 
9285   INTRINSIC_WITH_CC(s390_vceqbs);
9286   INTRINSIC_WITH_CC(s390_vceqhs);
9287   INTRINSIC_WITH_CC(s390_vceqfs);
9288   INTRINSIC_WITH_CC(s390_vceqgs);
9289 
9290   INTRINSIC_WITH_CC(s390_vchbs);
9291   INTRINSIC_WITH_CC(s390_vchhs);
9292   INTRINSIC_WITH_CC(s390_vchfs);
9293   INTRINSIC_WITH_CC(s390_vchgs);
9294 
9295   INTRINSIC_WITH_CC(s390_vchlbs);
9296   INTRINSIC_WITH_CC(s390_vchlhs);
9297   INTRINSIC_WITH_CC(s390_vchlfs);
9298   INTRINSIC_WITH_CC(s390_vchlgs);
9299 
9300   INTRINSIC_WITH_CC(s390_vfaebs);
9301   INTRINSIC_WITH_CC(s390_vfaehs);
9302   INTRINSIC_WITH_CC(s390_vfaefs);
9303 
9304   INTRINSIC_WITH_CC(s390_vfaezbs);
9305   INTRINSIC_WITH_CC(s390_vfaezhs);
9306   INTRINSIC_WITH_CC(s390_vfaezfs);
9307 
9308   INTRINSIC_WITH_CC(s390_vfeebs);
9309   INTRINSIC_WITH_CC(s390_vfeehs);
9310   INTRINSIC_WITH_CC(s390_vfeefs);
9311 
9312   INTRINSIC_WITH_CC(s390_vfeezbs);
9313   INTRINSIC_WITH_CC(s390_vfeezhs);
9314   INTRINSIC_WITH_CC(s390_vfeezfs);
9315 
9316   INTRINSIC_WITH_CC(s390_vfenebs);
9317   INTRINSIC_WITH_CC(s390_vfenehs);
9318   INTRINSIC_WITH_CC(s390_vfenefs);
9319 
9320   INTRINSIC_WITH_CC(s390_vfenezbs);
9321   INTRINSIC_WITH_CC(s390_vfenezhs);
9322   INTRINSIC_WITH_CC(s390_vfenezfs);
9323 
9324   INTRINSIC_WITH_CC(s390_vistrbs);
9325   INTRINSIC_WITH_CC(s390_vistrhs);
9326   INTRINSIC_WITH_CC(s390_vistrfs);
9327 
9328   INTRINSIC_WITH_CC(s390_vstrcbs);
9329   INTRINSIC_WITH_CC(s390_vstrchs);
9330   INTRINSIC_WITH_CC(s390_vstrcfs);
9331 
9332   INTRINSIC_WITH_CC(s390_vstrczbs);
9333   INTRINSIC_WITH_CC(s390_vstrczhs);
9334   INTRINSIC_WITH_CC(s390_vstrczfs);
9335 
9336   INTRINSIC_WITH_CC(s390_vfcesbs);
9337   INTRINSIC_WITH_CC(s390_vfcedbs);
9338   INTRINSIC_WITH_CC(s390_vfchsbs);
9339   INTRINSIC_WITH_CC(s390_vfchdbs);
9340   INTRINSIC_WITH_CC(s390_vfchesbs);
9341   INTRINSIC_WITH_CC(s390_vfchedbs);
9342 
9343   INTRINSIC_WITH_CC(s390_vftcisb);
9344   INTRINSIC_WITH_CC(s390_vftcidb);
9345 
9346 #undef INTRINSIC_WITH_CC
9347 
9348   default:
9349     return nullptr;
9350   }
9351 }
9352 
9353 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
9354                                              const CallExpr *E) {
9355   auto MakeLdg = [&](unsigned IntrinsicID) {
9356     Value *Ptr = EmitScalarExpr(E->getArg(0));
9357     clang::CharUnits Align =
9358         getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
9359     return Builder.CreateCall(
9360         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
9361                                        Ptr->getType()}),
9362         {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
9363   };
9364   auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
9365     Value *Ptr = EmitScalarExpr(E->getArg(0));
9366     return Builder.CreateCall(
9367         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
9368                                        Ptr->getType()}),
9369         {Ptr, EmitScalarExpr(E->getArg(1))});
9370   };
9371   switch (BuiltinID) {
9372   case NVPTX::BI__nvvm_atom_add_gen_i:
9373   case NVPTX::BI__nvvm_atom_add_gen_l:
9374   case NVPTX::BI__nvvm_atom_add_gen_ll:
9375     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
9376 
9377   case NVPTX::BI__nvvm_atom_sub_gen_i:
9378   case NVPTX::BI__nvvm_atom_sub_gen_l:
9379   case NVPTX::BI__nvvm_atom_sub_gen_ll:
9380     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
9381 
9382   case NVPTX::BI__nvvm_atom_and_gen_i:
9383   case NVPTX::BI__nvvm_atom_and_gen_l:
9384   case NVPTX::BI__nvvm_atom_and_gen_ll:
9385     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
9386 
9387   case NVPTX::BI__nvvm_atom_or_gen_i:
9388   case NVPTX::BI__nvvm_atom_or_gen_l:
9389   case NVPTX::BI__nvvm_atom_or_gen_ll:
9390     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
9391 
9392   case NVPTX::BI__nvvm_atom_xor_gen_i:
9393   case NVPTX::BI__nvvm_atom_xor_gen_l:
9394   case NVPTX::BI__nvvm_atom_xor_gen_ll:
9395     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
9396 
9397   case NVPTX::BI__nvvm_atom_xchg_gen_i:
9398   case NVPTX::BI__nvvm_atom_xchg_gen_l:
9399   case NVPTX::BI__nvvm_atom_xchg_gen_ll:
9400     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
9401 
9402   case NVPTX::BI__nvvm_atom_max_gen_i:
9403   case NVPTX::BI__nvvm_atom_max_gen_l:
9404   case NVPTX::BI__nvvm_atom_max_gen_ll:
9405     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
9406 
9407   case NVPTX::BI__nvvm_atom_max_gen_ui:
9408   case NVPTX::BI__nvvm_atom_max_gen_ul:
9409   case NVPTX::BI__nvvm_atom_max_gen_ull:
9410     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
9411 
9412   case NVPTX::BI__nvvm_atom_min_gen_i:
9413   case NVPTX::BI__nvvm_atom_min_gen_l:
9414   case NVPTX::BI__nvvm_atom_min_gen_ll:
9415     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
9416 
9417   case NVPTX::BI__nvvm_atom_min_gen_ui:
9418   case NVPTX::BI__nvvm_atom_min_gen_ul:
9419   case NVPTX::BI__nvvm_atom_min_gen_ull:
9420     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
9421 
9422   case NVPTX::BI__nvvm_atom_cas_gen_i:
9423   case NVPTX::BI__nvvm_atom_cas_gen_l:
9424   case NVPTX::BI__nvvm_atom_cas_gen_ll:
9425     // __nvvm_atom_cas_gen_* should return the old value rather than the
9426     // success flag.
9427     return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
9428 
9429   case NVPTX::BI__nvvm_atom_add_gen_f: {
9430     Value *Ptr = EmitScalarExpr(E->getArg(0));
9431     Value *Val = EmitScalarExpr(E->getArg(1));
9432     // atomicrmw only deals with integer arguments so we need to use
9433     // LLVM's nvvm_atomic_load_add_f32 intrinsic for that.
9434     Value *FnALAF32 =
9435         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
9436     return Builder.CreateCall(FnALAF32, {Ptr, Val});
9437   }
9438 
9439   case NVPTX::BI__nvvm_atom_inc_gen_ui: {
9440     Value *Ptr = EmitScalarExpr(E->getArg(0));
9441     Value *Val = EmitScalarExpr(E->getArg(1));
9442     Value *FnALI32 =
9443         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
9444     return Builder.CreateCall(FnALI32, {Ptr, Val});
9445   }
9446 
9447   case NVPTX::BI__nvvm_atom_dec_gen_ui: {
9448     Value *Ptr = EmitScalarExpr(E->getArg(0));
9449     Value *Val = EmitScalarExpr(E->getArg(1));
9450     Value *FnALD32 =
9451         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
9452     return Builder.CreateCall(FnALD32, {Ptr, Val});
9453   }
9454 
9455   case NVPTX::BI__nvvm_ldg_c:
9456   case NVPTX::BI__nvvm_ldg_c2:
9457   case NVPTX::BI__nvvm_ldg_c4:
9458   case NVPTX::BI__nvvm_ldg_s:
9459   case NVPTX::BI__nvvm_ldg_s2:
9460   case NVPTX::BI__nvvm_ldg_s4:
9461   case NVPTX::BI__nvvm_ldg_i:
9462   case NVPTX::BI__nvvm_ldg_i2:
9463   case NVPTX::BI__nvvm_ldg_i4:
9464   case NVPTX::BI__nvvm_ldg_l:
9465   case NVPTX::BI__nvvm_ldg_ll:
9466   case NVPTX::BI__nvvm_ldg_ll2:
9467   case NVPTX::BI__nvvm_ldg_uc:
9468   case NVPTX::BI__nvvm_ldg_uc2:
9469   case NVPTX::BI__nvvm_ldg_uc4:
9470   case NVPTX::BI__nvvm_ldg_us:
9471   case NVPTX::BI__nvvm_ldg_us2:
9472   case NVPTX::BI__nvvm_ldg_us4:
9473   case NVPTX::BI__nvvm_ldg_ui:
9474   case NVPTX::BI__nvvm_ldg_ui2:
9475   case NVPTX::BI__nvvm_ldg_ui4:
9476   case NVPTX::BI__nvvm_ldg_ul:
9477   case NVPTX::BI__nvvm_ldg_ull:
9478   case NVPTX::BI__nvvm_ldg_ull2:
9479     // PTX Interoperability section 2.2: "For a vector with an even number of
9480     // elements, its alignment is set to number of elements times the alignment
9481     // of its member: n*alignof(t)."
9482     return MakeLdg(Intrinsic::nvvm_ldg_global_i);
9483   case NVPTX::BI__nvvm_ldg_f:
9484   case NVPTX::BI__nvvm_ldg_f2:
9485   case NVPTX::BI__nvvm_ldg_f4:
9486   case NVPTX::BI__nvvm_ldg_d:
9487   case NVPTX::BI__nvvm_ldg_d2:
9488     return MakeLdg(Intrinsic::nvvm_ldg_global_f);
9489 
9490   case NVPTX::BI__nvvm_atom_cta_add_gen_i:
9491   case NVPTX::BI__nvvm_atom_cta_add_gen_l:
9492   case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
9493     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta);
9494   case NVPTX::BI__nvvm_atom_sys_add_gen_i:
9495   case NVPTX::BI__nvvm_atom_sys_add_gen_l:
9496   case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
9497     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys);
9498   case NVPTX::BI__nvvm_atom_cta_add_gen_f:
9499   case NVPTX::BI__nvvm_atom_cta_add_gen_d:
9500     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta);
9501   case NVPTX::BI__nvvm_atom_sys_add_gen_f:
9502   case NVPTX::BI__nvvm_atom_sys_add_gen_d:
9503     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys);
9504   case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
9505   case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
9506   case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
9507     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta);
9508   case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
9509   case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
9510   case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
9511     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys);
9512   case NVPTX::BI__nvvm_atom_cta_max_gen_i:
9513   case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
9514   case NVPTX::BI__nvvm_atom_cta_max_gen_l:
9515   case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
9516   case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
9517   case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
9518     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta);
9519   case NVPTX::BI__nvvm_atom_sys_max_gen_i:
9520   case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
9521   case NVPTX::BI__nvvm_atom_sys_max_gen_l:
9522   case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
9523   case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
9524   case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
9525     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys);
9526   case NVPTX::BI__nvvm_atom_cta_min_gen_i:
9527   case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
9528   case NVPTX::BI__nvvm_atom_cta_min_gen_l:
9529   case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
9530   case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
9531   case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
9532     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta);
9533   case NVPTX::BI__nvvm_atom_sys_min_gen_i:
9534   case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
9535   case NVPTX::BI__nvvm_atom_sys_min_gen_l:
9536   case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
9537   case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
9538   case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
9539     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys);
9540   case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
9541     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta);
9542   case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
9543     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta);
9544   case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
9545     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys);
9546   case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
9547     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys);
9548   case NVPTX::BI__nvvm_atom_cta_and_gen_i:
9549   case NVPTX::BI__nvvm_atom_cta_and_gen_l:
9550   case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
9551     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta);
9552   case NVPTX::BI__nvvm_atom_sys_and_gen_i:
9553   case NVPTX::BI__nvvm_atom_sys_and_gen_l:
9554   case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
9555     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys);
9556   case NVPTX::BI__nvvm_atom_cta_or_gen_i:
9557   case NVPTX::BI__nvvm_atom_cta_or_gen_l:
9558   case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
9559     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta);
9560   case NVPTX::BI__nvvm_atom_sys_or_gen_i:
9561   case NVPTX::BI__nvvm_atom_sys_or_gen_l:
9562   case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
9563     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys);
9564   case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
9565   case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
9566   case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
9567     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta);
9568   case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
9569   case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
9570   case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
9571     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys);
9572   case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
9573   case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
9574   case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
9575     Value *Ptr = EmitScalarExpr(E->getArg(0));
9576     return Builder.CreateCall(
9577         CGM.getIntrinsic(
9578             Intrinsic::nvvm_atomic_cas_gen_i_cta,
9579             {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
9580         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
9581   }
9582   case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
9583   case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
9584   case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
9585     Value *Ptr = EmitScalarExpr(E->getArg(0));
9586     return Builder.CreateCall(
9587         CGM.getIntrinsic(
9588             Intrinsic::nvvm_atomic_cas_gen_i_sys,
9589             {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
9590         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
9591   }
9592   case NVPTX::BI__nvvm_match_all_sync_i32p:
9593   case NVPTX::BI__nvvm_match_all_sync_i64p: {
9594     Value *Mask = EmitScalarExpr(E->getArg(0));
9595     Value *Val = EmitScalarExpr(E->getArg(1));
9596     Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
9597     Value *ResultPair = Builder.CreateCall(
9598         CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
9599                              ? Intrinsic::nvvm_match_all_sync_i32p
9600                              : Intrinsic::nvvm_match_all_sync_i64p),
9601         {Mask, Val});
9602     Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
9603                                      PredOutPtr.getElementType());
9604     Builder.CreateStore(Pred, PredOutPtr);
9605     return Builder.CreateExtractValue(ResultPair, 0);
9606   }
9607   default:
9608     return nullptr;
9609   }
9610 }
9611 
9612 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
9613                                                    const CallExpr *E) {
9614   switch (BuiltinID) {
9615   case WebAssembly::BI__builtin_wasm_current_memory: {
9616     llvm::Type *ResultType = ConvertType(E->getType());
9617     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
9618     return Builder.CreateCall(Callee);
9619   }
9620   case WebAssembly::BI__builtin_wasm_grow_memory: {
9621     Value *X = EmitScalarExpr(E->getArg(0));
9622     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
9623     return Builder.CreateCall(Callee, X);
9624   }
9625   case WebAssembly::BI__builtin_wasm_throw: {
9626     Value *Tag = EmitScalarExpr(E->getArg(0));
9627     Value *Obj = EmitScalarExpr(E->getArg(1));
9628     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
9629     return Builder.CreateCall(Callee, {Tag, Obj});
9630   }
9631   case WebAssembly::BI__builtin_wasm_rethrow: {
9632     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
9633     return Builder.CreateCall(Callee);
9634   }
9635 
9636   default:
9637     return nullptr;
9638   }
9639 }
9640