1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGObjCRuntime.h"
16 #include "CGOpenCLRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "CodeGenModule.h"
19 #include "ConstantEmitter.h"
20 #include "TargetInfo.h"
21 #include "clang/AST/ASTContext.h"
22 #include "clang/AST/Decl.h"
23 #include "clang/Analysis/Analyses/OSLog.h"
24 #include "clang/Basic/TargetBuiltins.h"
25 #include "clang/Basic/TargetInfo.h"
26 #include "clang/CodeGen/CGFunctionInfo.h"
27 #include "llvm/ADT/StringExtras.h"
28 #include "llvm/IR/CallSite.h"
29 #include "llvm/IR/DataLayout.h"
30 #include "llvm/IR/InlineAsm.h"
31 #include "llvm/IR/Intrinsics.h"
32 #include "llvm/IR/MDBuilder.h"
33 #include "llvm/Support/ConvertUTF.h"
34 #include "llvm/Support/ScopedPrinter.h"
35 #include "llvm/Support/TargetParser.h"
36 #include <sstream>
37 
38 using namespace clang;
39 using namespace CodeGen;
40 using namespace llvm;
41 
/// Restrict \p Value to the range bounded below by \p Low and above by
/// \p High. The upper bound is applied last, so \p High wins if the bounds are
/// inverted.
static int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
  // Raise to the lower bound first, then cap at the upper bound.
  const int64_t Floored = Value < Low ? Low : Value;
  return Floored > High ? High : Floored;
}
46 
47 /// getBuiltinLibFunction - Given a builtin id for a function like
48 /// "__builtin_fabsf", return a Function* for "fabsf".
49 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
50                                                      unsigned BuiltinID) {
51   assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
52 
53   // Get the name, skip over the __builtin_ prefix (if necessary).
54   StringRef Name;
55   GlobalDecl D(FD);
56 
57   // If the builtin has been declared explicitly with an assembler label,
58   // use the mangled name. This differs from the plain label on platforms
59   // that prefix labels.
60   if (FD->hasAttr<AsmLabelAttr>())
61     Name = getMangledName(D);
62   else
63     Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
64 
65   llvm::FunctionType *Ty =
66     cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
67 
68   return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
69 }
70 
71 /// Emit the conversions required to turn the given value into an
72 /// integer of the given size.
73 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
74                         QualType T, llvm::IntegerType *IntType) {
75   V = CGF.EmitToMemory(V, T);
76 
77   if (V->getType()->isPointerTy())
78     return CGF.Builder.CreatePtrToInt(V, IntType);
79 
80   assert(V->getType() == IntType);
81   return V;
82 }
83 
84 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
85                           QualType T, llvm::Type *ResultType) {
86   V = CGF.EmitFromMemory(V, T);
87 
88   if (ResultType->isPointerTy())
89     return CGF.Builder.CreateIntToPtr(V, ResultType);
90 
91   assert(V->getType() == ResultType);
92   return V;
93 }
94 
95 /// Utility to insert an atomic instruction based on Instrinsic::ID
96 /// and the expression node.
97 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
98                                     llvm::AtomicRMWInst::BinOp Kind,
99                                     const CallExpr *E) {
100   QualType T = E->getType();
101   assert(E->getArg(0)->getType()->isPointerType());
102   assert(CGF.getContext().hasSameUnqualifiedType(T,
103                                   E->getArg(0)->getType()->getPointeeType()));
104   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
105 
106   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
107   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
108 
109   llvm::IntegerType *IntType =
110     llvm::IntegerType::get(CGF.getLLVMContext(),
111                            CGF.getContext().getTypeSize(T));
112   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
113 
114   llvm::Value *Args[2];
115   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
116   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
117   llvm::Type *ValueType = Args[1]->getType();
118   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
119 
120   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
121       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
122   return EmitFromInt(CGF, Result, T, ValueType);
123 }
124 
125 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
126   Value *Val = CGF.EmitScalarExpr(E->getArg(0));
127   Value *Address = CGF.EmitScalarExpr(E->getArg(1));
128 
129   // Convert the type of the pointer to a pointer to the stored type.
130   Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
131   Value *BC = CGF.Builder.CreateBitCast(
132       Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
133   LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
134   LV.setNontemporal(true);
135   CGF.EmitStoreOfScalar(Val, LV, false);
136   return nullptr;
137 }
138 
139 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
140   Value *Address = CGF.EmitScalarExpr(E->getArg(0));
141 
142   LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
143   LV.setNontemporal(true);
144   return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
145 }
146 
147 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
148                                llvm::AtomicRMWInst::BinOp Kind,
149                                const CallExpr *E) {
150   return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
151 }
152 
153 /// Utility to insert an atomic instruction based Instrinsic::ID and
154 /// the expression node, where the return value is the result of the
155 /// operation.
156 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
157                                    llvm::AtomicRMWInst::BinOp Kind,
158                                    const CallExpr *E,
159                                    Instruction::BinaryOps Op,
160                                    bool Invert = false) {
161   QualType T = E->getType();
162   assert(E->getArg(0)->getType()->isPointerType());
163   assert(CGF.getContext().hasSameUnqualifiedType(T,
164                                   E->getArg(0)->getType()->getPointeeType()));
165   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
166 
167   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
168   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
169 
170   llvm::IntegerType *IntType =
171     llvm::IntegerType::get(CGF.getLLVMContext(),
172                            CGF.getContext().getTypeSize(T));
173   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
174 
175   llvm::Value *Args[2];
176   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
177   llvm::Type *ValueType = Args[1]->getType();
178   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
179   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
180 
181   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
182       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
183   Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
184   if (Invert)
185     Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
186                                      llvm::ConstantInt::get(IntType, -1));
187   Result = EmitFromInt(CGF, Result, T, ValueType);
188   return RValue::get(Result);
189 }
190 
191 /// @brief Utility to insert an atomic cmpxchg instruction.
192 ///
193 /// @param CGF The current codegen function.
194 /// @param E   Builtin call expression to convert to cmpxchg.
195 ///            arg0 - address to operate on
196 ///            arg1 - value to compare with
197 ///            arg2 - new value
198 /// @param ReturnBool Specifies whether to return success flag of
199 ///                   cmpxchg result or the old value.
200 ///
201 /// @returns result of cmpxchg, according to ReturnBool
202 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
203                                      bool ReturnBool) {
204   QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
205   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
206   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
207 
208   llvm::IntegerType *IntType = llvm::IntegerType::get(
209       CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
210   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
211 
212   Value *Args[3];
213   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
214   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
215   llvm::Type *ValueType = Args[1]->getType();
216   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
217   Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
218 
219   Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
220       Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
221       llvm::AtomicOrdering::SequentiallyConsistent);
222   if (ReturnBool)
223     // Extract boolean success flag and zext it to int.
224     return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
225                                   CGF.ConvertType(E->getType()));
226   else
227     // Extract old value and emit it using the same type as compare value.
228     return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
229                        ValueType);
230 }
231 
232 // Emit a simple mangled intrinsic that has 1 argument and a return type
233 // matching the argument type.
234 static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
235                                const CallExpr *E,
236                                unsigned IntrinsicID) {
237   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
238 
239   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
240   return CGF.Builder.CreateCall(F, Src0);
241 }
242 
243 // Emit an intrinsic that has 2 operands of the same type as its result.
244 static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
245                                 const CallExpr *E,
246                                 unsigned IntrinsicID) {
247   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
248   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
249 
250   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
251   return CGF.Builder.CreateCall(F, { Src0, Src1 });
252 }
253 
254 // Emit an intrinsic that has 3 operands of the same type as its result.
255 static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
256                                  const CallExpr *E,
257                                  unsigned IntrinsicID) {
258   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
259   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
260   llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
261 
262   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
263   return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
264 }
265 
266 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
267 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
268                                const CallExpr *E,
269                                unsigned IntrinsicID) {
270   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
271   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
272 
273   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
274   return CGF.Builder.CreateCall(F, {Src0, Src1});
275 }
276 
277 /// EmitFAbs - Emit a call to @llvm.fabs().
278 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
279   Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
280   llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
281   Call->setDoesNotAccessMemory();
282   return Call;
283 }
284 
285 /// Emit the computation of the sign bit for a floating point value. Returns
286 /// the i1 sign bit value.
287 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
288   LLVMContext &C = CGF.CGM.getLLVMContext();
289 
290   llvm::Type *Ty = V->getType();
291   int Width = Ty->getPrimitiveSizeInBits();
292   llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
293   V = CGF.Builder.CreateBitCast(V, IntTy);
294   if (Ty->isPPC_FP128Ty()) {
295     // We want the sign bit of the higher-order double. The bitcast we just
296     // did works as if the double-double was stored to memory and then
297     // read as an i128. The "store" will put the higher-order double in the
298     // lower address in both little- and big-Endian modes, but the "load"
299     // will treat those bits as a different part of the i128: the low bits in
300     // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
301     // we need to shift the high bits down to the low before truncating.
302     Width >>= 1;
303     if (CGF.getTarget().isBigEndian()) {
304       Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
305       V = CGF.Builder.CreateLShr(V, ShiftCst);
306     }
307     // We are truncating value in order to extract the higher-order
308     // double, which we will be using to extract the sign from.
309     IntTy = llvm::IntegerType::get(C, Width);
310     V = CGF.Builder.CreateTrunc(V, IntTy);
311   }
312   Value *Zero = llvm::Constant::getNullValue(IntTy);
313   return CGF.Builder.CreateICmpSLT(V, Zero);
314 }
315 
316 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
317                               const CallExpr *E, llvm::Constant *calleeValue) {
318   CGCallee callee = CGCallee::forDirect(calleeValue, FD);
319   return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
320 }
321 
322 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
323 /// depending on IntrinsicID.
324 ///
325 /// \arg CGF The current codegen function.
326 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
327 /// \arg X The first argument to the llvm.*.with.overflow.*.
328 /// \arg Y The second argument to the llvm.*.with.overflow.*.
329 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
330 /// \returns The result (i.e. sum/product) returned by the intrinsic.
331 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
332                                           const llvm::Intrinsic::ID IntrinsicID,
333                                           llvm::Value *X, llvm::Value *Y,
334                                           llvm::Value *&Carry) {
335   // Make sure we have integers of the same width.
336   assert(X->getType() == Y->getType() &&
337          "Arguments must be the same type. (Did you forget to make sure both "
338          "arguments have the same integer width?)");
339 
340   llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
341   llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
342   Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
343   return CGF.Builder.CreateExtractValue(Tmp, 0);
344 }
345 
346 static Value *emitRangedBuiltin(CodeGenFunction &CGF,
347                                 unsigned IntrinsicID,
348                                 int low, int high) {
349     llvm::MDBuilder MDHelper(CGF.getLLVMContext());
350     llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
351     Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
352     llvm::Instruction *Call = CGF.Builder.CreateCall(F);
353     Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
354     return Call;
355 }
356 
namespace {
  /// Describes an integer type by its bit width and signedness. Produced by
  /// getIntegerWidthAndSignedness and combined by EncompassingIntegerType.
  struct WidthAndSignedness {
    // Width of the integer type in bits.
    unsigned Width;
    // True if the integer type is signed.
    bool Signed;
  };
}
363 
364 static WidthAndSignedness
365 getIntegerWidthAndSignedness(const clang::ASTContext &context,
366                              const clang::QualType Type) {
367   assert(Type->isIntegerType() && "Given type is not an integer.");
368   unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
369   bool Signed = Type->isSignedIntegerType();
370   return {Width, Signed};
371 }
372 
373 // Given one or more integer types, this function produces an integer type that
374 // encompasses them: any value in one of the given types could be expressed in
375 // the encompassing type.
376 static struct WidthAndSignedness
377 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
378   assert(Types.size() > 0 && "Empty list of types.");
379 
380   // If any of the given types is signed, we must return a signed type.
381   bool Signed = false;
382   for (const auto &Type : Types) {
383     Signed |= Type.Signed;
384   }
385 
386   // The encompassing type must have a width greater than or equal to the width
387   // of the specified types.  Aditionally, if the encompassing type is signed,
388   // its width must be strictly greater than the width of any unsigned types
389   // given.
390   unsigned Width = 0;
391   for (const auto &Type : Types) {
392     unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
393     if (Width < MinWidth) {
394       Width = MinWidth;
395     }
396   }
397 
398   return {Width, Signed};
399 }
400 
401 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
402   llvm::Type *DestType = Int8PtrTy;
403   if (ArgValue->getType() != DestType)
404     ArgValue =
405         Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
406 
407   Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
408   return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
409 }
410 
/// Decide whether the result of __builtin_object_size(p, @p From) may be used
/// in place of __builtin_object_size(p, @p To).
///
/// Identical types are always compatible. Beyond that, only 0 -> 1 and 3 -> 2
/// are accepted.
static bool areBOSTypesCompatible(int From, int To) {
  // Note: the __builtin_object_size implementation currently treats Type=0
  // and Type=2 identically. That detail is deliberately not encoded here,
  // since relying on it could make improving __builtin_object_size harder in
  // the future.
  if (From == To)
    return true;

  switch (From) {
  case 0:
    return To == 1;
  case 3:
    return To == 2;
  default:
    return false;
  }
}
419 
420 static llvm::Value *
421 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
422   return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
423 }
424 
425 llvm::Value *
426 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
427                                                  llvm::IntegerType *ResType,
428                                                  llvm::Value *EmittedE) {
429   uint64_t ObjectSize;
430   if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
431     return emitBuiltinObjectSize(E, Type, ResType, EmittedE);
432   return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
433 }
434 
435 /// Returns a Value corresponding to the size of the given expression.
436 /// This Value may be either of the following:
437 ///   - A llvm::Argument (if E is a param with the pass_object_size attribute on
438 ///     it)
439 ///   - A call to the @llvm.objectsize intrinsic
440 ///
441 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
442 /// and we wouldn't otherwise try to reference a pass_object_size parameter,
443 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
444 llvm::Value *
445 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
446                                        llvm::IntegerType *ResType,
447                                        llvm::Value *EmittedE) {
448   // We need to reference an argument if the pointer is a parameter with the
449   // pass_object_size attribute.
450   if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
451     auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
452     auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
453     if (Param != nullptr && PS != nullptr &&
454         areBOSTypesCompatible(PS->getType(), Type)) {
455       auto Iter = SizeArguments.find(Param);
456       assert(Iter != SizeArguments.end());
457 
458       const ImplicitParamDecl *D = Iter->second;
459       auto DIter = LocalDeclMap.find(D);
460       assert(DIter != LocalDeclMap.end());
461 
462       return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
463                               getContext().getSizeType(), E->getLocStart());
464     }
465   }
466 
467   // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
468   // evaluate E for side-effects. In either case, we shouldn't lower to
469   // @llvm.objectsize.
470   if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
471     return getDefaultBuiltinObjectSizeResult(Type, ResType);
472 
473   Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
474   assert(Ptr->getType()->isPointerTy() &&
475          "Non-pointer passed to __builtin_object_size?");
476 
477   Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
478 
479   // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
480   Value *Min = Builder.getInt1((Type & 2) != 0);
481   // For GCC compatability, __builtin_object_size treat NULL as unknown size.
482   Value *NullIsUnknown = Builder.getTrue();
483   return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
484 }
485 
// Many of the MSVC builtins exist on both x64 and ARM; to avoid repeating
// code, they are handled in one place.
//
// Each enumerator names one MSVC intrinsic that EmitMSVCBuiltinExpr knows how
// to emit.
enum class CodeGenFunction::MSVCIntrin {
  _BitScanForward,
  _BitScanReverse,
  _InterlockedAnd,
  _InterlockedDecrement,
  _InterlockedExchange,
  _InterlockedExchangeAdd,
  _InterlockedExchangeSub,
  _InterlockedIncrement,
  _InterlockedOr,
  _InterlockedXor,
  _interlockedbittestandset,
  __fastfail,
};
502 
503 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
504                                             const CallExpr *E) {
505   switch (BuiltinID) {
506   case MSVCIntrin::_BitScanForward:
507   case MSVCIntrin::_BitScanReverse: {
508     Value *ArgValue = EmitScalarExpr(E->getArg(1));
509 
510     llvm::Type *ArgType = ArgValue->getType();
511     llvm::Type *IndexType =
512       EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType();
513     llvm::Type *ResultType = ConvertType(E->getType());
514 
515     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
516     Value *ResZero = llvm::Constant::getNullValue(ResultType);
517     Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
518 
519     BasicBlock *Begin = Builder.GetInsertBlock();
520     BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
521     Builder.SetInsertPoint(End);
522     PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
523 
524     Builder.SetInsertPoint(Begin);
525     Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
526     BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
527     Builder.CreateCondBr(IsZero, End, NotZero);
528     Result->addIncoming(ResZero, Begin);
529 
530     Builder.SetInsertPoint(NotZero);
531     Address IndexAddress = EmitPointerWithAlignment(E->getArg(0));
532 
533     if (BuiltinID == MSVCIntrin::_BitScanForward) {
534       Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
535       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
536       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
537       Builder.CreateStore(ZeroCount, IndexAddress, false);
538     } else {
539       unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
540       Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
541 
542       Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
543       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
544       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
545       Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
546       Builder.CreateStore(Index, IndexAddress, false);
547     }
548     Builder.CreateBr(End);
549     Result->addIncoming(ResOne, NotZero);
550 
551     Builder.SetInsertPoint(End);
552     return Result;
553   }
554   case MSVCIntrin::_InterlockedAnd:
555     return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
556   case MSVCIntrin::_InterlockedExchange:
557     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
558   case MSVCIntrin::_InterlockedExchangeAdd:
559     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
560   case MSVCIntrin::_InterlockedExchangeSub:
561     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
562   case MSVCIntrin::_InterlockedOr:
563     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
564   case MSVCIntrin::_InterlockedXor:
565     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
566 
567   case MSVCIntrin::_interlockedbittestandset: {
568     llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
569     llvm::Value *Bit = EmitScalarExpr(E->getArg(1));
570     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
571         AtomicRMWInst::Or, Addr,
572         Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit),
573         llvm::AtomicOrdering::SequentiallyConsistent);
574     // Shift the relevant bit to the least significant position, truncate to
575     // the result type, and test the low bit.
576     llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit);
577     llvm::Value *Truncated =
578         Builder.CreateTrunc(Shifted, ConvertType(E->getType()));
579     return Builder.CreateAnd(Truncated,
580                              ConstantInt::get(Truncated->getType(), 1));
581   }
582 
583   case MSVCIntrin::_InterlockedDecrement: {
584     llvm::Type *IntTy = ConvertType(E->getType());
585     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
586       AtomicRMWInst::Sub,
587       EmitScalarExpr(E->getArg(0)),
588       ConstantInt::get(IntTy, 1),
589       llvm::AtomicOrdering::SequentiallyConsistent);
590     return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1));
591   }
592   case MSVCIntrin::_InterlockedIncrement: {
593     llvm::Type *IntTy = ConvertType(E->getType());
594     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
595       AtomicRMWInst::Add,
596       EmitScalarExpr(E->getArg(0)),
597       ConstantInt::get(IntTy, 1),
598       llvm::AtomicOrdering::SequentiallyConsistent);
599     return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1));
600   }
601 
602   case MSVCIntrin::__fastfail: {
603     // Request immediate process termination from the kernel. The instruction
604     // sequences to do this are documented on MSDN:
605     // https://msdn.microsoft.com/en-us/library/dn774154.aspx
606     llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
607     StringRef Asm, Constraints;
608     switch (ISA) {
609     default:
610       ErrorUnsupported(E, "__fastfail call for this architecture");
611       break;
612     case llvm::Triple::x86:
613     case llvm::Triple::x86_64:
614       Asm = "int $$0x29";
615       Constraints = "{cx}";
616       break;
617     case llvm::Triple::thumb:
618       Asm = "udf #251";
619       Constraints = "{r0}";
620       break;
621     }
622     llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
623     llvm::InlineAsm *IA =
624         llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
625     llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
626         getLLVMContext(), llvm::AttributeList::FunctionIndex,
627         llvm::Attribute::NoReturn);
628     CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
629     CS.setAttributes(NoReturnAttr);
630     return CS.getInstruction();
631   }
632   }
633   llvm_unreachable("Incorrect MSVC intrinsic!");
634 }
635 
636 namespace {
637 // ARC cleanup for __builtin_os_log_format
638 struct CallObjCArcUse final : EHScopeStack::Cleanup {
639   CallObjCArcUse(llvm::Value *object) : object(object) {}
640   llvm::Value *object;
641 
642   void Emit(CodeGenFunction &CGF, Flags flags) override {
643     CGF.EmitARCIntrinsicUse(object);
644   }
645 };
646 }
647 
648 Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
649                                                  BuiltinCheckKind Kind) {
650   assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
651           && "Unsupported builtin check kind");
652 
653   Value *ArgValue = EmitScalarExpr(E);
654   if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef())
655     return ArgValue;
656 
657   SanitizerScope SanScope(this);
658   Value *Cond = Builder.CreateICmpNE(
659       ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
660   EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
661             SanitizerHandler::InvalidBuiltin,
662             {EmitCheckSourceLocation(E->getExprLoc()),
663              llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
664             None);
665   return ArgValue;
666 }
667 
668 /// Get the argument type for arguments to os_log_helper.
669 static CanQualType getOSLogArgType(ASTContext &C, int Size) {
670   QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
671   return C.getCanonicalType(UnsignedTy);
672 }
673 
/// Create (or reuse) the helper function that writes an os_log buffer with
/// the given layout.
///
/// The helper's name encodes the buffer alignment, the summary and
/// argument-count bytes, and the size and descriptor bytes of every item. If
/// the module already contains a function with that name, it is returned
/// as-is. Otherwise a new function is emitted that takes the buffer pointer
/// followed by one integer parameter per item with a non-zero size byte, and
/// stores the header bytes and the argument data into the buffer.
///
/// \param Layout          Describes the buffer contents (summary byte, number
///                        of args, and the per-item descriptor/size bytes).
/// \param BufferAlignment Alignment assumed for the buffer pointer.
/// \returns The existing or newly generated helper function.
llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
    const analyze_os_log::OSLogBufferLayout &Layout,
    CharUnits BufferAlignment) {
  ASTContext &Ctx = getContext();

  // Build a name that uniquely identifies this alignment/layout combination.
  llvm::SmallString<64> Name;
  {
    raw_svector_ostream OS(Name);
    OS << "__os_log_helper";
    OS << "_" << BufferAlignment.getQuantity();
    OS << "_" << int(Layout.getSummaryByte());
    OS << "_" << int(Layout.getNumArgsByte());
    for (const auto &Item : Layout.Items)
      OS << "_" << int(Item.getSizeByte()) << "_"
         << int(Item.getDescriptorByte());
  }

  // Reuse a helper already present in this module under the same name.
  if (llvm::Function *F = CGM.getModule().getFunction(Name))
    return F;

  // The first parameter is the destination buffer (void *).
  llvm::SmallVector<ImplicitParamDecl, 4> Params;
  Params.emplace_back(Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"),
                      Ctx.VoidPtrTy, ImplicitParamDecl::Other);

  // Every item with a non-zero size byte gets an "arg<I>" parameter whose
  // type is the unsigned integer of that many bytes.
  for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
    char Size = Layout.Items[I].getSizeByte();
    if (!Size)
      continue;

    Params.emplace_back(
        Ctx, nullptr, SourceLocation(),
        &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)),
        getOSLogArgType(Ctx, Size), ImplicitParamDecl::Other);
  }

  FunctionArgList Args;
  for (auto &P : Params)
    Args.push_back(&P);

  // The helper function has linkonce_odr linkage to enable the linker to merge
  // identical functions. To ensure the merging always happens, 'noinline' is
  // attached to the function when compiling with -Oz.
  const CGFunctionInfo &FI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
  llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
  llvm::Function *Fn = llvm::Function::Create(
      FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
  Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
  CGM.SetLLVMFunctionAttributes(nullptr, FI, Fn);
  CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);

  // Attach 'noinline' at -Oz.
  if (CGM.getCodeGenOpts().OptimizeSize == 2)
    Fn->addFnAttr(llvm::Attribute::NoInline);

  // Start the function with an empty debug location and a synthesized
  // FunctionDecl carrying the helper's name.
  auto NL = ApplyDebugLocation::CreateEmpty(*this);
  IdentifierInfo *II = &Ctx.Idents.get(Name);
  FunctionDecl *FD = FunctionDecl::Create(
      Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
      Ctx.VoidTy, nullptr, SC_PrivateExtern, false, false);

  StartFunction(FD, Ctx.VoidTy, Fn, FI, Args);

  // Create a scope with an artificial location for the body of this function.
  auto AL = ApplyDebugLocation::CreateArtificial(*this);

  // Offset tracks the current write position in the buffer. The first two
  // bytes are the summary byte and the number-of-args byte.
  CharUnits Offset;
  Address BufAddr(Builder.CreateLoad(GetAddrOfLocalVar(&Params[0]), "buf"),
                  BufferAlignment);
  Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
                      Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
  Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
                      Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));

  // I indexes into Params; it starts at 1 because Params[0] is the buffer.
  unsigned I = 1;
  for (const auto &Item : Layout.Items) {
    // Each item starts with its descriptor byte followed by its size byte.
    Builder.CreateStore(
        Builder.getInt8(Item.getDescriptorByte()),
        Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
    Builder.CreateStore(
        Builder.getInt8(Item.getSizeByte()),
        Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));

    // Items without data have no parameter and occupy no further space.
    // NOTE(review): parameters above are skipped based on getSizeByte() while
    // this skips based on size(); presumably the two always agree - confirm.
    CharUnits Size = Item.size();
    if (!Size.getQuantity())
      continue;

    // Copy the parameter's value into the buffer at the current offset, then
    // advance past it and move on to the next parameter.
    Address Arg = GetAddrOfLocalVar(&Params[I]);
    Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
    Addr = Builder.CreateBitCast(Addr, Arg.getPointer()->getType(),
                                 "argDataCast");
    Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
    Offset += Size;
    ++I;
  }

  FinishFunction();

  return Fn;
}
774 
775 RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
776   assert(E.getNumArgs() >= 2 &&
777          "__builtin_os_log_format takes at least 2 arguments");
778   ASTContext &Ctx = getContext();
779   analyze_os_log::OSLogBufferLayout Layout;
780   analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
781   Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
782   llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
783 
784   // Ignore argument 1, the format string. It is not currently used.
785   CallArgList Args;
786   Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy);
787 
788   for (const auto &Item : Layout.Items) {
789     int Size = Item.getSizeByte();
790     if (!Size)
791       continue;
792 
793     llvm::Value *ArgVal;
794 
795     if (const Expr *TheExpr = Item.getExpr()) {
796       ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
797 
798       // Check if this is a retainable type.
799       if (TheExpr->getType()->isObjCRetainableType()) {
800         assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
801                "Only scalar can be a ObjC retainable type");
802         // Check if the object is constant, if not, save it in
803         // RetainableOperands.
804         if (!isa<Constant>(ArgVal))
805           RetainableOperands.push_back(ArgVal);
806       }
807     } else {
808       ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
809     }
810 
811     unsigned ArgValSize =
812         CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
813     llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
814                                                      ArgValSize);
815     ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
816     CanQualType ArgTy = getOSLogArgType(Ctx, Size);
817     // If ArgVal has type x86_fp80, zero-extend ArgVal.
818     ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
819     Args.add(RValue::get(ArgVal), ArgTy);
820   }
821 
822   const CGFunctionInfo &FI =
823       CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
824   llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
825       Layout, BufAddr.getAlignment());
826   EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
827 
828   // Push a clang.arc.use cleanup for each object in RetainableOperands. The
829   // cleanup will cause the use to appear after the final log call, keeping
830   // the object valid while it’s held in the log buffer.  Note that if there’s
831   // a release cleanup on the object, it will already be active; since
832   // cleanups are emitted in reverse order, the use will occur before the
833   // object is released.
834   if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
835       CGM.getCodeGenOpts().OptimizationLevel != 0)
836     for (llvm::Value *Object : RetainableOperands)
837       pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), Object);
838 
839   return RValue::get(BufAddr.getPointer());
840 }
841 
842 /// Determine if a binop is a checked mixed-sign multiply we can specialize.
843 static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
844                                        WidthAndSignedness Op1Info,
845                                        WidthAndSignedness Op2Info,
846                                        WidthAndSignedness ResultInfo) {
847   return BuiltinID == Builtin::BI__builtin_mul_overflow &&
848          Op1Info.Width == Op2Info.Width && Op1Info.Width >= ResultInfo.Width &&
849          Op1Info.Signed != Op2Info.Signed;
850 }
851 
852 /// Emit a checked mixed-sign multiply. This is a cheaper specialization of
853 /// the generic checked-binop irgen.
854 static RValue
855 EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
856                              WidthAndSignedness Op1Info, const clang::Expr *Op2,
857                              WidthAndSignedness Op2Info,
858                              const clang::Expr *ResultArg, QualType ResultQTy,
859                              WidthAndSignedness ResultInfo) {
860   assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
861                                     Op2Info, ResultInfo) &&
862          "Not a mixed-sign multipliction we can specialize");
863 
864   // Emit the signed and unsigned operands.
865   const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
866   const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
867   llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
868   llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
869 
870   llvm::Type *OpTy = Signed->getType();
871   llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
872   Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
873   llvm::Type *ResTy = ResultPtr.getElementType();
874 
875   // Take the absolute value of the signed operand.
876   llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
877   llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
878   llvm::Value *AbsSigned =
879       CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
880 
881   // Perform a checked unsigned multiplication.
882   llvm::Value *UnsignedOverflow;
883   llvm::Value *UnsignedResult =
884       EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
885                             Unsigned, UnsignedOverflow);
886 
887   llvm::Value *Overflow, *Result;
888   if (ResultInfo.Signed) {
889     // Signed overflow occurs if the result is greater than INT_MAX or lesser
890     // than INT_MIN, i.e when |Result| > (INT_MAX + IsNegative).
891     auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width)
892                       .zextOrSelf(Op1Info.Width);
893     llvm::Value *MaxResult =
894         CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
895                               CGF.Builder.CreateZExt(IsNegative, OpTy));
896     llvm::Value *SignedOverflow =
897         CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
898     Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
899 
900     // Prepare the signed result (possibly by negating it).
901     llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
902     llvm::Value *SignedResult =
903         CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
904     Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
905   } else {
906     // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
907     llvm::Value *Underflow = CGF.Builder.CreateAnd(
908         IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
909     Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
910     if (ResultInfo.Width < Op1Info.Width) {
911       auto IntMax =
912           llvm::APInt::getMaxValue(ResultInfo.Width).zext(Op1Info.Width);
913       llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
914           UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
915       Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
916     }
917 
918     // Negate the product if it would be negative in infinite precision.
919     Result = CGF.Builder.CreateSelect(
920         IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
921 
922     Result = CGF.Builder.CreateTrunc(Result, ResTy);
923   }
924   assert(Overflow && Result && "Missing overflow or result");
925 
926   bool isVolatile =
927       ResultArg->getType()->getPointeeType().isVolatileQualified();
928   CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
929                           isVolatile);
930   return RValue::get(Overflow);
931 }
932 
933 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
934                                         unsigned BuiltinID, const CallExpr *E,
935                                         ReturnValueSlot ReturnValue) {
936   // See if we can constant fold this builtin.  If so, don't emit it at all.
937   Expr::EvalResult Result;
938   if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
939       !Result.hasSideEffects()) {
940     if (Result.Val.isInt())
941       return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
942                                                 Result.Val.getInt()));
943     if (Result.Val.isFloat())
944       return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
945                                                Result.Val.getFloat()));
946   }
947 
948   // There are LLVM math intrinsics/instructions corresponding to math library
949   // functions except the LLVM op will never set errno while the math library
950   // might. Also, math builtins have the same semantics as their math library
951   // twins. Thus, we can transform math library and builtin calls to their
952   // LLVM counterparts if the call is marked 'const' (known to never set errno).
953   if (FD->hasAttr<ConstAttr>()) {
954     switch (BuiltinID) {
955     case Builtin::BIceil:
956     case Builtin::BIceilf:
957     case Builtin::BIceill:
958     case Builtin::BI__builtin_ceil:
959     case Builtin::BI__builtin_ceilf:
960     case Builtin::BI__builtin_ceill:
961       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
962 
963     case Builtin::BIcopysign:
964     case Builtin::BIcopysignf:
965     case Builtin::BIcopysignl:
966     case Builtin::BI__builtin_copysign:
967     case Builtin::BI__builtin_copysignf:
968     case Builtin::BI__builtin_copysignl:
969     case Builtin::BI__builtin_copysignf128:
970       return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
971 
972     case Builtin::BIcos:
973     case Builtin::BIcosf:
974     case Builtin::BIcosl:
975     case Builtin::BI__builtin_cos:
976     case Builtin::BI__builtin_cosf:
977     case Builtin::BI__builtin_cosl:
978       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::cos));
979 
980     case Builtin::BIexp:
981     case Builtin::BIexpf:
982     case Builtin::BIexpl:
983     case Builtin::BI__builtin_exp:
984     case Builtin::BI__builtin_expf:
985     case Builtin::BI__builtin_expl:
986       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp));
987 
988     case Builtin::BIexp2:
989     case Builtin::BIexp2f:
990     case Builtin::BIexp2l:
991     case Builtin::BI__builtin_exp2:
992     case Builtin::BI__builtin_exp2f:
993     case Builtin::BI__builtin_exp2l:
994       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp2));
995 
996     case Builtin::BIfabs:
997     case Builtin::BIfabsf:
998     case Builtin::BIfabsl:
999     case Builtin::BI__builtin_fabs:
1000     case Builtin::BI__builtin_fabsf:
1001     case Builtin::BI__builtin_fabsl:
1002     case Builtin::BI__builtin_fabsf128:
1003       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
1004 
1005     case Builtin::BIfloor:
1006     case Builtin::BIfloorf:
1007     case Builtin::BIfloorl:
1008     case Builtin::BI__builtin_floor:
1009     case Builtin::BI__builtin_floorf:
1010     case Builtin::BI__builtin_floorl:
1011       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
1012 
1013     case Builtin::BIfma:
1014     case Builtin::BIfmaf:
1015     case Builtin::BIfmal:
1016     case Builtin::BI__builtin_fma:
1017     case Builtin::BI__builtin_fmaf:
1018     case Builtin::BI__builtin_fmal:
1019       return RValue::get(emitTernaryBuiltin(*this, E, Intrinsic::fma));
1020 
1021     case Builtin::BIfmax:
1022     case Builtin::BIfmaxf:
1023     case Builtin::BIfmaxl:
1024     case Builtin::BI__builtin_fmax:
1025     case Builtin::BI__builtin_fmaxf:
1026     case Builtin::BI__builtin_fmaxl:
1027       return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
1028 
1029     case Builtin::BIfmin:
1030     case Builtin::BIfminf:
1031     case Builtin::BIfminl:
1032     case Builtin::BI__builtin_fmin:
1033     case Builtin::BI__builtin_fminf:
1034     case Builtin::BI__builtin_fminl:
1035       return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
1036 
1037     // fmod() is a special-case. It maps to the frem instruction rather than an
1038     // LLVM intrinsic.
1039     case Builtin::BIfmod:
1040     case Builtin::BIfmodf:
1041     case Builtin::BIfmodl:
1042     case Builtin::BI__builtin_fmod:
1043     case Builtin::BI__builtin_fmodf:
1044     case Builtin::BI__builtin_fmodl: {
1045       Value *Arg1 = EmitScalarExpr(E->getArg(0));
1046       Value *Arg2 = EmitScalarExpr(E->getArg(1));
1047       return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
1048     }
1049 
1050     case Builtin::BIlog:
1051     case Builtin::BIlogf:
1052     case Builtin::BIlogl:
1053     case Builtin::BI__builtin_log:
1054     case Builtin::BI__builtin_logf:
1055     case Builtin::BI__builtin_logl:
1056       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log));
1057 
1058     case Builtin::BIlog10:
1059     case Builtin::BIlog10f:
1060     case Builtin::BIlog10l:
1061     case Builtin::BI__builtin_log10:
1062     case Builtin::BI__builtin_log10f:
1063     case Builtin::BI__builtin_log10l:
1064       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log10));
1065 
1066     case Builtin::BIlog2:
1067     case Builtin::BIlog2f:
1068     case Builtin::BIlog2l:
1069     case Builtin::BI__builtin_log2:
1070     case Builtin::BI__builtin_log2f:
1071     case Builtin::BI__builtin_log2l:
1072       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log2));
1073 
1074     case Builtin::BInearbyint:
1075     case Builtin::BInearbyintf:
1076     case Builtin::BInearbyintl:
1077     case Builtin::BI__builtin_nearbyint:
1078     case Builtin::BI__builtin_nearbyintf:
1079     case Builtin::BI__builtin_nearbyintl:
1080       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
1081 
1082     case Builtin::BIpow:
1083     case Builtin::BIpowf:
1084     case Builtin::BIpowl:
1085     case Builtin::BI__builtin_pow:
1086     case Builtin::BI__builtin_powf:
1087     case Builtin::BI__builtin_powl:
1088       return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::pow));
1089 
1090     case Builtin::BIrint:
1091     case Builtin::BIrintf:
1092     case Builtin::BIrintl:
1093     case Builtin::BI__builtin_rint:
1094     case Builtin::BI__builtin_rintf:
1095     case Builtin::BI__builtin_rintl:
1096       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
1097 
1098     case Builtin::BIround:
1099     case Builtin::BIroundf:
1100     case Builtin::BIroundl:
1101     case Builtin::BI__builtin_round:
1102     case Builtin::BI__builtin_roundf:
1103     case Builtin::BI__builtin_roundl:
1104       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
1105 
1106     case Builtin::BIsin:
1107     case Builtin::BIsinf:
1108     case Builtin::BIsinl:
1109     case Builtin::BI__builtin_sin:
1110     case Builtin::BI__builtin_sinf:
1111     case Builtin::BI__builtin_sinl:
1112       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sin));
1113 
1114     case Builtin::BIsqrt:
1115     case Builtin::BIsqrtf:
1116     case Builtin::BIsqrtl:
1117     case Builtin::BI__builtin_sqrt:
1118     case Builtin::BI__builtin_sqrtf:
1119     case Builtin::BI__builtin_sqrtl:
1120       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sqrt));
1121 
1122     case Builtin::BItrunc:
1123     case Builtin::BItruncf:
1124     case Builtin::BItruncl:
1125     case Builtin::BI__builtin_trunc:
1126     case Builtin::BI__builtin_truncf:
1127     case Builtin::BI__builtin_truncl:
1128       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
1129 
1130     default:
1131       break;
1132     }
1133   }
1134 
1135   switch (BuiltinID) {
1136   default: break;
1137   case Builtin::BI__builtin___CFStringMakeConstantString:
1138   case Builtin::BI__builtin___NSStringMakeConstantString:
1139     return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
1140   case Builtin::BI__builtin_stdarg_start:
1141   case Builtin::BI__builtin_va_start:
1142   case Builtin::BI__va_start:
1143   case Builtin::BI__builtin_va_end:
1144     return RValue::get(
1145         EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
1146                            ? EmitScalarExpr(E->getArg(0))
1147                            : EmitVAListRef(E->getArg(0)).getPointer(),
1148                        BuiltinID != Builtin::BI__builtin_va_end));
1149   case Builtin::BI__builtin_va_copy: {
1150     Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
1151     Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
1152 
1153     llvm::Type *Type = Int8PtrTy;
1154 
1155     DstPtr = Builder.CreateBitCast(DstPtr, Type);
1156     SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
1157     return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
1158                                           {DstPtr, SrcPtr}));
1159   }
1160   case Builtin::BI__builtin_abs:
1161   case Builtin::BI__builtin_labs:
1162   case Builtin::BI__builtin_llabs: {
1163     Value *ArgValue = EmitScalarExpr(E->getArg(0));
1164 
1165     Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
1166     Value *CmpResult =
1167     Builder.CreateICmpSGE(ArgValue,
1168                           llvm::Constant::getNullValue(ArgValue->getType()),
1169                                                             "abscond");
1170     Value *Result =
1171       Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
1172 
1173     return RValue::get(Result);
1174   }
1175   case Builtin::BI__builtin_conj:
1176   case Builtin::BI__builtin_conjf:
1177   case Builtin::BI__builtin_conjl: {
1178     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
1179     Value *Real = ComplexVal.first;
1180     Value *Imag = ComplexVal.second;
1181     Value *Zero =
1182       Imag->getType()->isFPOrFPVectorTy()
1183         ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
1184         : llvm::Constant::getNullValue(Imag->getType());
1185 
1186     Imag = Builder.CreateFSub(Zero, Imag, "sub");
1187     return RValue::getComplex(std::make_pair(Real, Imag));
1188   }
1189   case Builtin::BI__builtin_creal:
1190   case Builtin::BI__builtin_crealf:
1191   case Builtin::BI__builtin_creall:
1192   case Builtin::BIcreal:
1193   case Builtin::BIcrealf:
1194   case Builtin::BIcreall: {
1195     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
1196     return RValue::get(ComplexVal.first);
1197   }
1198 
1199   case Builtin::BI__builtin_cimag:
1200   case Builtin::BI__builtin_cimagf:
1201   case Builtin::BI__builtin_cimagl:
1202   case Builtin::BIcimag:
1203   case Builtin::BIcimagf:
1204   case Builtin::BIcimagl: {
1205     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
1206     return RValue::get(ComplexVal.second);
1207   }
1208 
1209   case Builtin::BI__builtin_ctzs:
1210   case Builtin::BI__builtin_ctz:
1211   case Builtin::BI__builtin_ctzl:
1212   case Builtin::BI__builtin_ctzll: {
1213     Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
1214 
1215     llvm::Type *ArgType = ArgValue->getType();
1216     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1217 
1218     llvm::Type *ResultType = ConvertType(E->getType());
1219     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
1220     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
1221     if (Result->getType() != ResultType)
1222       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1223                                      "cast");
1224     return RValue::get(Result);
1225   }
1226   case Builtin::BI__builtin_clzs:
1227   case Builtin::BI__builtin_clz:
1228   case Builtin::BI__builtin_clzl:
1229   case Builtin::BI__builtin_clzll: {
1230     Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
1231 
1232     llvm::Type *ArgType = ArgValue->getType();
1233     Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1234 
1235     llvm::Type *ResultType = ConvertType(E->getType());
1236     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
1237     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
1238     if (Result->getType() != ResultType)
1239       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1240                                      "cast");
1241     return RValue::get(Result);
1242   }
1243   case Builtin::BI__builtin_ffs:
1244   case Builtin::BI__builtin_ffsl:
1245   case Builtin::BI__builtin_ffsll: {
1246     // ffs(x) -> x ? cttz(x) + 1 : 0
1247     Value *ArgValue = EmitScalarExpr(E->getArg(0));
1248 
1249     llvm::Type *ArgType = ArgValue->getType();
1250     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1251 
1252     llvm::Type *ResultType = ConvertType(E->getType());
1253     Value *Tmp =
1254         Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
1255                           llvm::ConstantInt::get(ArgType, 1));
1256     Value *Zero = llvm::Constant::getNullValue(ArgType);
1257     Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
1258     Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
1259     if (Result->getType() != ResultType)
1260       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1261                                      "cast");
1262     return RValue::get(Result);
1263   }
1264   case Builtin::BI__builtin_parity:
1265   case Builtin::BI__builtin_parityl:
1266   case Builtin::BI__builtin_parityll: {
1267     // parity(x) -> ctpop(x) & 1
1268     Value *ArgValue = EmitScalarExpr(E->getArg(0));
1269 
1270     llvm::Type *ArgType = ArgValue->getType();
1271     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
1272 
1273     llvm::Type *ResultType = ConvertType(E->getType());
1274     Value *Tmp = Builder.CreateCall(F, ArgValue);
1275     Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
1276     if (Result->getType() != ResultType)
1277       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1278                                      "cast");
1279     return RValue::get(Result);
1280   }
1281   case Builtin::BI__popcnt16:
1282   case Builtin::BI__popcnt:
1283   case Builtin::BI__popcnt64:
1284   case Builtin::BI__builtin_popcount:
1285   case Builtin::BI__builtin_popcountl:
1286   case Builtin::BI__builtin_popcountll: {
1287     Value *ArgValue = EmitScalarExpr(E->getArg(0));
1288 
1289     llvm::Type *ArgType = ArgValue->getType();
1290     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
1291 
1292     llvm::Type *ResultType = ConvertType(E->getType());
1293     Value *Result = Builder.CreateCall(F, ArgValue);
1294     if (Result->getType() != ResultType)
1295       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1296                                      "cast");
1297     return RValue::get(Result);
1298   }
1299   case Builtin::BI_rotr8:
1300   case Builtin::BI_rotr16:
1301   case Builtin::BI_rotr:
1302   case Builtin::BI_lrotr:
1303   case Builtin::BI_rotr64: {
1304     Value *Val = EmitScalarExpr(E->getArg(0));
1305     Value *Shift = EmitScalarExpr(E->getArg(1));
1306 
1307     llvm::Type *ArgType = Val->getType();
1308     Shift = Builder.CreateIntCast(Shift, ArgType, false);
1309     unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
1310     Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
1311     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
1312 
1313     Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
1314     Shift = Builder.CreateAnd(Shift, Mask);
1315     Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift);
1316 
1317     Value *RightShifted = Builder.CreateLShr(Val, Shift);
1318     Value *LeftShifted = Builder.CreateShl(Val, LeftShift);
1319     Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
1320 
1321     Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
1322     Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
1323     return RValue::get(Result);
1324   }
1325   case Builtin::BI_rotl8:
1326   case Builtin::BI_rotl16:
1327   case Builtin::BI_rotl:
1328   case Builtin::BI_lrotl:
1329   case Builtin::BI_rotl64: {
1330     Value *Val = EmitScalarExpr(E->getArg(0));
1331     Value *Shift = EmitScalarExpr(E->getArg(1));
1332 
1333     llvm::Type *ArgType = Val->getType();
1334     Shift = Builder.CreateIntCast(Shift, ArgType, false);
1335     unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
1336     Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
1337     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
1338 
1339     Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
1340     Shift = Builder.CreateAnd(Shift, Mask);
1341     Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift);
1342 
1343     Value *LeftShifted = Builder.CreateShl(Val, Shift);
1344     Value *RightShifted = Builder.CreateLShr(Val, RightShift);
1345     Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
1346 
1347     Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
1348     Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
1349     return RValue::get(Result);
1350   }
1351   case Builtin::BI__builtin_unpredictable: {
1352     // Always return the argument of __builtin_unpredictable. LLVM does not
1353     // handle this builtin. Metadata for this builtin should be added directly
1354     // to instructions such as branches or switches that use it.
1355     return RValue::get(EmitScalarExpr(E->getArg(0)));
1356   }
1357   case Builtin::BI__builtin_expect: {
1358     Value *ArgValue = EmitScalarExpr(E->getArg(0));
1359     llvm::Type *ArgType = ArgValue->getType();
1360 
1361     Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
1362     // Don't generate llvm.expect on -O0 as the backend won't use it for
1363     // anything.
1364     // Note, we still IRGen ExpectedValue because it could have side-effects.
1365     if (CGM.getCodeGenOpts().OptimizationLevel == 0)
1366       return RValue::get(ArgValue);
1367 
1368     Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
1369     Value *Result =
1370         Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
1371     return RValue::get(Result);
1372   }
1373   case Builtin::BI__builtin_assume_aligned: {
1374     Value *PtrValue = EmitScalarExpr(E->getArg(0));
1375     Value *OffsetValue =
1376       (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
1377 
1378     Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
1379     ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
1380     unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
1381 
1382     EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
1383     return RValue::get(PtrValue);
1384   }
1385   case Builtin::BI__assume:
1386   case Builtin::BI__builtin_assume: {
1387     if (E->getArg(0)->HasSideEffects(getContext()))
1388       return RValue::get(nullptr);
1389 
1390     Value *ArgValue = EmitScalarExpr(E->getArg(0));
1391     Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
1392     return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
1393   }
1394   case Builtin::BI__builtin_bswap16:
1395   case Builtin::BI__builtin_bswap32:
1396   case Builtin::BI__builtin_bswap64: {
1397     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
1398   }
1399   case Builtin::BI__builtin_bitreverse8:
1400   case Builtin::BI__builtin_bitreverse16:
1401   case Builtin::BI__builtin_bitreverse32:
1402   case Builtin::BI__builtin_bitreverse64: {
1403     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
1404   }
1405   case Builtin::BI__builtin_object_size: {
1406     unsigned Type =
1407         E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
1408     auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
1409 
1410     // We pass this builtin onto the optimizer so that it can figure out the
1411     // object size in more complex cases.
1412     return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
1413                                              /*EmittedE=*/nullptr));
1414   }
1415   case Builtin::BI__builtin_prefetch: {
1416     Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
1417     // FIXME: Technically these constants should of type 'int', yes?
1418     RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
1419       llvm::ConstantInt::get(Int32Ty, 0);
1420     Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
1421       llvm::ConstantInt::get(Int32Ty, 3);
1422     Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
1423     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
1424     return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
1425   }
1426   case Builtin::BI__builtin_readcyclecounter: {
1427     Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
1428     return RValue::get(Builder.CreateCall(F));
1429   }
1430   case Builtin::BI__builtin___clear_cache: {
1431     Value *Begin = EmitScalarExpr(E->getArg(0));
1432     Value *End = EmitScalarExpr(E->getArg(1));
1433     Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
1434     return RValue::get(Builder.CreateCall(F, {Begin, End}));
1435   }
1436   case Builtin::BI__builtin_trap:
1437     return RValue::get(EmitTrapCall(Intrinsic::trap));
1438   case Builtin::BI__debugbreak:
1439     return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
1440   case Builtin::BI__builtin_unreachable: {
1441     EmitUnreachable(E->getExprLoc());
1442 
1443     // We do need to preserve an insertion point.
1444     EmitBlock(createBasicBlock("unreachable.cont"));
1445 
1446     return RValue::get(nullptr);
1447   }
1448 
1449   case Builtin::BI__builtin_powi:
1450   case Builtin::BI__builtin_powif:
1451   case Builtin::BI__builtin_powil: {
1452     Value *Base = EmitScalarExpr(E->getArg(0));
1453     Value *Exponent = EmitScalarExpr(E->getArg(1));
1454     llvm::Type *ArgType = Base->getType();
1455     Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
1456     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1457   }
1458 
1459   case Builtin::BI__builtin_isgreater:
1460   case Builtin::BI__builtin_isgreaterequal:
1461   case Builtin::BI__builtin_isless:
1462   case Builtin::BI__builtin_islessequal:
1463   case Builtin::BI__builtin_islessgreater:
1464   case Builtin::BI__builtin_isunordered: {
1465     // Ordered comparisons: we know the arguments to these are matching scalar
1466     // floating point values.
1467     Value *LHS = EmitScalarExpr(E->getArg(0));
1468     Value *RHS = EmitScalarExpr(E->getArg(1));
1469 
1470     switch (BuiltinID) {
1471     default: llvm_unreachable("Unknown ordered comparison");
1472     case Builtin::BI__builtin_isgreater:
1473       LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
1474       break;
1475     case Builtin::BI__builtin_isgreaterequal:
1476       LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
1477       break;
1478     case Builtin::BI__builtin_isless:
1479       LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
1480       break;
1481     case Builtin::BI__builtin_islessequal:
1482       LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
1483       break;
1484     case Builtin::BI__builtin_islessgreater:
1485       LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
1486       break;
1487     case Builtin::BI__builtin_isunordered:
1488       LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
1489       break;
1490     }
1491     // ZExt bool to int type.
1492     return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
1493   }
1494   case Builtin::BI__builtin_isnan: {
1495     Value *V = EmitScalarExpr(E->getArg(0));
1496     V = Builder.CreateFCmpUNO(V, V, "cmp");
1497     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1498   }
1499 
1500   case Builtin::BIfinite:
1501   case Builtin::BI__finite:
1502   case Builtin::BIfinitef:
1503   case Builtin::BI__finitef:
1504   case Builtin::BIfinitel:
1505   case Builtin::BI__finitel:
1506   case Builtin::BI__builtin_isinf:
1507   case Builtin::BI__builtin_isfinite: {
1508     // isinf(x)    --> fabs(x) == infinity
1509     // isfinite(x) --> fabs(x) != infinity
1510     // x != NaN via the ordered compare in either case.
1511     Value *V = EmitScalarExpr(E->getArg(0));
1512     Value *Fabs = EmitFAbs(*this, V);
1513     Constant *Infinity = ConstantFP::getInfinity(V->getType());
1514     CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
1515                                   ? CmpInst::FCMP_OEQ
1516                                   : CmpInst::FCMP_ONE;
1517     Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
1518     return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
1519   }
1520 
1521   case Builtin::BI__builtin_isinf_sign: {
1522     // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
1523     Value *Arg = EmitScalarExpr(E->getArg(0));
1524     Value *AbsArg = EmitFAbs(*this, Arg);
1525     Value *IsInf = Builder.CreateFCmpOEQ(
1526         AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
1527     Value *IsNeg = EmitSignBit(*this, Arg);
1528 
1529     llvm::Type *IntTy = ConvertType(E->getType());
1530     Value *Zero = Constant::getNullValue(IntTy);
1531     Value *One = ConstantInt::get(IntTy, 1);
1532     Value *NegativeOne = ConstantInt::get(IntTy, -1);
1533     Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
1534     Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
1535     return RValue::get(Result);
1536   }
1537 
1538   case Builtin::BI__builtin_isnormal: {
1539     // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
1540     Value *V = EmitScalarExpr(E->getArg(0));
1541     Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
1542 
1543     Value *Abs = EmitFAbs(*this, V);
1544     Value *IsLessThanInf =
1545       Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
1546     APFloat Smallest = APFloat::getSmallestNormalized(
1547                    getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
1548     Value *IsNormal =
1549       Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
1550                             "isnormal");
1551     V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
1552     V = Builder.CreateAnd(V, IsNormal, "and");
1553     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1554   }
1555 
1556   case Builtin::BI__builtin_fpclassify: {
1557     Value *V = EmitScalarExpr(E->getArg(5));
1558     llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
1559 
1560     // Create Result
1561     BasicBlock *Begin = Builder.GetInsertBlock();
1562     BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
1563     Builder.SetInsertPoint(End);
1564     PHINode *Result =
1565       Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
1566                         "fpclassify_result");
1567 
1568     // if (V==0) return FP_ZERO
1569     Builder.SetInsertPoint(Begin);
1570     Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
1571                                           "iszero");
1572     Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
1573     BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
1574     Builder.CreateCondBr(IsZero, End, NotZero);
1575     Result->addIncoming(ZeroLiteral, Begin);
1576 
1577     // if (V != V) return FP_NAN
1578     Builder.SetInsertPoint(NotZero);
1579     Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
1580     Value *NanLiteral = EmitScalarExpr(E->getArg(0));
1581     BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
1582     Builder.CreateCondBr(IsNan, End, NotNan);
1583     Result->addIncoming(NanLiteral, NotZero);
1584 
1585     // if (fabs(V) == infinity) return FP_INFINITY
1586     Builder.SetInsertPoint(NotNan);
1587     Value *VAbs = EmitFAbs(*this, V);
1588     Value *IsInf =
1589       Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
1590                             "isinf");
1591     Value *InfLiteral = EmitScalarExpr(E->getArg(1));
1592     BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
1593     Builder.CreateCondBr(IsInf, End, NotInf);
1594     Result->addIncoming(InfLiteral, NotNan);
1595 
1596     // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
1597     Builder.SetInsertPoint(NotInf);
1598     APFloat Smallest = APFloat::getSmallestNormalized(
1599         getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
1600     Value *IsNormal =
1601       Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
1602                             "isnormal");
1603     Value *NormalResult =
1604       Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
1605                            EmitScalarExpr(E->getArg(3)));
1606     Builder.CreateBr(End);
1607     Result->addIncoming(NormalResult, NotInf);
1608 
1609     // return Result
1610     Builder.SetInsertPoint(End);
1611     return RValue::get(Result);
1612   }
1613 
1614   case Builtin::BIalloca:
1615   case Builtin::BI_alloca:
1616   case Builtin::BI__builtin_alloca: {
1617     Value *Size = EmitScalarExpr(E->getArg(0));
1618     const TargetInfo &TI = getContext().getTargetInfo();
1619     // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
1620     unsigned SuitableAlignmentInBytes =
1621         CGM.getContext()
1622             .toCharUnitsFromBits(TI.getSuitableAlign())
1623             .getQuantity();
1624     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1625     AI->setAlignment(SuitableAlignmentInBytes);
1626     return RValue::get(AI);
1627   }
1628 
1629   case Builtin::BI__builtin_alloca_with_align: {
1630     Value *Size = EmitScalarExpr(E->getArg(0));
1631     Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
1632     auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
1633     unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
1634     unsigned AlignmentInBytes =
1635         CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity();
1636     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1637     AI->setAlignment(AlignmentInBytes);
1638     return RValue::get(AI);
1639   }
1640 
1641   case Builtin::BIbzero:
1642   case Builtin::BI__builtin_bzero: {
1643     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1644     Value *SizeVal = EmitScalarExpr(E->getArg(1));
1645     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1646                         E->getArg(0)->getExprLoc(), FD, 0);
1647     Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
1648     return RValue::get(nullptr);
1649   }
1650   case Builtin::BImemcpy:
1651   case Builtin::BI__builtin_memcpy: {
1652     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1653     Address Src = EmitPointerWithAlignment(E->getArg(1));
1654     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1655     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1656                         E->getArg(0)->getExprLoc(), FD, 0);
1657     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1658                         E->getArg(1)->getExprLoc(), FD, 1);
1659     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1660     return RValue::get(Dest.getPointer());
1661   }
1662 
1663   case Builtin::BI__builtin_char_memchr:
1664     BuiltinID = Builtin::BI__builtin_memchr;
1665     break;
1666 
1667   case Builtin::BI__builtin___memcpy_chk: {
1668     // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
1669     llvm::APSInt Size, DstSize;
1670     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1671         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1672       break;
1673     if (Size.ugt(DstSize))
1674       break;
1675     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1676     Address Src = EmitPointerWithAlignment(E->getArg(1));
1677     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1678     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1679     return RValue::get(Dest.getPointer());
1680   }
1681 
1682   case Builtin::BI__builtin_objc_memmove_collectable: {
1683     Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
1684     Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
1685     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1686     CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
1687                                                   DestAddr, SrcAddr, SizeVal);
1688     return RValue::get(DestAddr.getPointer());
1689   }
1690 
1691   case Builtin::BI__builtin___memmove_chk: {
1692     // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
1693     llvm::APSInt Size, DstSize;
1694     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1695         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1696       break;
1697     if (Size.ugt(DstSize))
1698       break;
1699     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1700     Address Src = EmitPointerWithAlignment(E->getArg(1));
1701     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1702     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1703     return RValue::get(Dest.getPointer());
1704   }
1705 
1706   case Builtin::BImemmove:
1707   case Builtin::BI__builtin_memmove: {
1708     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1709     Address Src = EmitPointerWithAlignment(E->getArg(1));
1710     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1711     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1712                         E->getArg(0)->getExprLoc(), FD, 0);
1713     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1714                         E->getArg(1)->getExprLoc(), FD, 1);
1715     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1716     return RValue::get(Dest.getPointer());
1717   }
1718   case Builtin::BImemset:
1719   case Builtin::BI__builtin_memset: {
1720     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1721     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1722                                          Builder.getInt8Ty());
1723     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1724     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1725                         E->getArg(0)->getExprLoc(), FD, 0);
1726     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1727     return RValue::get(Dest.getPointer());
1728   }
1729   case Builtin::BI__builtin___memset_chk: {
1730     // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
1731     llvm::APSInt Size, DstSize;
1732     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1733         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1734       break;
1735     if (Size.ugt(DstSize))
1736       break;
1737     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1738     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1739                                          Builder.getInt8Ty());
1740     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1741     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1742     return RValue::get(Dest.getPointer());
1743   }
1744   case Builtin::BI__builtin_wmemcmp: {
1745     // The MSVC runtime library does not provide a definition of wmemcmp, so we
1746     // need an inline implementation.
1747     if (!getTarget().getTriple().isOSMSVCRT())
1748       break;
1749 
1750     llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
1751 
1752     Value *Dst = EmitScalarExpr(E->getArg(0));
1753     Value *Src = EmitScalarExpr(E->getArg(1));
1754     Value *Size = EmitScalarExpr(E->getArg(2));
1755 
1756     BasicBlock *Entry = Builder.GetInsertBlock();
1757     BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
1758     BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
1759     BasicBlock *Next = createBasicBlock("wmemcmp.next");
1760     BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
1761     Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
1762     Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
1763 
1764     EmitBlock(CmpGT);
1765     PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
1766     DstPhi->addIncoming(Dst, Entry);
1767     PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
1768     SrcPhi->addIncoming(Src, Entry);
1769     PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
1770     SizePhi->addIncoming(Size, Entry);
1771     CharUnits WCharAlign =
1772         getContext().getTypeAlignInChars(getContext().WCharTy);
1773     Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
1774     Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
1775     Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
1776     Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
1777 
1778     EmitBlock(CmpLT);
1779     Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
1780     Builder.CreateCondBr(DstLtSrc, Exit, Next);
1781 
1782     EmitBlock(Next);
1783     Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
1784     Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
1785     Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
1786     Value *NextSizeEq0 =
1787         Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
1788     Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
1789     DstPhi->addIncoming(NextDst, Next);
1790     SrcPhi->addIncoming(NextSrc, Next);
1791     SizePhi->addIncoming(NextSize, Next);
1792 
1793     EmitBlock(Exit);
1794     PHINode *Ret = Builder.CreatePHI(IntTy, 4);
1795     Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
1796     Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
1797     Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
1798     Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
1799     return RValue::get(Ret);
1800   }
1801   case Builtin::BI__builtin_dwarf_cfa: {
1802     // The offset in bytes from the first argument to the CFA.
1803     //
1804     // Why on earth is this in the frontend?  Is there any reason at
1805     // all that the backend can't reasonably determine this while
1806     // lowering llvm.eh.dwarf.cfa()?
1807     //
1808     // TODO: If there's a satisfactory reason, add a target hook for
1809     // this instead of hard-coding 0, which is correct for most targets.
1810     int32_t Offset = 0;
1811 
1812     Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
1813     return RValue::get(Builder.CreateCall(F,
1814                                       llvm::ConstantInt::get(Int32Ty, Offset)));
1815   }
1816   case Builtin::BI__builtin_return_address: {
1817     Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
1818                                                    getContext().UnsignedIntTy);
1819     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1820     return RValue::get(Builder.CreateCall(F, Depth));
1821   }
1822   case Builtin::BI_ReturnAddress: {
1823     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1824     return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
1825   }
1826   case Builtin::BI__builtin_frame_address: {
1827     Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
1828                                                    getContext().UnsignedIntTy);
1829     Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
1830     return RValue::get(Builder.CreateCall(F, Depth));
1831   }
1832   case Builtin::BI__builtin_extract_return_addr: {
1833     Value *Address = EmitScalarExpr(E->getArg(0));
1834     Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
1835     return RValue::get(Result);
1836   }
1837   case Builtin::BI__builtin_frob_return_addr: {
1838     Value *Address = EmitScalarExpr(E->getArg(0));
1839     Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
1840     return RValue::get(Result);
1841   }
1842   case Builtin::BI__builtin_dwarf_sp_column: {
1843     llvm::IntegerType *Ty
1844       = cast<llvm::IntegerType>(ConvertType(E->getType()));
1845     int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
1846     if (Column == -1) {
1847       CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
1848       return RValue::get(llvm::UndefValue::get(Ty));
1849     }
1850     return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
1851   }
1852   case Builtin::BI__builtin_init_dwarf_reg_size_table: {
1853     Value *Address = EmitScalarExpr(E->getArg(0));
1854     if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
1855       CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
1856     return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1857   }
1858   case Builtin::BI__builtin_eh_return: {
1859     Value *Int = EmitScalarExpr(E->getArg(0));
1860     Value *Ptr = EmitScalarExpr(E->getArg(1));
1861 
1862     llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
1863     assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
1864            "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
1865     Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
1866                                   ? Intrinsic::eh_return_i32
1867                                   : Intrinsic::eh_return_i64);
1868     Builder.CreateCall(F, {Int, Ptr});
1869     Builder.CreateUnreachable();
1870 
1871     // We do need to preserve an insertion point.
1872     EmitBlock(createBasicBlock("builtin_eh_return.cont"));
1873 
1874     return RValue::get(nullptr);
1875   }
1876   case Builtin::BI__builtin_unwind_init: {
1877     Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
1878     return RValue::get(Builder.CreateCall(F));
1879   }
1880   case Builtin::BI__builtin_extend_pointer: {
1881     // Extends a pointer to the size of an _Unwind_Word, which is
1882     // uint64_t on all platforms.  Generally this gets poked into a
1883     // register and eventually used as an address, so if the
1884     // addressing registers are wider than pointers and the platform
1885     // doesn't implicitly ignore high-order bits when doing
1886     // addressing, we need to make sure we zext / sext based on
1887     // the platform's expectations.
1888     //
1889     // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
1890 
1891     // Cast the pointer to intptr_t.
1892     Value *Ptr = EmitScalarExpr(E->getArg(0));
1893     Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
1894 
1895     // If that's 64 bits, we're done.
1896     if (IntPtrTy->getBitWidth() == 64)
1897       return RValue::get(Result);
1898 
1899     // Otherwise, ask the codegen data what to do.
1900     if (getTargetHooks().extendPointerWithSExt())
1901       return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
1902     else
1903       return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
1904   }
1905   case Builtin::BI__builtin_setjmp: {
1906     // Buffer is a void**.
1907     Address Buf = EmitPointerWithAlignment(E->getArg(0));
1908 
1909     // Store the frame pointer to the setjmp buffer.
1910     Value *FrameAddr =
1911       Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1912                          ConstantInt::get(Int32Ty, 0));
1913     Builder.CreateStore(FrameAddr, Buf);
1914 
1915     // Store the stack pointer to the setjmp buffer.
1916     Value *StackAddr =
1917         Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
1918     Address StackSaveSlot =
1919       Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
1920     Builder.CreateStore(StackAddr, StackSaveSlot);
1921 
1922     // Call LLVM's EH setjmp, which is lightweight.
1923     Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
1924     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1925     return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
1926   }
1927   case Builtin::BI__builtin_longjmp: {
1928     Value *Buf = EmitScalarExpr(E->getArg(0));
1929     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1930 
1931     // Call LLVM's EH longjmp, which is lightweight.
1932     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
1933 
1934     // longjmp doesn't return; mark this as unreachable.
1935     Builder.CreateUnreachable();
1936 
1937     // We do need to preserve an insertion point.
1938     EmitBlock(createBasicBlock("longjmp.cont"));
1939 
1940     return RValue::get(nullptr);
1941   }
1942   case Builtin::BI__sync_fetch_and_add:
1943   case Builtin::BI__sync_fetch_and_sub:
1944   case Builtin::BI__sync_fetch_and_or:
1945   case Builtin::BI__sync_fetch_and_and:
1946   case Builtin::BI__sync_fetch_and_xor:
1947   case Builtin::BI__sync_fetch_and_nand:
1948   case Builtin::BI__sync_add_and_fetch:
1949   case Builtin::BI__sync_sub_and_fetch:
1950   case Builtin::BI__sync_and_and_fetch:
1951   case Builtin::BI__sync_or_and_fetch:
1952   case Builtin::BI__sync_xor_and_fetch:
1953   case Builtin::BI__sync_nand_and_fetch:
1954   case Builtin::BI__sync_val_compare_and_swap:
1955   case Builtin::BI__sync_bool_compare_and_swap:
1956   case Builtin::BI__sync_lock_test_and_set:
1957   case Builtin::BI__sync_lock_release:
1958   case Builtin::BI__sync_swap:
1959     llvm_unreachable("Shouldn't make it through sema");
1960   case Builtin::BI__sync_fetch_and_add_1:
1961   case Builtin::BI__sync_fetch_and_add_2:
1962   case Builtin::BI__sync_fetch_and_add_4:
1963   case Builtin::BI__sync_fetch_and_add_8:
1964   case Builtin::BI__sync_fetch_and_add_16:
1965     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
1966   case Builtin::BI__sync_fetch_and_sub_1:
1967   case Builtin::BI__sync_fetch_and_sub_2:
1968   case Builtin::BI__sync_fetch_and_sub_4:
1969   case Builtin::BI__sync_fetch_and_sub_8:
1970   case Builtin::BI__sync_fetch_and_sub_16:
1971     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
1972   case Builtin::BI__sync_fetch_and_or_1:
1973   case Builtin::BI__sync_fetch_and_or_2:
1974   case Builtin::BI__sync_fetch_and_or_4:
1975   case Builtin::BI__sync_fetch_and_or_8:
1976   case Builtin::BI__sync_fetch_and_or_16:
1977     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
1978   case Builtin::BI__sync_fetch_and_and_1:
1979   case Builtin::BI__sync_fetch_and_and_2:
1980   case Builtin::BI__sync_fetch_and_and_4:
1981   case Builtin::BI__sync_fetch_and_and_8:
1982   case Builtin::BI__sync_fetch_and_and_16:
1983     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
1984   case Builtin::BI__sync_fetch_and_xor_1:
1985   case Builtin::BI__sync_fetch_and_xor_2:
1986   case Builtin::BI__sync_fetch_and_xor_4:
1987   case Builtin::BI__sync_fetch_and_xor_8:
1988   case Builtin::BI__sync_fetch_and_xor_16:
1989     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
1990   case Builtin::BI__sync_fetch_and_nand_1:
1991   case Builtin::BI__sync_fetch_and_nand_2:
1992   case Builtin::BI__sync_fetch_and_nand_4:
1993   case Builtin::BI__sync_fetch_and_nand_8:
1994   case Builtin::BI__sync_fetch_and_nand_16:
1995     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
1996 
1997   // Clang extensions: not overloaded yet.
1998   case Builtin::BI__sync_fetch_and_min:
1999     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
2000   case Builtin::BI__sync_fetch_and_max:
2001     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
2002   case Builtin::BI__sync_fetch_and_umin:
2003     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
2004   case Builtin::BI__sync_fetch_and_umax:
2005     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
2006 
2007   case Builtin::BI__sync_add_and_fetch_1:
2008   case Builtin::BI__sync_add_and_fetch_2:
2009   case Builtin::BI__sync_add_and_fetch_4:
2010   case Builtin::BI__sync_add_and_fetch_8:
2011   case Builtin::BI__sync_add_and_fetch_16:
2012     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
2013                                 llvm::Instruction::Add);
2014   case Builtin::BI__sync_sub_and_fetch_1:
2015   case Builtin::BI__sync_sub_and_fetch_2:
2016   case Builtin::BI__sync_sub_and_fetch_4:
2017   case Builtin::BI__sync_sub_and_fetch_8:
2018   case Builtin::BI__sync_sub_and_fetch_16:
2019     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
2020                                 llvm::Instruction::Sub);
2021   case Builtin::BI__sync_and_and_fetch_1:
2022   case Builtin::BI__sync_and_and_fetch_2:
2023   case Builtin::BI__sync_and_and_fetch_4:
2024   case Builtin::BI__sync_and_and_fetch_8:
2025   case Builtin::BI__sync_and_and_fetch_16:
2026     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
2027                                 llvm::Instruction::And);
2028   case Builtin::BI__sync_or_and_fetch_1:
2029   case Builtin::BI__sync_or_and_fetch_2:
2030   case Builtin::BI__sync_or_and_fetch_4:
2031   case Builtin::BI__sync_or_and_fetch_8:
2032   case Builtin::BI__sync_or_and_fetch_16:
2033     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
2034                                 llvm::Instruction::Or);
2035   case Builtin::BI__sync_xor_and_fetch_1:
2036   case Builtin::BI__sync_xor_and_fetch_2:
2037   case Builtin::BI__sync_xor_and_fetch_4:
2038   case Builtin::BI__sync_xor_and_fetch_8:
2039   case Builtin::BI__sync_xor_and_fetch_16:
2040     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
2041                                 llvm::Instruction::Xor);
2042   case Builtin::BI__sync_nand_and_fetch_1:
2043   case Builtin::BI__sync_nand_and_fetch_2:
2044   case Builtin::BI__sync_nand_and_fetch_4:
2045   case Builtin::BI__sync_nand_and_fetch_8:
2046   case Builtin::BI__sync_nand_and_fetch_16:
2047     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
2048                                 llvm::Instruction::And, true);
2049 
2050   case Builtin::BI__sync_val_compare_and_swap_1:
2051   case Builtin::BI__sync_val_compare_and_swap_2:
2052   case Builtin::BI__sync_val_compare_and_swap_4:
2053   case Builtin::BI__sync_val_compare_and_swap_8:
2054   case Builtin::BI__sync_val_compare_and_swap_16:
2055     return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
2056 
2057   case Builtin::BI__sync_bool_compare_and_swap_1:
2058   case Builtin::BI__sync_bool_compare_and_swap_2:
2059   case Builtin::BI__sync_bool_compare_and_swap_4:
2060   case Builtin::BI__sync_bool_compare_and_swap_8:
2061   case Builtin::BI__sync_bool_compare_and_swap_16:
2062     return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
2063 
2064   case Builtin::BI__sync_swap_1:
2065   case Builtin::BI__sync_swap_2:
2066   case Builtin::BI__sync_swap_4:
2067   case Builtin::BI__sync_swap_8:
2068   case Builtin::BI__sync_swap_16:
2069     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
2070 
2071   case Builtin::BI__sync_lock_test_and_set_1:
2072   case Builtin::BI__sync_lock_test_and_set_2:
2073   case Builtin::BI__sync_lock_test_and_set_4:
2074   case Builtin::BI__sync_lock_test_and_set_8:
2075   case Builtin::BI__sync_lock_test_and_set_16:
2076     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
2077 
2078   case Builtin::BI__sync_lock_release_1:
2079   case Builtin::BI__sync_lock_release_2:
2080   case Builtin::BI__sync_lock_release_4:
2081   case Builtin::BI__sync_lock_release_8:
2082   case Builtin::BI__sync_lock_release_16: {
2083     Value *Ptr = EmitScalarExpr(E->getArg(0));
2084     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
2085     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
2086     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
2087                                              StoreSize.getQuantity() * 8);
2088     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
2089     llvm::StoreInst *Store =
2090       Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
2091                                  StoreSize);
2092     Store->setAtomic(llvm::AtomicOrdering::Release);
2093     return RValue::get(nullptr);
2094   }
2095 
2096   case Builtin::BI__sync_synchronize: {
2097     // We assume this is supposed to correspond to a C++0x-style
2098     // sequentially-consistent fence (i.e. this is only usable for
2099     // synchronization, not device I/O or anything like that). This intrinsic
2100     // is really badly designed in the sense that in theory, there isn't
2101     // any way to safely use it... but in practice, it mostly works
2102     // to use it with non-atomic loads and stores to get acquire/release
2103     // semantics.
2104     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
2105     return RValue::get(nullptr);
2106   }
2107 
2108   case Builtin::BI__builtin_nontemporal_load:
2109     return RValue::get(EmitNontemporalLoad(*this, E));
2110   case Builtin::BI__builtin_nontemporal_store:
2111     return RValue::get(EmitNontemporalStore(*this, E));
2112   case Builtin::BI__c11_atomic_is_lock_free:
2113   case Builtin::BI__atomic_is_lock_free: {
2114     // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
2115     // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
2116     // _Atomic(T) is always properly-aligned.
2117     const char *LibCallName = "__atomic_is_lock_free";
2118     CallArgList Args;
2119     Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
2120              getContext().getSizeType());
2121     if (BuiltinID == Builtin::BI__atomic_is_lock_free)
2122       Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
2123                getContext().VoidPtrTy);
2124     else
2125       Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
2126                getContext().VoidPtrTy);
2127     const CGFunctionInfo &FuncInfo =
2128         CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
2129     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
2130     llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
2131     return EmitCall(FuncInfo, CGCallee::forDirect(Func),
2132                     ReturnValueSlot(), Args);
2133   }
2134 
2135   case Builtin::BI__atomic_test_and_set: {
2136     // Look at the argument type to determine whether this is a volatile
2137     // operation. The parameter type is always volatile.
2138     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
2139     bool Volatile =
2140         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
2141 
2142     Value *Ptr = EmitScalarExpr(E->getArg(0));
2143     unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
2144     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
2145     Value *NewVal = Builder.getInt8(1);
2146     Value *Order = EmitScalarExpr(E->getArg(1));
2147     if (isa<llvm::ConstantInt>(Order)) {
2148       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
2149       AtomicRMWInst *Result = nullptr;
2150       switch (ord) {
2151       case 0:  // memory_order_relaxed
2152       default: // invalid order
2153         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2154                                          llvm::AtomicOrdering::Monotonic);
2155         break;
2156       case 1: // memory_order_consume
2157       case 2: // memory_order_acquire
2158         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2159                                          llvm::AtomicOrdering::Acquire);
2160         break;
2161       case 3: // memory_order_release
2162         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2163                                          llvm::AtomicOrdering::Release);
2164         break;
2165       case 4: // memory_order_acq_rel
2166 
2167         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2168                                          llvm::AtomicOrdering::AcquireRelease);
2169         break;
2170       case 5: // memory_order_seq_cst
2171         Result = Builder.CreateAtomicRMW(
2172             llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2173             llvm::AtomicOrdering::SequentiallyConsistent);
2174         break;
2175       }
2176       Result->setVolatile(Volatile);
2177       return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
2178     }
2179 
2180     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
2181 
2182     llvm::BasicBlock *BBs[5] = {
2183       createBasicBlock("monotonic", CurFn),
2184       createBasicBlock("acquire", CurFn),
2185       createBasicBlock("release", CurFn),
2186       createBasicBlock("acqrel", CurFn),
2187       createBasicBlock("seqcst", CurFn)
2188     };
2189     llvm::AtomicOrdering Orders[5] = {
2190         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
2191         llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
2192         llvm::AtomicOrdering::SequentiallyConsistent};
2193 
2194     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
2195     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
2196 
2197     Builder.SetInsertPoint(ContBB);
2198     PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
2199 
2200     for (unsigned i = 0; i < 5; ++i) {
2201       Builder.SetInsertPoint(BBs[i]);
2202       AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
2203                                                    Ptr, NewVal, Orders[i]);
2204       RMW->setVolatile(Volatile);
2205       Result->addIncoming(RMW, BBs[i]);
2206       Builder.CreateBr(ContBB);
2207     }
2208 
2209     SI->addCase(Builder.getInt32(0), BBs[0]);
2210     SI->addCase(Builder.getInt32(1), BBs[1]);
2211     SI->addCase(Builder.getInt32(2), BBs[1]);
2212     SI->addCase(Builder.getInt32(3), BBs[2]);
2213     SI->addCase(Builder.getInt32(4), BBs[3]);
2214     SI->addCase(Builder.getInt32(5), BBs[4]);
2215 
2216     Builder.SetInsertPoint(ContBB);
2217     return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
2218   }
2219 
2220   case Builtin::BI__atomic_clear: {
2221     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
2222     bool Volatile =
2223         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
2224 
2225     Address Ptr = EmitPointerWithAlignment(E->getArg(0));
2226     unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
2227     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
2228     Value *NewVal = Builder.getInt8(0);
2229     Value *Order = EmitScalarExpr(E->getArg(1));
2230     if (isa<llvm::ConstantInt>(Order)) {
2231       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
2232       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
2233       switch (ord) {
2234       case 0:  // memory_order_relaxed
2235       default: // invalid order
2236         Store->setOrdering(llvm::AtomicOrdering::Monotonic);
2237         break;
2238       case 3:  // memory_order_release
2239         Store->setOrdering(llvm::AtomicOrdering::Release);
2240         break;
2241       case 5:  // memory_order_seq_cst
2242         Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
2243         break;
2244       }
2245       return RValue::get(nullptr);
2246     }
2247 
2248     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
2249 
2250     llvm::BasicBlock *BBs[3] = {
2251       createBasicBlock("monotonic", CurFn),
2252       createBasicBlock("release", CurFn),
2253       createBasicBlock("seqcst", CurFn)
2254     };
2255     llvm::AtomicOrdering Orders[3] = {
2256         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
2257         llvm::AtomicOrdering::SequentiallyConsistent};
2258 
2259     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
2260     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
2261 
2262     for (unsigned i = 0; i < 3; ++i) {
2263       Builder.SetInsertPoint(BBs[i]);
2264       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
2265       Store->setOrdering(Orders[i]);
2266       Builder.CreateBr(ContBB);
2267     }
2268 
2269     SI->addCase(Builder.getInt32(0), BBs[0]);
2270     SI->addCase(Builder.getInt32(3), BBs[1]);
2271     SI->addCase(Builder.getInt32(5), BBs[2]);
2272 
2273     Builder.SetInsertPoint(ContBB);
2274     return RValue::get(nullptr);
2275   }
2276 
2277   case Builtin::BI__atomic_thread_fence:
2278   case Builtin::BI__atomic_signal_fence:
2279   case Builtin::BI__c11_atomic_thread_fence:
2280   case Builtin::BI__c11_atomic_signal_fence: {
2281     llvm::SyncScope::ID SSID;
2282     if (BuiltinID == Builtin::BI__atomic_signal_fence ||
2283         BuiltinID == Builtin::BI__c11_atomic_signal_fence)
2284       SSID = llvm::SyncScope::SingleThread;
2285     else
2286       SSID = llvm::SyncScope::System;
2287     Value *Order = EmitScalarExpr(E->getArg(0));
2288     if (isa<llvm::ConstantInt>(Order)) {
2289       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
2290       switch (ord) {
2291       case 0:  // memory_order_relaxed
2292       default: // invalid order
2293         break;
2294       case 1:  // memory_order_consume
2295       case 2:  // memory_order_acquire
2296         Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
2297         break;
2298       case 3:  // memory_order_release
2299         Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
2300         break;
2301       case 4:  // memory_order_acq_rel
2302         Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
2303         break;
2304       case 5:  // memory_order_seq_cst
2305         Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
2306         break;
2307       }
2308       return RValue::get(nullptr);
2309     }
2310 
2311     llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
2312     AcquireBB = createBasicBlock("acquire", CurFn);
2313     ReleaseBB = createBasicBlock("release", CurFn);
2314     AcqRelBB = createBasicBlock("acqrel", CurFn);
2315     SeqCstBB = createBasicBlock("seqcst", CurFn);
2316     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
2317 
2318     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
2319     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
2320 
2321     Builder.SetInsertPoint(AcquireBB);
2322     Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
2323     Builder.CreateBr(ContBB);
2324     SI->addCase(Builder.getInt32(1), AcquireBB);
2325     SI->addCase(Builder.getInt32(2), AcquireBB);
2326 
2327     Builder.SetInsertPoint(ReleaseBB);
2328     Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
2329     Builder.CreateBr(ContBB);
2330     SI->addCase(Builder.getInt32(3), ReleaseBB);
2331 
2332     Builder.SetInsertPoint(AcqRelBB);
2333     Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
2334     Builder.CreateBr(ContBB);
2335     SI->addCase(Builder.getInt32(4), AcqRelBB);
2336 
2337     Builder.SetInsertPoint(SeqCstBB);
2338     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
2339     Builder.CreateBr(ContBB);
2340     SI->addCase(Builder.getInt32(5), SeqCstBB);
2341 
2342     Builder.SetInsertPoint(ContBB);
2343     return RValue::get(nullptr);
2344   }
2345 
2346   case Builtin::BI__builtin_signbit:
2347   case Builtin::BI__builtin_signbitf:
2348   case Builtin::BI__builtin_signbitl: {
2349     return RValue::get(
2350         Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
2351                            ConvertType(E->getType())));
2352   }
2353   case Builtin::BI__annotation: {
2354     // Re-encode each wide string to UTF8 and make an MDString.
2355     SmallVector<Metadata *, 1> Strings;
2356     for (const Expr *Arg : E->arguments()) {
2357       const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
2358       assert(Str->getCharByteWidth() == 2);
2359       StringRef WideBytes = Str->getBytes();
2360       std::string StrUtf8;
2361       if (!convertUTF16ToUTF8String(
2362               makeArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
2363         CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
2364         continue;
2365       }
2366       Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
2367     }
2368 
2369     // Build and MDTuple of MDStrings and emit the intrinsic call.
2370     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
2371     MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
2372     Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
2373     return RValue::getIgnored();
2374   }
2375   case Builtin::BI__builtin_annotation: {
2376     llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
2377     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
2378                                       AnnVal->getType());
2379 
2380     // Get the annotation string, go through casts. Sema requires this to be a
2381     // non-wide string literal, potentially casted, so the cast<> is safe.
2382     const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
2383     StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
2384     return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
2385   }
2386   case Builtin::BI__builtin_addcb:
2387   case Builtin::BI__builtin_addcs:
2388   case Builtin::BI__builtin_addc:
2389   case Builtin::BI__builtin_addcl:
2390   case Builtin::BI__builtin_addcll:
2391   case Builtin::BI__builtin_subcb:
2392   case Builtin::BI__builtin_subcs:
2393   case Builtin::BI__builtin_subc:
2394   case Builtin::BI__builtin_subcl:
2395   case Builtin::BI__builtin_subcll: {
2396 
2397     // We translate all of these builtins from expressions of the form:
2398     //   int x = ..., y = ..., carryin = ..., carryout, result;
2399     //   result = __builtin_addc(x, y, carryin, &carryout);
2400     //
2401     // to LLVM IR of the form:
2402     //
2403     //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
2404     //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
2405     //   %carry1 = extractvalue {i32, i1} %tmp1, 1
2406     //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
2407     //                                                       i32 %carryin)
2408     //   %result = extractvalue {i32, i1} %tmp2, 0
2409     //   %carry2 = extractvalue {i32, i1} %tmp2, 1
2410     //   %tmp3 = or i1 %carry1, %carry2
2411     //   %tmp4 = zext i1 %tmp3 to i32
2412     //   store i32 %tmp4, i32* %carryout
2413 
2414     // Scalarize our inputs.
2415     llvm::Value *X = EmitScalarExpr(E->getArg(0));
2416     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
2417     llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
2418     Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
2419 
2420     // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
2421     llvm::Intrinsic::ID IntrinsicId;
2422     switch (BuiltinID) {
2423     default: llvm_unreachable("Unknown multiprecision builtin id.");
2424     case Builtin::BI__builtin_addcb:
2425     case Builtin::BI__builtin_addcs:
2426     case Builtin::BI__builtin_addc:
2427     case Builtin::BI__builtin_addcl:
2428     case Builtin::BI__builtin_addcll:
2429       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
2430       break;
2431     case Builtin::BI__builtin_subcb:
2432     case Builtin::BI__builtin_subcs:
2433     case Builtin::BI__builtin_subc:
2434     case Builtin::BI__builtin_subcl:
2435     case Builtin::BI__builtin_subcll:
2436       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
2437       break;
2438     }
2439 
2440     // Construct our resulting LLVM IR expression.
2441     llvm::Value *Carry1;
2442     llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
2443                                               X, Y, Carry1);
2444     llvm::Value *Carry2;
2445     llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
2446                                               Sum1, Carryin, Carry2);
2447     llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
2448                                                X->getType());
2449     Builder.CreateStore(CarryOut, CarryOutPtr);
2450     return RValue::get(Sum2);
2451   }
2452 
2453   case Builtin::BI__builtin_add_overflow:
2454   case Builtin::BI__builtin_sub_overflow:
2455   case Builtin::BI__builtin_mul_overflow: {
2456     const clang::Expr *LeftArg = E->getArg(0);
2457     const clang::Expr *RightArg = E->getArg(1);
2458     const clang::Expr *ResultArg = E->getArg(2);
2459 
2460     clang::QualType ResultQTy =
2461         ResultArg->getType()->castAs<PointerType>()->getPointeeType();
2462 
2463     WidthAndSignedness LeftInfo =
2464         getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
2465     WidthAndSignedness RightInfo =
2466         getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
2467     WidthAndSignedness ResultInfo =
2468         getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
2469 
2470     // Handle mixed-sign multiplication as a special case, because adding
2471     // runtime or backend support for our generic irgen would be too expensive.
2472     if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
2473       return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
2474                                           RightInfo, ResultArg, ResultQTy,
2475                                           ResultInfo);
2476 
2477     WidthAndSignedness EncompassingInfo =
2478         EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
2479 
2480     llvm::Type *EncompassingLLVMTy =
2481         llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
2482 
2483     llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
2484 
2485     llvm::Intrinsic::ID IntrinsicId;
2486     switch (BuiltinID) {
2487     default:
2488       llvm_unreachable("Unknown overflow builtin id.");
2489     case Builtin::BI__builtin_add_overflow:
2490       IntrinsicId = EncompassingInfo.Signed
2491                         ? llvm::Intrinsic::sadd_with_overflow
2492                         : llvm::Intrinsic::uadd_with_overflow;
2493       break;
2494     case Builtin::BI__builtin_sub_overflow:
2495       IntrinsicId = EncompassingInfo.Signed
2496                         ? llvm::Intrinsic::ssub_with_overflow
2497                         : llvm::Intrinsic::usub_with_overflow;
2498       break;
2499     case Builtin::BI__builtin_mul_overflow:
2500       IntrinsicId = EncompassingInfo.Signed
2501                         ? llvm::Intrinsic::smul_with_overflow
2502                         : llvm::Intrinsic::umul_with_overflow;
2503       break;
2504     }
2505 
2506     llvm::Value *Left = EmitScalarExpr(LeftArg);
2507     llvm::Value *Right = EmitScalarExpr(RightArg);
2508     Address ResultPtr = EmitPointerWithAlignment(ResultArg);
2509 
2510     // Extend each operand to the encompassing type.
2511     Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
2512     Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
2513 
2514     // Perform the operation on the extended values.
2515     llvm::Value *Overflow, *Result;
2516     Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
2517 
2518     if (EncompassingInfo.Width > ResultInfo.Width) {
2519       // The encompassing type is wider than the result type, so we need to
2520       // truncate it.
2521       llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
2522 
2523       // To see if the truncation caused an overflow, we will extend
2524       // the result and then compare it to the original result.
2525       llvm::Value *ResultTruncExt = Builder.CreateIntCast(
2526           ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
2527       llvm::Value *TruncationOverflow =
2528           Builder.CreateICmpNE(Result, ResultTruncExt);
2529 
2530       Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
2531       Result = ResultTrunc;
2532     }
2533 
2534     // Finally, store the result using the pointer.
2535     bool isVolatile =
2536       ResultArg->getType()->getPointeeType().isVolatileQualified();
2537     Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
2538 
2539     return RValue::get(Overflow);
2540   }
2541 
2542   case Builtin::BI__builtin_uadd_overflow:
2543   case Builtin::BI__builtin_uaddl_overflow:
2544   case Builtin::BI__builtin_uaddll_overflow:
2545   case Builtin::BI__builtin_usub_overflow:
2546   case Builtin::BI__builtin_usubl_overflow:
2547   case Builtin::BI__builtin_usubll_overflow:
2548   case Builtin::BI__builtin_umul_overflow:
2549   case Builtin::BI__builtin_umull_overflow:
2550   case Builtin::BI__builtin_umulll_overflow:
2551   case Builtin::BI__builtin_sadd_overflow:
2552   case Builtin::BI__builtin_saddl_overflow:
2553   case Builtin::BI__builtin_saddll_overflow:
2554   case Builtin::BI__builtin_ssub_overflow:
2555   case Builtin::BI__builtin_ssubl_overflow:
2556   case Builtin::BI__builtin_ssubll_overflow:
2557   case Builtin::BI__builtin_smul_overflow:
2558   case Builtin::BI__builtin_smull_overflow:
2559   case Builtin::BI__builtin_smulll_overflow: {
2560 
2561     // We translate all of these builtins directly to the relevant llvm IR node.
2562 
2563     // Scalarize our inputs.
2564     llvm::Value *X = EmitScalarExpr(E->getArg(0));
2565     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
2566     Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
2567 
2568     // Decide which of the overflow intrinsics we are lowering to:
2569     llvm::Intrinsic::ID IntrinsicId;
2570     switch (BuiltinID) {
2571     default: llvm_unreachable("Unknown overflow builtin id.");
2572     case Builtin::BI__builtin_uadd_overflow:
2573     case Builtin::BI__builtin_uaddl_overflow:
2574     case Builtin::BI__builtin_uaddll_overflow:
2575       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
2576       break;
2577     case Builtin::BI__builtin_usub_overflow:
2578     case Builtin::BI__builtin_usubl_overflow:
2579     case Builtin::BI__builtin_usubll_overflow:
2580       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
2581       break;
2582     case Builtin::BI__builtin_umul_overflow:
2583     case Builtin::BI__builtin_umull_overflow:
2584     case Builtin::BI__builtin_umulll_overflow:
2585       IntrinsicId = llvm::Intrinsic::umul_with_overflow;
2586       break;
2587     case Builtin::BI__builtin_sadd_overflow:
2588     case Builtin::BI__builtin_saddl_overflow:
2589     case Builtin::BI__builtin_saddll_overflow:
2590       IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
2591       break;
2592     case Builtin::BI__builtin_ssub_overflow:
2593     case Builtin::BI__builtin_ssubl_overflow:
2594     case Builtin::BI__builtin_ssubll_overflow:
2595       IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
2596       break;
2597     case Builtin::BI__builtin_smul_overflow:
2598     case Builtin::BI__builtin_smull_overflow:
2599     case Builtin::BI__builtin_smulll_overflow:
2600       IntrinsicId = llvm::Intrinsic::smul_with_overflow;
2601       break;
2602     }
2603 
2604 
2605     llvm::Value *Carry;
2606     llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
2607     Builder.CreateStore(Sum, SumOutPtr);
2608 
2609     return RValue::get(Carry);
2610   }
2611   case Builtin::BI__builtin_addressof:
2612     return RValue::get(EmitLValue(E->getArg(0)).getPointer());
2613   case Builtin::BI__builtin_operator_new:
2614     return EmitBuiltinNewDeleteCall(
2615         E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
2616   case Builtin::BI__builtin_operator_delete:
2617     return EmitBuiltinNewDeleteCall(
2618         E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
2619 
2620   case Builtin::BI__noop:
2621     // __noop always evaluates to an integer literal zero.
2622     return RValue::get(ConstantInt::get(IntTy, 0));
2623   case Builtin::BI__builtin_call_with_static_chain: {
2624     const CallExpr *Call = cast<CallExpr>(E->getArg(0));
2625     const Expr *Chain = E->getArg(1);
2626     return EmitCall(Call->getCallee()->getType(),
2627                     EmitCallee(Call->getCallee()), Call, ReturnValue,
2628                     EmitScalarExpr(Chain));
2629   }
2630   case Builtin::BI_InterlockedExchange8:
2631   case Builtin::BI_InterlockedExchange16:
2632   case Builtin::BI_InterlockedExchange:
2633   case Builtin::BI_InterlockedExchangePointer:
2634     return RValue::get(
2635         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
2636   case Builtin::BI_InterlockedCompareExchangePointer: {
2637     llvm::Type *RTy;
2638     llvm::IntegerType *IntType =
2639       IntegerType::get(getLLVMContext(),
2640                        getContext().getTypeSize(E->getType()));
2641     llvm::Type *IntPtrType = IntType->getPointerTo();
2642 
2643     llvm::Value *Destination =
2644       Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
2645 
2646     llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
2647     RTy = Exchange->getType();
2648     Exchange = Builder.CreatePtrToInt(Exchange, IntType);
2649 
2650     llvm::Value *Comparand =
2651       Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
2652 
2653     auto Result =
2654         Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
2655                                     AtomicOrdering::SequentiallyConsistent,
2656                                     AtomicOrdering::SequentiallyConsistent);
2657     Result->setVolatile(true);
2658 
2659     return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
2660                                                                          0),
2661                                               RTy));
2662   }
2663   case Builtin::BI_InterlockedCompareExchange8:
2664   case Builtin::BI_InterlockedCompareExchange16:
2665   case Builtin::BI_InterlockedCompareExchange:
2666   case Builtin::BI_InterlockedCompareExchange64: {
2667     AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
2668         EmitScalarExpr(E->getArg(0)),
2669         EmitScalarExpr(E->getArg(2)),
2670         EmitScalarExpr(E->getArg(1)),
2671         AtomicOrdering::SequentiallyConsistent,
2672         AtomicOrdering::SequentiallyConsistent);
2673       CXI->setVolatile(true);
2674       return RValue::get(Builder.CreateExtractValue(CXI, 0));
2675   }
2676   case Builtin::BI_InterlockedIncrement16:
2677   case Builtin::BI_InterlockedIncrement:
2678     return RValue::get(
2679         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
2680   case Builtin::BI_InterlockedDecrement16:
2681   case Builtin::BI_InterlockedDecrement:
2682     return RValue::get(
2683         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
2684   case Builtin::BI_InterlockedAnd8:
2685   case Builtin::BI_InterlockedAnd16:
2686   case Builtin::BI_InterlockedAnd:
2687     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
2688   case Builtin::BI_InterlockedExchangeAdd8:
2689   case Builtin::BI_InterlockedExchangeAdd16:
2690   case Builtin::BI_InterlockedExchangeAdd:
2691     return RValue::get(
2692         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
2693   case Builtin::BI_InterlockedExchangeSub8:
2694   case Builtin::BI_InterlockedExchangeSub16:
2695   case Builtin::BI_InterlockedExchangeSub:
2696     return RValue::get(
2697         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
2698   case Builtin::BI_InterlockedOr8:
2699   case Builtin::BI_InterlockedOr16:
2700   case Builtin::BI_InterlockedOr:
2701     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
2702   case Builtin::BI_InterlockedXor8:
2703   case Builtin::BI_InterlockedXor16:
2704   case Builtin::BI_InterlockedXor:
2705     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
2706   case Builtin::BI_interlockedbittestandset:
2707     return RValue::get(
2708         EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E));
2709 
2710   case Builtin::BI__exception_code:
2711   case Builtin::BI_exception_code:
2712     return RValue::get(EmitSEHExceptionCode());
2713   case Builtin::BI__exception_info:
2714   case Builtin::BI_exception_info:
2715     return RValue::get(EmitSEHExceptionInfo());
2716   case Builtin::BI__abnormal_termination:
2717   case Builtin::BI_abnormal_termination:
2718     return RValue::get(EmitSEHAbnormalTermination());
2719   case Builtin::BI_setjmpex: {
2720     if (getTarget().getTriple().isOSMSVCRT()) {
2721       llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2722       llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2723           getLLVMContext(), llvm::AttributeList::FunctionIndex,
2724           llvm::Attribute::ReturnsTwice);
2725       llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
2726           llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2727           "_setjmpex", ReturnsTwiceAttr, /*Local=*/true);
2728       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2729           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2730       llvm::Value *FrameAddr =
2731           Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2732                              ConstantInt::get(Int32Ty, 0));
2733       llvm::Value *Args[] = {Buf, FrameAddr};
2734       llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
2735       CS.setAttributes(ReturnsTwiceAttr);
2736       return RValue::get(CS.getInstruction());
2737     }
2738     break;
2739   }
2740   case Builtin::BI_setjmp: {
2741     if (getTarget().getTriple().isOSMSVCRT()) {
2742       llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2743           getLLVMContext(), llvm::AttributeList::FunctionIndex,
2744           llvm::Attribute::ReturnsTwice);
2745       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2746           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2747       llvm::CallSite CS;
2748       if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
2749         llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
2750         llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
2751             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
2752             "_setjmp3", ReturnsTwiceAttr, /*Local=*/true);
2753         llvm::Value *Count = ConstantInt::get(IntTy, 0);
2754         llvm::Value *Args[] = {Buf, Count};
2755         CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
2756       } else {
2757         llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2758         llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
2759             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2760             "_setjmp", ReturnsTwiceAttr, /*Local=*/true);
2761         llvm::Value *FrameAddr =
2762             Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2763                                ConstantInt::get(Int32Ty, 0));
2764         llvm::Value *Args[] = {Buf, FrameAddr};
2765         CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
2766       }
2767       CS.setAttributes(ReturnsTwiceAttr);
2768       return RValue::get(CS.getInstruction());
2769     }
2770     break;
2771   }
2772 
2773   case Builtin::BI__GetExceptionInfo: {
2774     if (llvm::GlobalVariable *GV =
2775             CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
2776       return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
2777     break;
2778   }
2779 
2780   case Builtin::BI__fastfail:
2781     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
2782 
2783   case Builtin::BI__builtin_coro_size: {
2784     auto & Context = getContext();
2785     auto SizeTy = Context.getSizeType();
2786     auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
2787     Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
2788     return RValue::get(Builder.CreateCall(F));
2789   }
2790 
2791   case Builtin::BI__builtin_coro_id:
2792     return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
2793   case Builtin::BI__builtin_coro_promise:
2794     return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
2795   case Builtin::BI__builtin_coro_resume:
2796     return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
2797   case Builtin::BI__builtin_coro_frame:
2798     return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
2799   case Builtin::BI__builtin_coro_noop:
2800     return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
2801   case Builtin::BI__builtin_coro_free:
2802     return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
2803   case Builtin::BI__builtin_coro_destroy:
2804     return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
2805   case Builtin::BI__builtin_coro_done:
2806     return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
2807   case Builtin::BI__builtin_coro_alloc:
2808     return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
2809   case Builtin::BI__builtin_coro_begin:
2810     return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
2811   case Builtin::BI__builtin_coro_end:
2812     return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
2813   case Builtin::BI__builtin_coro_suspend:
2814     return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
2815   case Builtin::BI__builtin_coro_param:
2816     return EmitCoroutineIntrinsic(E, Intrinsic::coro_param);
2817 
2818   // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
2819   case Builtin::BIread_pipe:
2820   case Builtin::BIwrite_pipe: {
2821     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2822           *Arg1 = EmitScalarExpr(E->getArg(1));
2823     CGOpenCLRuntime OpenCLRT(CGM);
2824     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2825     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2826 
2827     // Type of the generic packet parameter.
2828     unsigned GenericAS =
2829         getContext().getTargetAddressSpace(LangAS::opencl_generic);
2830     llvm::Type *I8PTy = llvm::PointerType::get(
2831         llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
2832 
2833     // Testing which overloaded version we should generate the call for.
2834     if (2U == E->getNumArgs()) {
2835       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
2836                                                              : "__write_pipe_2";
2837       // Creating a generic function type to be able to call with any builtin or
2838       // user defined type.
2839       llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
2840       llvm::FunctionType *FTy = llvm::FunctionType::get(
2841           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2842       Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
2843       return RValue::get(
2844           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2845                              {Arg0, BCast, PacketSize, PacketAlign}));
2846     } else {
2847       assert(4 == E->getNumArgs() &&
2848              "Illegal number of parameters to pipe function");
2849       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
2850                                                              : "__write_pipe_4";
2851 
2852       llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
2853                               Int32Ty, Int32Ty};
2854       Value *Arg2 = EmitScalarExpr(E->getArg(2)),
2855             *Arg3 = EmitScalarExpr(E->getArg(3));
2856       llvm::FunctionType *FTy = llvm::FunctionType::get(
2857           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2858       Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
2859       // We know the third argument is an integer type, but we may need to cast
2860       // it to i32.
2861       if (Arg2->getType() != Int32Ty)
2862         Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
2863       return RValue::get(Builder.CreateCall(
2864           CGM.CreateRuntimeFunction(FTy, Name),
2865           {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
2866     }
2867   }
2868   // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write
2869   // functions
2870   case Builtin::BIreserve_read_pipe:
2871   case Builtin::BIreserve_write_pipe:
2872   case Builtin::BIwork_group_reserve_read_pipe:
2873   case Builtin::BIwork_group_reserve_write_pipe:
2874   case Builtin::BIsub_group_reserve_read_pipe:
2875   case Builtin::BIsub_group_reserve_write_pipe: {
2876     // Composing the mangled name for the function.
2877     const char *Name;
2878     if (BuiltinID == Builtin::BIreserve_read_pipe)
2879       Name = "__reserve_read_pipe";
2880     else if (BuiltinID == Builtin::BIreserve_write_pipe)
2881       Name = "__reserve_write_pipe";
2882     else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
2883       Name = "__work_group_reserve_read_pipe";
2884     else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
2885       Name = "__work_group_reserve_write_pipe";
2886     else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
2887       Name = "__sub_group_reserve_read_pipe";
2888     else
2889       Name = "__sub_group_reserve_write_pipe";
2890 
2891     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2892           *Arg1 = EmitScalarExpr(E->getArg(1));
2893     llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
2894     CGOpenCLRuntime OpenCLRT(CGM);
2895     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2896     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2897 
2898     // Building the generic function prototype.
2899     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
2900     llvm::FunctionType *FTy = llvm::FunctionType::get(
2901         ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2902     // We know the second argument is an integer type, but we may need to cast
2903     // it to i32.
2904     if (Arg1->getType() != Int32Ty)
2905       Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
2906     return RValue::get(
2907         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2908                            {Arg0, Arg1, PacketSize, PacketAlign}));
2909   }
2910   // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
2911   // functions
2912   case Builtin::BIcommit_read_pipe:
2913   case Builtin::BIcommit_write_pipe:
2914   case Builtin::BIwork_group_commit_read_pipe:
2915   case Builtin::BIwork_group_commit_write_pipe:
2916   case Builtin::BIsub_group_commit_read_pipe:
2917   case Builtin::BIsub_group_commit_write_pipe: {
2918     const char *Name;
2919     if (BuiltinID == Builtin::BIcommit_read_pipe)
2920       Name = "__commit_read_pipe";
2921     else if (BuiltinID == Builtin::BIcommit_write_pipe)
2922       Name = "__commit_write_pipe";
2923     else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
2924       Name = "__work_group_commit_read_pipe";
2925     else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
2926       Name = "__work_group_commit_write_pipe";
2927     else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
2928       Name = "__sub_group_commit_read_pipe";
2929     else
2930       Name = "__sub_group_commit_write_pipe";
2931 
2932     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2933           *Arg1 = EmitScalarExpr(E->getArg(1));
2934     CGOpenCLRuntime OpenCLRT(CGM);
2935     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2936     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2937 
2938     // Building the generic function prototype.
2939     llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
2940     llvm::FunctionType *FTy =
2941         llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
2942                                 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2943 
2944     return RValue::get(
2945         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2946                            {Arg0, Arg1, PacketSize, PacketAlign}));
2947   }
2948   // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
2949   case Builtin::BIget_pipe_num_packets:
2950   case Builtin::BIget_pipe_max_packets: {
2951     const char *Name;
2952     if (BuiltinID == Builtin::BIget_pipe_num_packets)
2953       Name = "__get_pipe_num_packets";
2954     else
2955       Name = "__get_pipe_max_packets";
2956 
2957     // Building the generic function prototype.
2958     Value *Arg0 = EmitScalarExpr(E->getArg(0));
2959     CGOpenCLRuntime OpenCLRT(CGM);
2960     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2961     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2962     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
2963     llvm::FunctionType *FTy = llvm::FunctionType::get(
2964         Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2965 
2966     return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2967                                           {Arg0, PacketSize, PacketAlign}));
2968   }
2969 
2970   // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
2971   case Builtin::BIto_global:
2972   case Builtin::BIto_local:
2973   case Builtin::BIto_private: {
2974     auto Arg0 = EmitScalarExpr(E->getArg(0));
2975     auto NewArgT = llvm::PointerType::get(Int8Ty,
2976       CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2977     auto NewRetT = llvm::PointerType::get(Int8Ty,
2978       CGM.getContext().getTargetAddressSpace(
2979         E->getType()->getPointeeType().getAddressSpace()));
2980     auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
2981     llvm::Value *NewArg;
2982     if (Arg0->getType()->getPointerAddressSpace() !=
2983         NewArgT->getPointerAddressSpace())
2984       NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
2985     else
2986       NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
2987     auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
2988     auto NewCall =
2989         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
2990     return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
2991       ConvertType(E->getType())));
2992   }
2993 
2994   // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
2995   // It contains four different overload formats specified in Table 6.13.17.1.
2996   case Builtin::BIenqueue_kernel: {
2997     StringRef Name; // Generated function call name
2998     unsigned NumArgs = E->getNumArgs();
2999 
3000     llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
3001     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3002         getContext().getTargetAddressSpace(LangAS::opencl_generic));
3003 
3004     llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
3005     llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
3006     LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
3007     llvm::Value *Range = NDRangeL.getAddress().getPointer();
3008     llvm::Type *RangeTy = NDRangeL.getAddress().getType();
3009 
3010     if (NumArgs == 4) {
3011       // The most basic form of the call with parameters:
3012       // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
3013       Name = "__enqueue_kernel_basic";
3014       llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
3015                               GenericVoidPtrTy};
3016       llvm::FunctionType *FTy = llvm::FunctionType::get(
3017           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3018 
3019       auto Info =
3020           CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
3021       llvm::Value *Kernel =
3022           Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3023       llvm::Value *Block =
3024           Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3025 
3026       AttrBuilder B;
3027       B.addAttribute(Attribute::ByVal);
3028       llvm::AttributeList ByValAttrSet =
3029           llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
3030 
3031       auto RTCall =
3032           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
3033                              {Queue, Flags, Range, Kernel, Block});
3034       RTCall->setAttributes(ByValAttrSet);
3035       return RValue::get(RTCall);
3036     }
3037     assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
3038 
3039     // Create a temporary array to hold the sizes of local pointer arguments
3040     // for the block. \p First is the position of the first size argument.
3041     auto CreateArrayForSizeVar = [=](unsigned First) {
3042       auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First);
3043       auto *Arr = Builder.CreateAlloca(AT);
3044       llvm::Value *Ptr;
3045       // Each of the following arguments specifies the size of the corresponding
3046       // argument passed to the enqueued block.
3047       auto *Zero = llvm::ConstantInt::get(IntTy, 0);
3048       for (unsigned I = First; I < NumArgs; ++I) {
3049         auto *Index = llvm::ConstantInt::get(IntTy, I - First);
3050         auto *GEP = Builder.CreateGEP(Arr, {Zero, Index});
3051         if (I == First)
3052           Ptr = GEP;
3053         auto *V =
3054             Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
3055         Builder.CreateAlignedStore(
3056             V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy));
3057       }
3058       return Ptr;
3059     };
3060 
3061     // Could have events and/or vaargs.
3062     if (E->getArg(3)->getType()->isBlockPointerType()) {
3063       // No events passed, but has variadic arguments.
3064       Name = "__enqueue_kernel_vaargs";
3065       auto Info =
3066           CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
3067       llvm::Value *Kernel =
3068           Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3069       auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3070       auto *PtrToSizeArray = CreateArrayForSizeVar(4);
3071 
3072       // Create a vector of the arguments, as well as a constant value to
3073       // express to the runtime the number of variadic arguments.
3074       std::vector<llvm::Value *> Args = {
3075           Queue,  Flags, Range,
3076           Kernel, Block, ConstantInt::get(IntTy, NumArgs - 4),
3077           PtrToSizeArray};
3078       std::vector<llvm::Type *> ArgTys = {
3079           QueueTy,          IntTy,            RangeTy,
3080           GenericVoidPtrTy, GenericVoidPtrTy, IntTy,
3081           PtrToSizeArray->getType()};
3082 
3083       llvm::FunctionType *FTy = llvm::FunctionType::get(
3084           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3085       return RValue::get(
3086           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3087                              llvm::ArrayRef<llvm::Value *>(Args)));
3088     }
3089     // Any calls now have event arguments passed.
3090     if (NumArgs >= 7) {
3091       llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
3092       llvm::Type *EventPtrTy = EventTy->getPointerTo(
3093           CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
3094 
3095       llvm::Value *NumEvents =
3096           Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
3097       llvm::Value *EventList =
3098           E->getArg(4)->getType()->isArrayType()
3099               ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
3100               : EmitScalarExpr(E->getArg(4));
3101       llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
3102       // Convert to generic address space.
3103       EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
3104       ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy);
3105       auto Info =
3106           CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
3107       llvm::Value *Kernel =
3108           Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3109       llvm::Value *Block =
3110           Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3111 
3112       std::vector<llvm::Type *> ArgTys = {
3113           QueueTy,    Int32Ty,    RangeTy,          Int32Ty,
3114           EventPtrTy, EventPtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
3115 
3116       std::vector<llvm::Value *> Args = {Queue,     Flags,    Range,  NumEvents,
3117                                          EventList, ClkEvent, Kernel, Block};
3118 
3119       if (NumArgs == 7) {
3120         // Has events but no variadics.
3121         Name = "__enqueue_kernel_basic_events";
3122         llvm::FunctionType *FTy = llvm::FunctionType::get(
3123             Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3124         return RValue::get(
3125             Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3126                                llvm::ArrayRef<llvm::Value *>(Args)));
3127       }
3128       // Has event info and variadics
3129       // Pass the number of variadics to the runtime function too.
3130       Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
3131       ArgTys.push_back(Int32Ty);
3132       Name = "__enqueue_kernel_events_vaargs";
3133 
3134       auto *PtrToSizeArray = CreateArrayForSizeVar(7);
3135       Args.push_back(PtrToSizeArray);
3136       ArgTys.push_back(PtrToSizeArray->getType());
3137 
3138       llvm::FunctionType *FTy = llvm::FunctionType::get(
3139           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3140       return RValue::get(
3141           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3142                              llvm::ArrayRef<llvm::Value *>(Args)));
3143     }
3144     LLVM_FALLTHROUGH;
3145   }
3146   // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
3147   // parameter.
3148   case Builtin::BIget_kernel_work_group_size: {
3149     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3150         getContext().getTargetAddressSpace(LangAS::opencl_generic));
3151     auto Info =
3152         CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
3153     Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3154     Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3155     return RValue::get(Builder.CreateCall(
3156         CGM.CreateRuntimeFunction(
3157             llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
3158                                     false),
3159             "__get_kernel_work_group_size_impl"),
3160         {Kernel, Arg}));
3161   }
3162   case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
3163     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3164         getContext().getTargetAddressSpace(LangAS::opencl_generic));
3165     auto Info =
3166         CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
3167     Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3168     Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3169     return RValue::get(Builder.CreateCall(
3170         CGM.CreateRuntimeFunction(
3171             llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
3172                                     false),
3173             "__get_kernel_preferred_work_group_multiple_impl"),
3174         {Kernel, Arg}));
3175   }
3176   case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
3177   case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
3178     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3179         getContext().getTargetAddressSpace(LangAS::opencl_generic));
3180     LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
3181     llvm::Value *NDRange = NDRangeL.getAddress().getPointer();
3182     auto Info =
3183         CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
3184     Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3185     Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3186     const char *Name =
3187         BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
3188             ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
3189             : "__get_kernel_sub_group_count_for_ndrange_impl";
3190     return RValue::get(Builder.CreateCall(
3191         CGM.CreateRuntimeFunction(
3192             llvm::FunctionType::get(
3193                 IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
3194                 false),
3195             Name),
3196         {NDRange, Kernel, Block}));
3197   }
3198 
3199   case Builtin::BI__builtin_store_half:
3200   case Builtin::BI__builtin_store_halff: {
3201     Value *Val = EmitScalarExpr(E->getArg(0));
3202     Address Address = EmitPointerWithAlignment(E->getArg(1));
3203     Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
3204     return RValue::get(Builder.CreateStore(HalfVal, Address));
3205   }
3206   case Builtin::BI__builtin_load_half: {
3207     Address Address = EmitPointerWithAlignment(E->getArg(0));
3208     Value *HalfVal = Builder.CreateLoad(Address);
3209     return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
3210   }
3211   case Builtin::BI__builtin_load_halff: {
3212     Address Address = EmitPointerWithAlignment(E->getArg(0));
3213     Value *HalfVal = Builder.CreateLoad(Address);
3214     return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
3215   }
3216   case Builtin::BIprintf:
3217     if (getTarget().getTriple().isNVPTX())
3218       return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
3219     break;
3220   case Builtin::BI__builtin_canonicalize:
3221   case Builtin::BI__builtin_canonicalizef:
3222   case Builtin::BI__builtin_canonicalizel:
3223     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
3224 
3225   case Builtin::BI__builtin_thread_pointer: {
3226     if (!getContext().getTargetInfo().isTLSSupported())
3227       CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
3228     // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
3229     break;
3230   }
3231   case Builtin::BI__builtin_os_log_format:
3232     return emitBuiltinOSLogFormat(*E);
3233 
3234   case Builtin::BI__builtin_os_log_format_buffer_size: {
3235     analyze_os_log::OSLogBufferLayout Layout;
3236     analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
3237     return RValue::get(ConstantInt::get(ConvertType(E->getType()),
3238                                         Layout.size().getQuantity()));
3239   }
3240 
3241   case Builtin::BI__xray_customevent: {
3242     if (!ShouldXRayInstrumentFunction())
3243       return RValue::getIgnored();
3244     if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
3245       if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
3246         return RValue::getIgnored();
3247 
3248     Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
3249     auto FTy = F->getFunctionType();
3250     auto Arg0 = E->getArg(0);
3251     auto Arg0Val = EmitScalarExpr(Arg0);
3252     auto Arg0Ty = Arg0->getType();
3253     auto PTy0 = FTy->getParamType(0);
3254     if (PTy0 != Arg0Val->getType()) {
3255       if (Arg0Ty->isArrayType())
3256         Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
3257       else
3258         Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
3259     }
3260     auto Arg1 = EmitScalarExpr(E->getArg(1));
3261     auto PTy1 = FTy->getParamType(1);
3262     if (PTy1 != Arg1->getType())
3263       Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
3264     return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
3265   }
3266 
3267   case Builtin::BI__builtin_ms_va_start:
3268   case Builtin::BI__builtin_ms_va_end:
3269     return RValue::get(
3270         EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
3271                        BuiltinID == Builtin::BI__builtin_ms_va_start));
3272 
3273   case Builtin::BI__builtin_ms_va_copy: {
3274     // Lower this manually. We can't reliably determine whether or not any
3275     // given va_copy() is for a Win64 va_list from the calling convention
3276     // alone, because it's legal to do this from a System V ABI function.
3277     // With opaque pointer types, we won't have enough information in LLVM
3278     // IR to determine this from the argument types, either. Best to do it
3279     // now, while we have enough information.
3280     Address DestAddr = EmitMSVAListRef(E->getArg(0));
3281     Address SrcAddr = EmitMSVAListRef(E->getArg(1));
3282 
3283     llvm::Type *BPP = Int8PtrPtrTy;
3284 
3285     DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
3286                        DestAddr.getAlignment());
3287     SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
3288                       SrcAddr.getAlignment());
3289 
3290     Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
3291     return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
3292   }
3293   }
3294 
3295   // If this is an alias for a lib function (e.g. __builtin_sin), emit
3296   // the call using the normal call path, but using the unmangled
3297   // version of the function name.
3298   if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
3299     return emitLibraryCall(*this, FD, E,
3300                            CGM.getBuiltinLibFunction(FD, BuiltinID));
3301 
3302   // If this is a predefined lib function (e.g. malloc), emit the call
3303   // using exactly the normal call path.
3304   if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
3305     return emitLibraryCall(*this, FD, E,
3306                       cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
3307 
3308   // Check that a call to a target specific builtin has the correct target
3309   // features.
3310   // This is down here to avoid non-target specific builtins, however, if
3311   // generic builtins start to require generic target features then we
3312   // can move this up to the beginning of the function.
3313   checkTargetFeatures(E, FD);
3314 
3315   // See if we have a target specific intrinsic.
3316   const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
3317   Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
3318   StringRef Prefix =
3319       llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
3320   if (!Prefix.empty()) {
3321     IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name);
3322     // NOTE we dont need to perform a compatibility flag check here since the
3323     // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the
3324     // MS builtins via ALL_MS_LANGUAGES and are filtered earlier.
3325     if (IntrinsicID == Intrinsic::not_intrinsic)
3326       IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
3327   }
3328 
3329   if (IntrinsicID != Intrinsic::not_intrinsic) {
3330     SmallVector<Value*, 16> Args;
3331 
3332     // Find out if any arguments are required to be integer constant
3333     // expressions.
3334     unsigned ICEArguments = 0;
3335     ASTContext::GetBuiltinTypeError Error;
3336     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
3337     assert(Error == ASTContext::GE_None && "Should not codegen an error");
3338 
3339     Function *F = CGM.getIntrinsic(IntrinsicID);
3340     llvm::FunctionType *FTy = F->getFunctionType();
3341 
3342     for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
3343       Value *ArgValue;
3344       // If this is a normal argument, just emit it as a scalar.
3345       if ((ICEArguments & (1 << i)) == 0) {
3346         ArgValue = EmitScalarExpr(E->getArg(i));
3347       } else {
3348         // If this is required to be a constant, constant fold it so that we
3349         // know that the generated intrinsic gets a ConstantInt.
3350         llvm::APSInt Result;
3351         bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
3352         assert(IsConst && "Constant arg isn't actually constant?");
3353         (void)IsConst;
3354         ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
3355       }
3356 
3357       // If the intrinsic arg type is different from the builtin arg type
3358       // we need to do a bit cast.
3359       llvm::Type *PTy = FTy->getParamType(i);
3360       if (PTy != ArgValue->getType()) {
3361         assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
3362                "Must be able to losslessly bit cast to param");
3363         ArgValue = Builder.CreateBitCast(ArgValue, PTy);
3364       }
3365 
3366       Args.push_back(ArgValue);
3367     }
3368 
3369     Value *V = Builder.CreateCall(F, Args);
3370     QualType BuiltinRetType = E->getType();
3371 
3372     llvm::Type *RetTy = VoidTy;
3373     if (!BuiltinRetType->isVoidType())
3374       RetTy = ConvertType(BuiltinRetType);
3375 
3376     if (RetTy != V->getType()) {
3377       assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
3378              "Must be able to losslessly bit cast result type");
3379       V = Builder.CreateBitCast(V, RetTy);
3380     }
3381 
3382     return RValue::get(V);
3383   }
3384 
3385   // See if we have a target specific builtin that needs to be lowered.
3386   if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
3387     return RValue::get(V);
3388 
3389   ErrorUnsupported(E, "builtin function");
3390 
3391   // Unknown builtin, for now just dump it out and return undef.
3392   return GetUndefRValue(E->getType());
3393 }
3394 
3395 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
3396                                         unsigned BuiltinID, const CallExpr *E,
3397                                         llvm::Triple::ArchType Arch) {
3398   switch (Arch) {
3399   case llvm::Triple::arm:
3400   case llvm::Triple::armeb:
3401   case llvm::Triple::thumb:
3402   case llvm::Triple::thumbeb:
3403     return CGF->EmitARMBuiltinExpr(BuiltinID, E, Arch);
3404   case llvm::Triple::aarch64:
3405   case llvm::Triple::aarch64_be:
3406     return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
3407   case llvm::Triple::x86:
3408   case llvm::Triple::x86_64:
3409     return CGF->EmitX86BuiltinExpr(BuiltinID, E);
3410   case llvm::Triple::ppc:
3411   case llvm::Triple::ppc64:
3412   case llvm::Triple::ppc64le:
3413     return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
3414   case llvm::Triple::r600:
3415   case llvm::Triple::amdgcn:
3416     return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
3417   case llvm::Triple::systemz:
3418     return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
3419   case llvm::Triple::nvptx:
3420   case llvm::Triple::nvptx64:
3421     return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
3422   case llvm::Triple::wasm32:
3423   case llvm::Triple::wasm64:
3424     return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
3425   case llvm::Triple::hexagon:
3426     return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
3427   default:
3428     return nullptr;
3429   }
3430 }
3431 
3432 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
3433                                               const CallExpr *E) {
3434   if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
3435     assert(getContext().getAuxTargetInfo() && "Missing aux target info");
3436     return EmitTargetArchBuiltinExpr(
3437         this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
3438         getContext().getAuxTargetInfo()->getTriple().getArch());
3439   }
3440 
3441   return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
3442                                    getTarget().getTriple().getArch());
3443 }
3444 
3445 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
3446                                      NeonTypeFlags TypeFlags,
3447                                      bool HasLegalHalfType=true,
3448                                      bool V1Ty=false) {
3449   int IsQuad = TypeFlags.isQuad();
3450   switch (TypeFlags.getEltType()) {
3451   case NeonTypeFlags::Int8:
3452   case NeonTypeFlags::Poly8:
3453     return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
3454   case NeonTypeFlags::Int16:
3455   case NeonTypeFlags::Poly16:
3456     return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
3457   case NeonTypeFlags::Float16:
3458     if (HasLegalHalfType)
3459       return llvm::VectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
3460     else
3461       return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
3462   case NeonTypeFlags::Int32:
3463     return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
3464   case NeonTypeFlags::Int64:
3465   case NeonTypeFlags::Poly64:
3466     return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
3467   case NeonTypeFlags::Poly128:
3468     // FIXME: i128 and f128 doesn't get fully support in Clang and llvm.
3469     // There is a lot of i128 and f128 API missing.
3470     // so we use v16i8 to represent poly128 and get pattern matched.
3471     return llvm::VectorType::get(CGF->Int8Ty, 16);
3472   case NeonTypeFlags::Float32:
3473     return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
3474   case NeonTypeFlags::Float64:
3475     return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
3476   }
3477   llvm_unreachable("Unknown vector element type!");
3478 }
3479 
3480 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
3481                                           NeonTypeFlags IntTypeFlags) {
3482   int IsQuad = IntTypeFlags.isQuad();
3483   switch (IntTypeFlags.getEltType()) {
3484   case NeonTypeFlags::Int16:
3485     return llvm::VectorType::get(CGF->HalfTy, (4 << IsQuad));
3486   case NeonTypeFlags::Int32:
3487     return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
3488   case NeonTypeFlags::Int64:
3489     return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
3490   default:
3491     llvm_unreachable("Type can't be converted to floating-point!");
3492   }
3493 }
3494 
3495 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
3496   unsigned nElts = V->getType()->getVectorNumElements();
3497   Value* SV = llvm::ConstantVector::getSplat(nElts, C);
3498   return Builder.CreateShuffleVector(V, V, SV, "lane");
3499 }
3500 
3501 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
3502                                      const char *name,
3503                                      unsigned shift, bool rightshift) {
3504   unsigned j = 0;
3505   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3506        ai != ae; ++ai, ++j)
3507     if (shift > 0 && shift == j)
3508       Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
3509     else
3510       Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
3511 
3512   return Builder.CreateCall(F, Ops, name);
3513 }
3514 
3515 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
3516                                             bool neg) {
3517   int SV = cast<ConstantInt>(V)->getSExtValue();
3518   return ConstantInt::get(Ty, neg ? -SV : SV);
3519 }
3520 
3521 // \brief Right-shift a vector by a constant.
3522 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
3523                                           llvm::Type *Ty, bool usgn,
3524                                           const char *name) {
3525   llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
3526 
3527   int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
3528   int EltSize = VTy->getScalarSizeInBits();
3529 
3530   Vec = Builder.CreateBitCast(Vec, Ty);
3531 
3532   // lshr/ashr are undefined when the shift amount is equal to the vector
3533   // element size.
3534   if (ShiftAmt == EltSize) {
3535     if (usgn) {
3536       // Right-shifting an unsigned value by its size yields 0.
3537       return llvm::ConstantAggregateZero::get(VTy);
3538     } else {
3539       // Right-shifting a signed value by its size is equivalent
3540       // to a shift of size-1.
3541       --ShiftAmt;
3542       Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
3543     }
3544   }
3545 
3546   Shift = EmitNeonShiftVector(Shift, Ty, false);
3547   if (usgn)
3548     return Builder.CreateLShr(Vec, Shift, name);
3549   else
3550     return Builder.CreateAShr(Vec, Shift, name);
3551 }
3552 
/// Bit flags that the NEON intrinsic tables below pass as the TypeModifier of
/// each row. Flags are OR-ed together; a row with no flags uses 0.
enum {
  // Individual flag bits.
  AddRetType        = 0x001,
  Add1ArgType       = 0x002,
  Add2ArgTypes      = 0x004,

  VectorizeRetType  = 0x008,
  VectorizeArgTypes = 0x010,

  InventFloatType   = 0x020,
  UnsignedAlts      = 0x040,

  Use64BitVectors   = 0x080,
  Use128BitVectors  = 0x100,

  // Named combinations of the bits above.
  Vectorize1ArgType  = Add1ArgType | VectorizeArgTypes,
  VectorRet          = AddRetType | VectorizeRetType,
  VectorRetGetArgs01 =
      AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
  FpCmpzModifiers    =
      AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
};
3574 
namespace {
/// One row of a NEON builtin lookup table. Kept as a plain aggregate so rows
/// can be brace-initialised by the NEONMAP* macros.
struct NeonIntrinsicInfo {
  const char *NameHint;      ///< Stringified builtin base name.
  unsigned BuiltinID;        ///< Builtin ID; the ordering key for a row.
  unsigned LLVMIntrinsic;    ///< Primary intrinsic ID, or 0 when none is given.
  unsigned AltLLVMIntrinsic; ///< Alternate intrinsic ID, or 0 when none is given.
  unsigned TypeModifier;     ///< OR-ed modifier flags, or 0.

  /// Order two rows by builtin ID.
  bool operator<(const NeonIntrinsicInfo &RHS) const {
    return RHS.BuiltinID > BuiltinID;
  }

  /// Order a row against a bare builtin ID.
  bool operator<(unsigned RHSBuiltinID) const {
    return RHSBuiltinID > BuiltinID;
  }
};
} // end anonymous namespace
3591 
// Row builders for the NeonIntrinsicInfo tables. Each expands to a braced
// initialiser { name, builtin ID, intrinsic, alt intrinsic, modifier }.

// Row with no intrinsic and no modifier.
#define NEONMAP0(Name)                                                         \
  { #Name, NEON::BI__builtin_neon_##Name, 0, 0, 0 }

// Row with a single intrinsic.
#define NEONMAP1(Name, LLVMInt, Modifier)                                      \
  { #Name, NEON::BI__builtin_neon_##Name, Intrinsic::LLVMInt, 0, Modifier }

// Row with a primary and an alternate intrinsic.
#define NEONMAP2(Name, LLVMInt, AltLLVMInt, Modifier)                          \
  { #Name, NEON::BI__builtin_neon_##Name, Intrinsic::LLVMInt,                  \
    Intrinsic::AltLLVMInt, Modifier }
3603 
// Lookup table from NEON builtins to the arm_neon_* (or generic) LLVM
// intrinsics used to lower them.
//   NEONMAP0 rows carry no intrinsic and no modifier flags.
//   NEONMAP1 rows name one intrinsic.
//   NEONMAP2 rows name a primary and an alternate intrinsic.
// NOTE(review): NeonIntrinsicInfo only defines operator< on BuiltinID, so
// this table is presumably searched by builtin ID and must stay sorted in
// builtin-ID order -- confirm against the lookup code before reordering or
// inserting rows.
// NOTE(review): there is no vcvta_u16_v row although vcvtaq_u16_v and the
// vcvtm/vcvtn/vcvtp _u16_v rows are present -- confirm whether that omission
// is intentional.
static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vabs_v, arm_neon_vabs, 0),
  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
  NEONMAP0(vaddhn_v),
  NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
  NEONMAP1(vaeseq_v, arm_neon_aese, 0),
  NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
  NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vcage_v, arm_neon_vacge, 0),
  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcale_v, arm_neon_vacge, 0),
  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
  NEONMAP0(vceqz_v),
  NEONMAP0(vceqzq_v),
  NEONMAP0(vcgez_v),
  NEONMAP0(vcgezq_v),
  NEONMAP0(vcgtz_v),
  NEONMAP0(vcgtzq_v),
  NEONMAP0(vclez_v),
  NEONMAP0(vclezq_v),
  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
  NEONMAP0(vcltz_v),
  NEONMAP0(vcltzq_v),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
  NEONMAP0(vcvt_f16_v),
  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP2(vcvt_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s16_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u16_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvt_s16_v),
  NEONMAP0(vcvt_s32_v),
  NEONMAP0(vcvt_s64_v),
  NEONMAP0(vcvt_u16_v),
  NEONMAP0(vcvt_u32_v),
  NEONMAP0(vcvt_u64_v),
  NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_s16_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_u16_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtm_s16_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_u16_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_s16_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_u16_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtn_s16_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_u16_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_s16_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_u16_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtp_s16_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_u16_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_s16_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_u16_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP0(vcvtq_f16_v),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP2(vcvtq_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s16_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u16_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_s16_v),
  NEONMAP0(vcvtq_s32_v),
  NEONMAP0(vcvtq_s64_v),
  NEONMAP0(vcvtq_u16_v),
  NEONMAP0(vcvtq_u32_v),
  NEONMAP0(vcvtq_u64_v),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP0(vld1_dup_v),
  NEONMAP1(vld1_v, arm_neon_vld1, 0),
  NEONMAP0(vld1q_dup_v),
  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2_v, arm_neon_vld2, 0),
  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3_v, arm_neon_vld3, 0),
  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4_v, arm_neon_vld4, 0),
  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP0(vmull_v),
  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
  NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
  NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
  NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
  NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
  NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
  NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
  NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
  NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
  NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
  NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
  NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
  NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
  NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
  NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
  NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP1(vst1_v, arm_neon_vst1, 0),
  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2_v, arm_neon_vst2, 0),
  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3_v, arm_neon_vst3, 0),
  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4_v, arm_neon_vst4, 0),
  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtrn_v),
  NEONMAP0(vtrnq_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
  NEONMAP0(vuzp_v),
  NEONMAP0(vuzpq_v),
  NEONMAP0(vzip_v),
  NEONMAP0(vzipq_v)
};
3857 
// Lookup table from NEON vector builtins to the aarch64_neon_* /
// aarch64_crypto_* (or generic) LLVM intrinsics used to lower them.
//   NEONMAP0 rows carry no intrinsic and no modifier flags.
//   NEONMAP1 rows name one intrinsic.
//   NEONMAP2 rows name a primary and an alternate intrinsic.
// NOTE(review): NeonIntrinsicInfo only defines operator< on BuiltinID, so
// this table is presumably searched by builtin ID and must stay sorted in
// builtin-ID order -- confirm against the lookup code before reordering or
// inserting rows.
static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
  NEONMAP0(vaddhn_v),
  NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
  NEONMAP0(vceqz_v),
  NEONMAP0(vceqzq_v),
  NEONMAP0(vcgez_v),
  NEONMAP0(vcgezq_v),
  NEONMAP0(vcgtz_v),
  NEONMAP0(vcgtzq_v),
  NEONMAP0(vclez_v),
  NEONMAP0(vclezq_v),
  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP0(vcltz_v),
  NEONMAP0(vcltzq_v),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
  NEONMAP0(vcvt_f16_v),
  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP2(vcvt_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_f16_v),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP2(vcvtq_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
  NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
  NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
};
3990 
3991 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
3992   NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
3993   NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
3994   NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
3995   NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3996   NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3997   NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3998   NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3999   NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
4000   NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
4001   NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
4002   NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
4003   NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
4004   NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
4005   NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
4006   NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
4007   NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
4008   NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
4009   NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
4010   NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
4011   NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
4012   NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
4013   NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
4014   NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
4015   NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
4016   NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
4017   NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
4018   NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
4019   NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
4020   NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
4021   NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
4022   NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
4023   NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
4024   NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
4025   NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
4026   NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
4027   NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
4028   NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
4029   NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
4030   NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
4031   NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
4032   NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
4033   NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
4034   NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
4035   NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
4036   NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
4037   NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
4038   NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
4039   NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
4040   NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
4041   NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
4042   NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
4043   NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
4044   NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
4045   NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
4046   NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
4047   NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
4048   NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
4049   NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
4050   NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
4051   NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
4052   NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
4053   NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
4054   NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
4055   NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
4056   NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
4057   NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
4058   NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
4059   NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
4060   NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
4061   NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
4062   NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
4063   NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
4064   NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
4065   NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
4066   NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
4067   NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
4068   NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
4069   NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
4070   NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
4071   NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
4072   NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
4073   NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
4074   NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
4075   NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
4076   NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
4077   NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
4078   NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
4079   NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
4080   NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
4081   NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
4082   NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
4083   NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
4084   NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
4085   NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
4086   NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
4087   NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
4088   NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
4089   NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
4090   NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
4091   NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
4092   NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
4093   NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
4094   NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
4095   NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
4096   NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
4097   NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
4098   NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
4099   NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
4100   NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
4101   NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
4102   NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
4103   NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
4104   NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
4105   NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
4106   NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
4107   NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
4108   NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
4109   NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
4110   NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
4111   NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
4112   NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
4113   NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
4114   NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
4115   NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
4116   NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
4117   NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
4118   NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
4119   NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
4120   NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
4121   NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
4122   NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
4123   NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
4124   NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
4125   NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
4126   NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
4127   NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
4128   NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
4129   NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
4130   NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
4131   NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
4132   NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
4133   NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
4134   NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
4135   NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
4136   NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
4137   NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
4138   NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
4139   NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
4140   NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
4141   NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
4142   NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
4143   NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
4144   NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
4145   NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
4146   NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
4147   NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
4148   NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
4149   NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
4150   NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
4151   NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
4152   NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
4153   NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
4154   NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
4155   NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
4156   NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
4157   NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
4158   NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
4159   NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
4160   NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
4161   NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
4162   NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
4163   NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
4164   NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
4165   NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
4166   NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
4167   NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
4168   NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
4169   NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
4170   NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
4171   NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
4172   NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
4173   NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
4174   NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
4175   NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
4176   NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
4177   NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
4178   NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
4179   NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
4180   NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
4181   NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
4182   NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
4183   NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
  // FP16 scalar intrinsics go here.
4185   NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
4186   NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
4187   NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
4188   NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
4189   NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
4190   NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
4191   NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
4192   NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
4193   NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
4194   NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
4195   NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
4196   NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
4197   NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
4198   NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
4199   NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
4200   NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
4201   NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
4202   NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
4203   NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
4204   NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
4205   NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
4206   NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
4207   NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
4208   NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
4209   NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
4210   NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
4211   NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
4212   NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
4213   NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
4214   NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
4215 };
4216 
// The NEONMAP* helper macros exist only to spell out the entries of the
// intrinsic table(s) above; undefine them so they are not visible to the rest
// of the file.
#undef NEONMAP0
#undef NEONMAP1
#undef NEONMAP2
4220 
// One "already verified as sorted" flag per intrinsic table.
// NOTE(review): presumably each flag is handed to findNeonIntrinsicInMap as
// its MapProvenSorted argument together with the correspondingly named table,
// so the debug-only sortedness check runs once per table -- confirm at the
// call sites.
static bool NEONSIMDIntrinsicsProvenSorted = false;

static bool AArch64SIMDIntrinsicsProvenSorted = false;
static bool AArch64SISDIntrinsicsProvenSorted = false;
4225 
4226 
4227 static const NeonIntrinsicInfo *
4228 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
4229                        unsigned BuiltinID, bool &MapProvenSorted) {
4230 
4231 #ifndef NDEBUG
4232   if (!MapProvenSorted) {
4233     assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
4234     MapProvenSorted = true;
4235   }
4236 #endif
4237 
4238   const NeonIntrinsicInfo *Builtin =
4239       std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
4240 
4241   if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
4242     return Builtin;
4243 
4244   return nullptr;
4245 }
4246 
4247 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
4248                                                    unsigned Modifier,
4249                                                    llvm::Type *ArgType,
4250                                                    const CallExpr *E) {
4251   int VectorSize = 0;
4252   if (Modifier & Use64BitVectors)
4253     VectorSize = 64;
4254   else if (Modifier & Use128BitVectors)
4255     VectorSize = 128;
4256 
4257   // Return type.
4258   SmallVector<llvm::Type *, 3> Tys;
4259   if (Modifier & AddRetType) {
4260     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
4261     if (Modifier & VectorizeRetType)
4262       Ty = llvm::VectorType::get(
4263           Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
4264 
4265     Tys.push_back(Ty);
4266   }
4267 
4268   // Arguments.
4269   if (Modifier & VectorizeArgTypes) {
4270     int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
4271     ArgType = llvm::VectorType::get(ArgType, Elts);
4272   }
4273 
4274   if (Modifier & (Add1ArgType | Add2ArgTypes))
4275     Tys.push_back(ArgType);
4276 
4277   if (Modifier & Add2ArgTypes)
4278     Tys.push_back(ArgType);
4279 
4280   if (Modifier & InventFloatType)
4281     Tys.push_back(FloatTy);
4282 
4283   return CGM.getIntrinsic(IntrinsicID, Tys);
4284 }
4285 
4286 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
4287                                             const NeonIntrinsicInfo &SISDInfo,
4288                                             SmallVectorImpl<Value *> &Ops,
4289                                             const CallExpr *E) {
4290   unsigned BuiltinID = SISDInfo.BuiltinID;
4291   unsigned int Int = SISDInfo.LLVMIntrinsic;
4292   unsigned Modifier = SISDInfo.TypeModifier;
4293   const char *s = SISDInfo.NameHint;
4294 
4295   switch (BuiltinID) {
4296   case NEON::BI__builtin_neon_vcled_s64:
4297   case NEON::BI__builtin_neon_vcled_u64:
4298   case NEON::BI__builtin_neon_vcles_f32:
4299   case NEON::BI__builtin_neon_vcled_f64:
4300   case NEON::BI__builtin_neon_vcltd_s64:
4301   case NEON::BI__builtin_neon_vcltd_u64:
4302   case NEON::BI__builtin_neon_vclts_f32:
4303   case NEON::BI__builtin_neon_vcltd_f64:
4304   case NEON::BI__builtin_neon_vcales_f32:
4305   case NEON::BI__builtin_neon_vcaled_f64:
4306   case NEON::BI__builtin_neon_vcalts_f32:
4307   case NEON::BI__builtin_neon_vcaltd_f64:
4308     // Only one direction of comparisons actually exist, cmle is actually a cmge
4309     // with swapped operands. The table gives us the right intrinsic but we
4310     // still need to do the swap.
4311     std::swap(Ops[0], Ops[1]);
4312     break;
4313   }
4314 
4315   assert(Int && "Generic code assumes a valid intrinsic");
4316 
4317   // Determine the type(s) of this overloaded AArch64 intrinsic.
4318   const Expr *Arg = E->getArg(0);
4319   llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
4320   Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
4321 
4322   int j = 0;
4323   ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
4324   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
4325        ai != ae; ++ai, ++j) {
4326     llvm::Type *ArgTy = ai->getType();
4327     if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
4328              ArgTy->getPrimitiveSizeInBits())
4329       continue;
4330 
4331     assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
4332     // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
4333     // it before inserting.
4334     Ops[j] =
4335         CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
4336     Ops[j] =
4337         CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
4338   }
4339 
4340   Value *Result = CGF.EmitNeonCall(F, Ops, s);
4341   llvm::Type *ResultType = CGF.ConvertType(E->getType());
4342   if (ResultType->getPrimitiveSizeInBits() <
4343       Result->getType()->getPrimitiveSizeInBits())
4344     return CGF.Builder.CreateExtractElement(Result, C0);
4345 
4346   return CGF.Builder.CreateBitCast(Result, ResultType, s);
4347 }
4348 
4349 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
4350     unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
4351     const char *NameHint, unsigned Modifier, const CallExpr *E,
4352     SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
4353     llvm::Triple::ArchType Arch) {
4354   // Get the last argument, which specifies the vector type.
4355   llvm::APSInt NeonTypeConst;
4356   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
4357   if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
4358     return nullptr;
4359 
4360   // Determine the type of this overloaded NEON intrinsic.
4361   NeonTypeFlags Type(NeonTypeConst.getZExtValue());
4362   bool Usgn = Type.isUnsigned();
4363   bool Quad = Type.isQuad();
4364   const bool HasLegalHalfType = getTarget().hasLegalHalfType();
4365 
4366   llvm::VectorType *VTy = GetNeonType(this, Type, HasLegalHalfType);
4367   llvm::Type *Ty = VTy;
4368   if (!Ty)
4369     return nullptr;
4370 
4371   auto getAlignmentValue32 = [&](Address addr) -> Value* {
4372     return Builder.getInt32(addr.getAlignment().getQuantity());
4373   };
4374 
4375   unsigned Int = LLVMIntrinsic;
4376   if ((Modifier & UnsignedAlts) && !Usgn)
4377     Int = AltLLVMIntrinsic;
4378 
4379   switch (BuiltinID) {
4380   default: break;
4381   case NEON::BI__builtin_neon_vabs_v:
4382   case NEON::BI__builtin_neon_vabsq_v:
4383     if (VTy->getElementType()->isFloatingPointTy())
4384       return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
4385     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
4386   case NEON::BI__builtin_neon_vaddhn_v: {
4387     llvm::VectorType *SrcTy =
4388         llvm::VectorType::getExtendedElementVectorType(VTy);
4389 
4390     // %sum = add <4 x i32> %lhs, %rhs
4391     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4392     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
4393     Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
4394 
4395     // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
4396     Constant *ShiftAmt =
4397         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
4398     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
4399 
4400     // %res = trunc <4 x i32> %high to <4 x i16>
4401     return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
4402   }
4403   case NEON::BI__builtin_neon_vcale_v:
4404   case NEON::BI__builtin_neon_vcaleq_v:
4405   case NEON::BI__builtin_neon_vcalt_v:
4406   case NEON::BI__builtin_neon_vcaltq_v:
4407     std::swap(Ops[0], Ops[1]);
4408     LLVM_FALLTHROUGH;
4409   case NEON::BI__builtin_neon_vcage_v:
4410   case NEON::BI__builtin_neon_vcageq_v:
4411   case NEON::BI__builtin_neon_vcagt_v:
4412   case NEON::BI__builtin_neon_vcagtq_v: {
4413     llvm::Type *Ty;
4414     switch (VTy->getScalarSizeInBits()) {
4415     default: llvm_unreachable("unexpected type");
4416     case 32:
4417       Ty = FloatTy;
4418       break;
4419     case 64:
4420       Ty = DoubleTy;
4421       break;
4422     case 16:
4423       Ty = HalfTy;
4424       break;
4425     }
4426     llvm::Type *VecFlt = llvm::VectorType::get(Ty, VTy->getNumElements());
4427     llvm::Type *Tys[] = { VTy, VecFlt };
4428     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4429     return EmitNeonCall(F, Ops, NameHint);
4430   }
4431   case NEON::BI__builtin_neon_vceqz_v:
4432   case NEON::BI__builtin_neon_vceqzq_v:
4433     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
4434                                          ICmpInst::ICMP_EQ, "vceqz");
4435   case NEON::BI__builtin_neon_vcgez_v:
4436   case NEON::BI__builtin_neon_vcgezq_v:
4437     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
4438                                          ICmpInst::ICMP_SGE, "vcgez");
4439   case NEON::BI__builtin_neon_vclez_v:
4440   case NEON::BI__builtin_neon_vclezq_v:
4441     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
4442                                          ICmpInst::ICMP_SLE, "vclez");
4443   case NEON::BI__builtin_neon_vcgtz_v:
4444   case NEON::BI__builtin_neon_vcgtzq_v:
4445     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
4446                                          ICmpInst::ICMP_SGT, "vcgtz");
4447   case NEON::BI__builtin_neon_vcltz_v:
4448   case NEON::BI__builtin_neon_vcltzq_v:
4449     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
4450                                          ICmpInst::ICMP_SLT, "vcltz");
4451   case NEON::BI__builtin_neon_vclz_v:
4452   case NEON::BI__builtin_neon_vclzq_v:
4453     // We generate target-independent intrinsic, which needs a second argument
4454     // for whether or not clz of zero is undefined; on ARM it isn't.
4455     Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
4456     break;
4457   case NEON::BI__builtin_neon_vcvt_f32_v:
4458   case NEON::BI__builtin_neon_vcvtq_f32_v:
4459     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4460     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
4461                      HasLegalHalfType);
4462     return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
4463                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
4464   case NEON::BI__builtin_neon_vcvt_f16_v:
4465   case NEON::BI__builtin_neon_vcvtq_f16_v:
4466     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4467     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
4468                      HasLegalHalfType);
4469     return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
4470                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
4471   case NEON::BI__builtin_neon_vcvt_n_f16_v:
4472   case NEON::BI__builtin_neon_vcvt_n_f32_v:
4473   case NEON::BI__builtin_neon_vcvt_n_f64_v:
4474   case NEON::BI__builtin_neon_vcvtq_n_f16_v:
4475   case NEON::BI__builtin_neon_vcvtq_n_f32_v:
4476   case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
4477     llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
4478     Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
4479     Function *F = CGM.getIntrinsic(Int, Tys);
4480     return EmitNeonCall(F, Ops, "vcvt_n");
4481   }
4482   case NEON::BI__builtin_neon_vcvt_n_s16_v:
4483   case NEON::BI__builtin_neon_vcvt_n_s32_v:
4484   case NEON::BI__builtin_neon_vcvt_n_u16_v:
4485   case NEON::BI__builtin_neon_vcvt_n_u32_v:
4486   case NEON::BI__builtin_neon_vcvt_n_s64_v:
4487   case NEON::BI__builtin_neon_vcvt_n_u64_v:
4488   case NEON::BI__builtin_neon_vcvtq_n_s16_v:
4489   case NEON::BI__builtin_neon_vcvtq_n_s32_v:
4490   case NEON::BI__builtin_neon_vcvtq_n_u16_v:
4491   case NEON::BI__builtin_neon_vcvtq_n_u32_v:
4492   case NEON::BI__builtin_neon_vcvtq_n_s64_v:
4493   case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
4494     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
4495     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4496     return EmitNeonCall(F, Ops, "vcvt_n");
4497   }
4498   case NEON::BI__builtin_neon_vcvt_s32_v:
4499   case NEON::BI__builtin_neon_vcvt_u32_v:
4500   case NEON::BI__builtin_neon_vcvt_s64_v:
4501   case NEON::BI__builtin_neon_vcvt_u64_v:
4502   case NEON::BI__builtin_neon_vcvt_s16_v:
4503   case NEON::BI__builtin_neon_vcvt_u16_v:
4504   case NEON::BI__builtin_neon_vcvtq_s32_v:
4505   case NEON::BI__builtin_neon_vcvtq_u32_v:
4506   case NEON::BI__builtin_neon_vcvtq_s64_v:
4507   case NEON::BI__builtin_neon_vcvtq_u64_v:
4508   case NEON::BI__builtin_neon_vcvtq_s16_v:
4509   case NEON::BI__builtin_neon_vcvtq_u16_v: {
4510     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
4511     return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
4512                 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
4513   }
4514   case NEON::BI__builtin_neon_vcvta_s16_v:
4515   case NEON::BI__builtin_neon_vcvta_s32_v:
4516   case NEON::BI__builtin_neon_vcvta_s64_v:
4517   case NEON::BI__builtin_neon_vcvta_u32_v:
4518   case NEON::BI__builtin_neon_vcvta_u64_v:
4519   case NEON::BI__builtin_neon_vcvtaq_s16_v:
4520   case NEON::BI__builtin_neon_vcvtaq_s32_v:
4521   case NEON::BI__builtin_neon_vcvtaq_s64_v:
4522   case NEON::BI__builtin_neon_vcvtaq_u16_v:
4523   case NEON::BI__builtin_neon_vcvtaq_u32_v:
4524   case NEON::BI__builtin_neon_vcvtaq_u64_v:
4525   case NEON::BI__builtin_neon_vcvtn_s16_v:
4526   case NEON::BI__builtin_neon_vcvtn_s32_v:
4527   case NEON::BI__builtin_neon_vcvtn_s64_v:
4528   case NEON::BI__builtin_neon_vcvtn_u16_v:
4529   case NEON::BI__builtin_neon_vcvtn_u32_v:
4530   case NEON::BI__builtin_neon_vcvtn_u64_v:
4531   case NEON::BI__builtin_neon_vcvtnq_s16_v:
4532   case NEON::BI__builtin_neon_vcvtnq_s32_v:
4533   case NEON::BI__builtin_neon_vcvtnq_s64_v:
4534   case NEON::BI__builtin_neon_vcvtnq_u16_v:
4535   case NEON::BI__builtin_neon_vcvtnq_u32_v:
4536   case NEON::BI__builtin_neon_vcvtnq_u64_v:
4537   case NEON::BI__builtin_neon_vcvtp_s16_v:
4538   case NEON::BI__builtin_neon_vcvtp_s32_v:
4539   case NEON::BI__builtin_neon_vcvtp_s64_v:
4540   case NEON::BI__builtin_neon_vcvtp_u16_v:
4541   case NEON::BI__builtin_neon_vcvtp_u32_v:
4542   case NEON::BI__builtin_neon_vcvtp_u64_v:
4543   case NEON::BI__builtin_neon_vcvtpq_s16_v:
4544   case NEON::BI__builtin_neon_vcvtpq_s32_v:
4545   case NEON::BI__builtin_neon_vcvtpq_s64_v:
4546   case NEON::BI__builtin_neon_vcvtpq_u16_v:
4547   case NEON::BI__builtin_neon_vcvtpq_u32_v:
4548   case NEON::BI__builtin_neon_vcvtpq_u64_v:
4549   case NEON::BI__builtin_neon_vcvtm_s16_v:
4550   case NEON::BI__builtin_neon_vcvtm_s32_v:
4551   case NEON::BI__builtin_neon_vcvtm_s64_v:
4552   case NEON::BI__builtin_neon_vcvtm_u16_v:
4553   case NEON::BI__builtin_neon_vcvtm_u32_v:
4554   case NEON::BI__builtin_neon_vcvtm_u64_v:
4555   case NEON::BI__builtin_neon_vcvtmq_s16_v:
4556   case NEON::BI__builtin_neon_vcvtmq_s32_v:
4557   case NEON::BI__builtin_neon_vcvtmq_s64_v:
4558   case NEON::BI__builtin_neon_vcvtmq_u16_v:
4559   case NEON::BI__builtin_neon_vcvtmq_u32_v:
4560   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
4561     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
4562     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
4563   }
4564   case NEON::BI__builtin_neon_vext_v:
4565   case NEON::BI__builtin_neon_vextq_v: {
4566     int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
4567     SmallVector<uint32_t, 16> Indices;
4568     for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
4569       Indices.push_back(i+CV);
4570 
4571     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4572     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4573     return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
4574   }
4575   case NEON::BI__builtin_neon_vfma_v:
4576   case NEON::BI__builtin_neon_vfmaq_v: {
4577     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
4578     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4579     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4580     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4581 
4582     // NEON intrinsic puts accumulator first, unlike the LLVM fma.
4583     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
4584   }
4585   case NEON::BI__builtin_neon_vld1_v:
4586   case NEON::BI__builtin_neon_vld1q_v: {
4587     llvm::Type *Tys[] = {Ty, Int8PtrTy};
4588     Ops.push_back(getAlignmentValue32(PtrOp0));
4589     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
4590   }
4591   case NEON::BI__builtin_neon_vld2_v:
4592   case NEON::BI__builtin_neon_vld2q_v:
4593   case NEON::BI__builtin_neon_vld3_v:
4594   case NEON::BI__builtin_neon_vld3q_v:
4595   case NEON::BI__builtin_neon_vld4_v:
4596   case NEON::BI__builtin_neon_vld4q_v: {
4597     llvm::Type *Tys[] = {Ty, Int8PtrTy};
4598     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4599     Value *Align = getAlignmentValue32(PtrOp1);
4600     Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
4601     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4602     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4603     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4604   }
4605   case NEON::BI__builtin_neon_vld1_dup_v:
4606   case NEON::BI__builtin_neon_vld1q_dup_v: {
4607     Value *V = UndefValue::get(Ty);
4608     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
4609     PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
4610     LoadInst *Ld = Builder.CreateLoad(PtrOp0);
4611     llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
4612     Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
4613     return EmitNeonSplat(Ops[0], CI);
4614   }
4615   case NEON::BI__builtin_neon_vld2_lane_v:
4616   case NEON::BI__builtin_neon_vld2q_lane_v:
4617   case NEON::BI__builtin_neon_vld3_lane_v:
4618   case NEON::BI__builtin_neon_vld3q_lane_v:
4619   case NEON::BI__builtin_neon_vld4_lane_v:
4620   case NEON::BI__builtin_neon_vld4q_lane_v: {
4621     llvm::Type *Tys[] = {Ty, Int8PtrTy};
4622     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
4623     for (unsigned I = 2; I < Ops.size() - 1; ++I)
4624       Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
4625     Ops.push_back(getAlignmentValue32(PtrOp1));
4626     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
4627     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4628     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4629     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4630   }
4631   case NEON::BI__builtin_neon_vmovl_v: {
4632     llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
4633     Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
4634     if (Usgn)
4635       return Builder.CreateZExt(Ops[0], Ty, "vmovl");
4636     return Builder.CreateSExt(Ops[0], Ty, "vmovl");
4637   }
4638   case NEON::BI__builtin_neon_vmovn_v: {
4639     llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
4640     Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
4641     return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
4642   }
4643   case NEON::BI__builtin_neon_vmull_v:
4644     // FIXME: the integer vmull operations could be emitted in terms of pure
4645     // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
4646     // hoisting the exts outside loops. Until global ISel comes along that can
4647     // see through such movement this leads to bad CodeGen. So we need an
4648     // intrinsic for now.
4649     Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
4650     Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
4651     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
4652   case NEON::BI__builtin_neon_vpadal_v:
4653   case NEON::BI__builtin_neon_vpadalq_v: {
4654     // The source operand type has twice as many elements of half the size.
4655     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
4656     llvm::Type *EltTy =
4657       llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4658     llvm::Type *NarrowTy =
4659       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4660     llvm::Type *Tys[2] = { Ty, NarrowTy };
4661     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
4662   }
4663   case NEON::BI__builtin_neon_vpaddl_v:
4664   case NEON::BI__builtin_neon_vpaddlq_v: {
4665     // The source operand type has twice as many elements of half the size.
4666     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
4667     llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4668     llvm::Type *NarrowTy =
4669       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4670     llvm::Type *Tys[2] = { Ty, NarrowTy };
4671     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
4672   }
4673   case NEON::BI__builtin_neon_vqdmlal_v:
4674   case NEON::BI__builtin_neon_vqdmlsl_v: {
4675     SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
4676     Ops[1] =
4677         EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
4678     Ops.resize(2);
4679     return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
4680   }
4681   case NEON::BI__builtin_neon_vqshl_n_v:
4682   case NEON::BI__builtin_neon_vqshlq_n_v:
4683     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
4684                         1, false);
4685   case NEON::BI__builtin_neon_vqshlu_n_v:
4686   case NEON::BI__builtin_neon_vqshluq_n_v:
4687     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
4688                         1, false);
4689   case NEON::BI__builtin_neon_vrecpe_v:
4690   case NEON::BI__builtin_neon_vrecpeq_v:
4691   case NEON::BI__builtin_neon_vrsqrte_v:
4692   case NEON::BI__builtin_neon_vrsqrteq_v:
4693     Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
4694     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
4695 
4696   case NEON::BI__builtin_neon_vrshr_n_v:
4697   case NEON::BI__builtin_neon_vrshrq_n_v:
4698     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
4699                         1, true);
4700   case NEON::BI__builtin_neon_vshl_n_v:
4701   case NEON::BI__builtin_neon_vshlq_n_v:
4702     Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
4703     return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
4704                              "vshl_n");
4705   case NEON::BI__builtin_neon_vshll_n_v: {
4706     llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
4707     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4708     if (Usgn)
4709       Ops[0] = Builder.CreateZExt(Ops[0], VTy);
4710     else
4711       Ops[0] = Builder.CreateSExt(Ops[0], VTy);
4712     Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
4713     return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
4714   }
4715   case NEON::BI__builtin_neon_vshrn_n_v: {
4716     llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
4717     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4718     Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
4719     if (Usgn)
4720       Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
4721     else
4722       Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
4723     return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
4724   }
4725   case NEON::BI__builtin_neon_vshr_n_v:
4726   case NEON::BI__builtin_neon_vshrq_n_v:
4727     return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
4728   case NEON::BI__builtin_neon_vst1_v:
4729   case NEON::BI__builtin_neon_vst1q_v:
4730   case NEON::BI__builtin_neon_vst2_v:
4731   case NEON::BI__builtin_neon_vst2q_v:
4732   case NEON::BI__builtin_neon_vst3_v:
4733   case NEON::BI__builtin_neon_vst3q_v:
4734   case NEON::BI__builtin_neon_vst4_v:
4735   case NEON::BI__builtin_neon_vst4q_v:
4736   case NEON::BI__builtin_neon_vst2_lane_v:
4737   case NEON::BI__builtin_neon_vst2q_lane_v:
4738   case NEON::BI__builtin_neon_vst3_lane_v:
4739   case NEON::BI__builtin_neon_vst3q_lane_v:
4740   case NEON::BI__builtin_neon_vst4_lane_v:
4741   case NEON::BI__builtin_neon_vst4q_lane_v: {
4742     llvm::Type *Tys[] = {Int8PtrTy, Ty};
4743     Ops.push_back(getAlignmentValue32(PtrOp0));
4744     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
4745   }
4746   case NEON::BI__builtin_neon_vsubhn_v: {
4747     llvm::VectorType *SrcTy =
4748         llvm::VectorType::getExtendedElementVectorType(VTy);
4749 
4750     // %sum = add <4 x i32> %lhs, %rhs
4751     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4752     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
4753     Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
4754 
4755     // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
4756     Constant *ShiftAmt =
4757         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
4758     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
4759 
4760     // %res = trunc <4 x i32> %high to <4 x i16>
4761     return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
4762   }
4763   case NEON::BI__builtin_neon_vtrn_v:
4764   case NEON::BI__builtin_neon_vtrnq_v: {
4765     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4766     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4767     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4768     Value *SV = nullptr;
4769 
4770     for (unsigned vi = 0; vi != 2; ++vi) {
4771       SmallVector<uint32_t, 16> Indices;
4772       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4773         Indices.push_back(i+vi);
4774         Indices.push_back(i+e+vi);
4775       }
4776       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4777       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
4778       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4779     }
4780     return SV;
4781   }
4782   case NEON::BI__builtin_neon_vtst_v:
4783   case NEON::BI__builtin_neon_vtstq_v: {
4784     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4785     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4786     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
4787     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
4788                                 ConstantAggregateZero::get(Ty));
4789     return Builder.CreateSExt(Ops[0], Ty, "vtst");
4790   }
4791   case NEON::BI__builtin_neon_vuzp_v:
4792   case NEON::BI__builtin_neon_vuzpq_v: {
4793     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4794     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4795     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4796     Value *SV = nullptr;
4797 
4798     for (unsigned vi = 0; vi != 2; ++vi) {
4799       SmallVector<uint32_t, 16> Indices;
4800       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
4801         Indices.push_back(2*i+vi);
4802 
4803       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4804       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
4805       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4806     }
4807     return SV;
4808   }
4809   case NEON::BI__builtin_neon_vzip_v:
4810   case NEON::BI__builtin_neon_vzipq_v: {
4811     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4812     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4813     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4814     Value *SV = nullptr;
4815 
4816     for (unsigned vi = 0; vi != 2; ++vi) {
4817       SmallVector<uint32_t, 16> Indices;
4818       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4819         Indices.push_back((i + vi*e) >> 1);
4820         Indices.push_back(((i + vi*e) >> 1)+e);
4821       }
4822       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4823       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
4824       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4825     }
4826     return SV;
4827   }
4828   }
4829 
4830   assert(Int && "Expected valid intrinsic number");
4831 
4832   // Determine the type(s) of this overloaded AArch64 intrinsic.
4833   Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
4834 
4835   Value *Result = EmitNeonCall(F, Ops, NameHint);
4836   llvm::Type *ResultType = ConvertType(E->getType());
4837   // AArch64 intrinsic one-element vector type cast to
4838   // scalar type expected by the builtin
4839   return Builder.CreateBitCast(Result, ResultType, NameHint);
4840 }
4841 
4842 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
4843     Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
4844     const CmpInst::Predicate Ip, const Twine &Name) {
4845   llvm::Type *OTy = Op->getType();
4846 
4847   // FIXME: this is utterly horrific. We should not be looking at previous
4848   // codegen context to find out what needs doing. Unfortunately TableGen
4849   // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
4850   // (etc).
4851   if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
4852     OTy = BI->getOperand(0)->getType();
4853 
4854   Op = Builder.CreateBitCast(Op, OTy);
4855   if (OTy->getScalarType()->isFloatingPointTy()) {
4856     Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
4857   } else {
4858     Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
4859   }
4860   return Builder.CreateSExt(Op, Ty, Name);
4861 }
4862 
4863 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
4864                                  Value *ExtOp, Value *IndexOp,
4865                                  llvm::Type *ResTy, unsigned IntID,
4866                                  const char *Name) {
4867   SmallVector<Value *, 2> TblOps;
4868   if (ExtOp)
4869     TblOps.push_back(ExtOp);
4870 
4871   // Build a vector containing sequential number like (0, 1, 2, ..., 15)
4872   SmallVector<uint32_t, 16> Indices;
4873   llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
4874   for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
4875     Indices.push_back(2*i);
4876     Indices.push_back(2*i+1);
4877   }
4878 
4879   int PairPos = 0, End = Ops.size() - 1;
4880   while (PairPos < End) {
4881     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4882                                                      Ops[PairPos+1], Indices,
4883                                                      Name));
4884     PairPos += 2;
4885   }
4886 
4887   // If there's an odd number of 64-bit lookup table, fill the high 64-bit
4888   // of the 128-bit lookup table with zero.
4889   if (PairPos == End) {
4890     Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
4891     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4892                                                      ZeroTbl, Indices, Name));
4893   }
4894 
4895   Function *TblF;
4896   TblOps.push_back(IndexOp);
4897   TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
4898 
4899   return CGF.EmitNeonCall(TblF, TblOps, Name);
4900 }
4901 
4902 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
4903   unsigned Value;
4904   switch (BuiltinID) {
4905   default:
4906     return nullptr;
4907   case ARM::BI__builtin_arm_nop:
4908     Value = 0;
4909     break;
4910   case ARM::BI__builtin_arm_yield:
4911   case ARM::BI__yield:
4912     Value = 1;
4913     break;
4914   case ARM::BI__builtin_arm_wfe:
4915   case ARM::BI__wfe:
4916     Value = 2;
4917     break;
4918   case ARM::BI__builtin_arm_wfi:
4919   case ARM::BI__wfi:
4920     Value = 3;
4921     break;
4922   case ARM::BI__builtin_arm_sev:
4923   case ARM::BI__sev:
4924     Value = 4;
4925     break;
4926   case ARM::BI__builtin_arm_sevl:
4927   case ARM::BI__sevl:
4928     Value = 5;
4929     break;
4930   }
4931 
4932   return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
4933                             llvm::ConstantInt::get(Int32Ty, Value));
4934 }
4935 
4936 // Generates the IR for the read/write special register builtin,
4937 // ValueType is the type of the value that is to be written or read,
4938 // RegisterType is the type of the register being written to or read from.
4939 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
4940                                          const CallExpr *E,
4941                                          llvm::Type *RegisterType,
4942                                          llvm::Type *ValueType,
4943                                          bool IsRead,
4944                                          StringRef SysReg = "") {
4945   // write and register intrinsics only support 32 and 64 bit operations.
4946   assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
4947           && "Unsupported size for register.");
4948 
4949   CodeGen::CGBuilderTy &Builder = CGF.Builder;
4950   CodeGen::CodeGenModule &CGM = CGF.CGM;
4951   LLVMContext &Context = CGM.getLLVMContext();
4952 
4953   if (SysReg.empty()) {
4954     const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
4955     SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
4956   }
4957 
4958   llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
4959   llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4960   llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4961 
4962   llvm::Type *Types[] = { RegisterType };
4963 
4964   bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
4965   assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
4966             && "Can't fit 64-bit value in 32-bit register");
4967 
4968   if (IsRead) {
4969     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
4970     llvm::Value *Call = Builder.CreateCall(F, Metadata);
4971 
4972     if (MixedTypes)
4973       // Read into 64 bit register and then truncate result to 32 bit.
4974       return Builder.CreateTrunc(Call, ValueType);
4975 
4976     if (ValueType->isPointerTy())
4977       // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
4978       return Builder.CreateIntToPtr(Call, ValueType);
4979 
4980     return Call;
4981   }
4982 
4983   llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
4984   llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
4985   if (MixedTypes) {
4986     // Extend 32 bit write value to 64 bit to pass to write.
4987     ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
4988     return Builder.CreateCall(F, { Metadata, ArgValue });
4989   }
4990 
4991   if (ValueType->isPointerTy()) {
4992     // Have VoidPtrTy ArgValue but want to return an i32/i64.
4993     ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
4994     return Builder.CreateCall(F, { Metadata, ArgValue });
4995   }
4996 
4997   return Builder.CreateCall(F, { Metadata, ArgValue });
4998 }
4999 
5000 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
5001 /// argument that specifies the vector type.
5002 static bool HasExtraNeonArgument(unsigned BuiltinID) {
5003   switch (BuiltinID) {
5004   default: break;
5005   case NEON::BI__builtin_neon_vget_lane_i8:
5006   case NEON::BI__builtin_neon_vget_lane_i16:
5007   case NEON::BI__builtin_neon_vget_lane_i32:
5008   case NEON::BI__builtin_neon_vget_lane_i64:
5009   case NEON::BI__builtin_neon_vget_lane_f32:
5010   case NEON::BI__builtin_neon_vgetq_lane_i8:
5011   case NEON::BI__builtin_neon_vgetq_lane_i16:
5012   case NEON::BI__builtin_neon_vgetq_lane_i32:
5013   case NEON::BI__builtin_neon_vgetq_lane_i64:
5014   case NEON::BI__builtin_neon_vgetq_lane_f32:
5015   case NEON::BI__builtin_neon_vset_lane_i8:
5016   case NEON::BI__builtin_neon_vset_lane_i16:
5017   case NEON::BI__builtin_neon_vset_lane_i32:
5018   case NEON::BI__builtin_neon_vset_lane_i64:
5019   case NEON::BI__builtin_neon_vset_lane_f32:
5020   case NEON::BI__builtin_neon_vsetq_lane_i8:
5021   case NEON::BI__builtin_neon_vsetq_lane_i16:
5022   case NEON::BI__builtin_neon_vsetq_lane_i32:
5023   case NEON::BI__builtin_neon_vsetq_lane_i64:
5024   case NEON::BI__builtin_neon_vsetq_lane_f32:
5025   case NEON::BI__builtin_neon_vsha1h_u32:
5026   case NEON::BI__builtin_neon_vsha1cq_u32:
5027   case NEON::BI__builtin_neon_vsha1pq_u32:
5028   case NEON::BI__builtin_neon_vsha1mq_u32:
5029   case clang::ARM::BI_MoveToCoprocessor:
5030   case clang::ARM::BI_MoveToCoprocessor2:
5031     return false;
5032   }
5033   return true;
5034 }
5035 
5036 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
5037                                            const CallExpr *E,
5038                                            llvm::Triple::ArchType Arch) {
5039   if (auto Hint = GetValueForARMHint(BuiltinID))
5040     return Hint;
5041 
5042   if (BuiltinID == ARM::BI__emit) {
5043     bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
5044     llvm::FunctionType *FTy =
5045         llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
5046 
5047     APSInt Value;
5048     if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
5049       llvm_unreachable("Sema will ensure that the parameter is constant");
5050 
5051     uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
5052 
5053     llvm::InlineAsm *Emit =
5054         IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
5055                                  /*SideEffects=*/true)
5056                 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
5057                                  /*SideEffects=*/true);
5058 
5059     return Builder.CreateCall(Emit);
5060   }
5061 
5062   if (BuiltinID == ARM::BI__builtin_arm_dbg) {
5063     Value *Option = EmitScalarExpr(E->getArg(0));
5064     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
5065   }
5066 
5067   if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
5068     Value *Address = EmitScalarExpr(E->getArg(0));
5069     Value *RW      = EmitScalarExpr(E->getArg(1));
5070     Value *IsData  = EmitScalarExpr(E->getArg(2));
5071 
5072     // Locality is not supported on ARM target
5073     Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
5074 
5075     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
5076     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
5077   }
5078 
5079   if (BuiltinID == ARM::BI__builtin_arm_rbit) {
5080     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5081     return Builder.CreateCall(
5082         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5083   }
5084 
5085   if (BuiltinID == ARM::BI__clear_cache) {
5086     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
5087     const FunctionDecl *FD = E->getDirectCallee();
5088     Value *Ops[2];
5089     for (unsigned i = 0; i < 2; i++)
5090       Ops[i] = EmitScalarExpr(E->getArg(i));
5091     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
5092     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
5093     StringRef Name = FD->getName();
5094     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
5095   }
5096 
5097   if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
5098       BuiltinID == ARM::BI__builtin_arm_mcrr2) {
5099     Function *F;
5100 
5101     switch (BuiltinID) {
5102     default: llvm_unreachable("unexpected builtin");
5103     case ARM::BI__builtin_arm_mcrr:
5104       F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
5105       break;
5106     case ARM::BI__builtin_arm_mcrr2:
5107       F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
5108       break;
5109     }
5110 
5111     // MCRR{2} instruction has 5 operands but
5112     // the intrinsic has 4 because Rt and Rt2
5113     // are represented as a single unsigned 64
5114     // bit integer in the intrinsic definition
5115     // but internally it's represented as 2 32
5116     // bit integers.
5117 
5118     Value *Coproc = EmitScalarExpr(E->getArg(0));
5119     Value *Opc1 = EmitScalarExpr(E->getArg(1));
5120     Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
5121     Value *CRm = EmitScalarExpr(E->getArg(3));
5122 
5123     Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
5124     Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
5125     Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
5126     Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
5127 
5128     return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
5129   }
5130 
5131   if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
5132       BuiltinID == ARM::BI__builtin_arm_mrrc2) {
5133     Function *F;
5134 
5135     switch (BuiltinID) {
5136     default: llvm_unreachable("unexpected builtin");
5137     case ARM::BI__builtin_arm_mrrc:
5138       F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
5139       break;
5140     case ARM::BI__builtin_arm_mrrc2:
5141       F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
5142       break;
5143     }
5144 
5145     Value *Coproc = EmitScalarExpr(E->getArg(0));
5146     Value *Opc1 = EmitScalarExpr(E->getArg(1));
5147     Value *CRm  = EmitScalarExpr(E->getArg(2));
5148     Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
5149 
5150     // Returns an unsigned 64 bit integer, represented
5151     // as two 32 bit integers.
5152 
5153     Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
5154     Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
5155     Rt = Builder.CreateZExt(Rt, Int64Ty);
5156     Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
5157 
5158     Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
5159     RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
5160     RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
5161 
5162     return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
5163   }
5164 
5165   if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
5166       ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
5167         BuiltinID == ARM::BI__builtin_arm_ldaex) &&
5168        getContext().getTypeSize(E->getType()) == 64) ||
5169       BuiltinID == ARM::BI__ldrexd) {
5170     Function *F;
5171 
5172     switch (BuiltinID) {
5173     default: llvm_unreachable("unexpected builtin");
5174     case ARM::BI__builtin_arm_ldaex:
5175       F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
5176       break;
5177     case ARM::BI__builtin_arm_ldrexd:
5178     case ARM::BI__builtin_arm_ldrex:
5179     case ARM::BI__ldrexd:
5180       F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
5181       break;
5182     }
5183 
5184     Value *LdPtr = EmitScalarExpr(E->getArg(0));
5185     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
5186                                     "ldrexd");
5187 
5188     Value *Val0 = Builder.CreateExtractValue(Val, 1);
5189     Value *Val1 = Builder.CreateExtractValue(Val, 0);
5190     Val0 = Builder.CreateZExt(Val0, Int64Ty);
5191     Val1 = Builder.CreateZExt(Val1, Int64Ty);
5192 
5193     Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
5194     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
5195     Val = Builder.CreateOr(Val, Val1);
5196     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
5197   }
5198 
5199   if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
5200       BuiltinID == ARM::BI__builtin_arm_ldaex) {
5201     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
5202 
5203     QualType Ty = E->getType();
5204     llvm::Type *RealResTy = ConvertType(Ty);
5205     llvm::Type *PtrTy = llvm::IntegerType::get(
5206         getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
5207     LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
5208 
5209     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
5210                                        ? Intrinsic::arm_ldaex
5211                                        : Intrinsic::arm_ldrex,
5212                                    PtrTy);
5213     Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
5214 
5215     if (RealResTy->isPointerTy())
5216       return Builder.CreateIntToPtr(Val, RealResTy);
5217     else {
5218       llvm::Type *IntResTy = llvm::IntegerType::get(
5219           getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
5220       Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
5221       return Builder.CreateBitCast(Val, RealResTy);
5222     }
5223   }
5224 
5225   if (BuiltinID == ARM::BI__builtin_arm_strexd ||
5226       ((BuiltinID == ARM::BI__builtin_arm_stlex ||
5227         BuiltinID == ARM::BI__builtin_arm_strex) &&
5228        getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
5229     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
5230                                        ? Intrinsic::arm_stlexd
5231                                        : Intrinsic::arm_strexd);
5232     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
5233 
5234     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
5235     Value *Val = EmitScalarExpr(E->getArg(0));
5236     Builder.CreateStore(Val, Tmp);
5237 
5238     Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
5239     Val = Builder.CreateLoad(LdPtr);
5240 
5241     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
5242     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
5243     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
5244     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
5245   }
5246 
5247   if (BuiltinID == ARM::BI__builtin_arm_strex ||
5248       BuiltinID == ARM::BI__builtin_arm_stlex) {
5249     Value *StoreVal = EmitScalarExpr(E->getArg(0));
5250     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
5251 
5252     QualType Ty = E->getArg(0)->getType();
5253     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
5254                                                  getContext().getTypeSize(Ty));
5255     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
5256 
5257     if (StoreVal->getType()->isPointerTy())
5258       StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
5259     else {
5260       llvm::Type *IntTy = llvm::IntegerType::get(
5261           getLLVMContext(),
5262           CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
5263       StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
5264       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
5265     }
5266 
5267     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
5268                                        ? Intrinsic::arm_stlex
5269                                        : Intrinsic::arm_strex,
5270                                    StoreAddr->getType());
5271     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
5272   }
5273 
5274   switch (BuiltinID) {
5275   case ARM::BI__iso_volatile_load8:
5276   case ARM::BI__iso_volatile_load16:
5277   case ARM::BI__iso_volatile_load32:
5278   case ARM::BI__iso_volatile_load64: {
5279     Value *Ptr = EmitScalarExpr(E->getArg(0));
5280     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
5281     CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
5282     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
5283                                              LoadSize.getQuantity() * 8);
5284     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
5285     llvm::LoadInst *Load =
5286       Builder.CreateAlignedLoad(Ptr, LoadSize);
5287     Load->setVolatile(true);
5288     return Load;
5289   }
5290   case ARM::BI__iso_volatile_store8:
5291   case ARM::BI__iso_volatile_store16:
5292   case ARM::BI__iso_volatile_store32:
5293   case ARM::BI__iso_volatile_store64: {
5294     Value *Ptr = EmitScalarExpr(E->getArg(0));
5295     Value *Value = EmitScalarExpr(E->getArg(1));
5296     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
5297     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
5298     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
5299                                              StoreSize.getQuantity() * 8);
5300     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
5301     llvm::StoreInst *Store =
5302       Builder.CreateAlignedStore(Value, Ptr,
5303                                  StoreSize);
5304     Store->setVolatile(true);
5305     return Store;
5306   }
5307   }
5308 
5309   if (BuiltinID == ARM::BI__builtin_arm_clrex) {
5310     Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
5311     return Builder.CreateCall(F);
5312   }
5313 
5314   // CRC32
5315   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5316   switch (BuiltinID) {
5317   case ARM::BI__builtin_arm_crc32b:
5318     CRCIntrinsicID = Intrinsic::arm_crc32b; break;
5319   case ARM::BI__builtin_arm_crc32cb:
5320     CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
5321   case ARM::BI__builtin_arm_crc32h:
5322     CRCIntrinsicID = Intrinsic::arm_crc32h; break;
5323   case ARM::BI__builtin_arm_crc32ch:
5324     CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
5325   case ARM::BI__builtin_arm_crc32w:
5326   case ARM::BI__builtin_arm_crc32d:
5327     CRCIntrinsicID = Intrinsic::arm_crc32w; break;
5328   case ARM::BI__builtin_arm_crc32cw:
5329   case ARM::BI__builtin_arm_crc32cd:
5330     CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
5331   }
5332 
5333   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5334     Value *Arg0 = EmitScalarExpr(E->getArg(0));
5335     Value *Arg1 = EmitScalarExpr(E->getArg(1));
5336 
    // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
5338     // intrinsics, hence we need different codegen for these cases.
5339     if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
5340         BuiltinID == ARM::BI__builtin_arm_crc32cd) {
5341       Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
5342       Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
5343       Value *Arg1b = Builder.CreateLShr(Arg1, C1);
5344       Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
5345 
5346       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5347       Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
5348       return Builder.CreateCall(F, {Res, Arg1b});
5349     } else {
5350       Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
5351 
5352       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5353       return Builder.CreateCall(F, {Arg0, Arg1});
5354     }
5355   }
5356 
5357   if (BuiltinID == ARM::BI__builtin_arm_rsr ||
5358       BuiltinID == ARM::BI__builtin_arm_rsr64 ||
5359       BuiltinID == ARM::BI__builtin_arm_rsrp ||
5360       BuiltinID == ARM::BI__builtin_arm_wsr ||
5361       BuiltinID == ARM::BI__builtin_arm_wsr64 ||
5362       BuiltinID == ARM::BI__builtin_arm_wsrp) {
5363 
5364     bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
5365                   BuiltinID == ARM::BI__builtin_arm_rsr64 ||
5366                   BuiltinID == ARM::BI__builtin_arm_rsrp;
5367 
5368     bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
5369                             BuiltinID == ARM::BI__builtin_arm_wsrp;
5370 
5371     bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
5372                    BuiltinID == ARM::BI__builtin_arm_wsr64;
5373 
5374     llvm::Type *ValueType;
5375     llvm::Type *RegisterType;
5376     if (IsPointerBuiltin) {
5377       ValueType = VoidPtrTy;
5378       RegisterType = Int32Ty;
5379     } else if (Is64Bit) {
5380       ValueType = RegisterType = Int64Ty;
5381     } else {
5382       ValueType = RegisterType = Int32Ty;
5383     }
5384 
5385     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
5386   }
5387 
5388   // Find out if any arguments are required to be integer constant
5389   // expressions.
5390   unsigned ICEArguments = 0;
5391   ASTContext::GetBuiltinTypeError Error;
5392   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5393   assert(Error == ASTContext::GE_None && "Should not codegen an error");
5394 
5395   auto getAlignmentValue32 = [&](Address addr) -> Value* {
5396     return Builder.getInt32(addr.getAlignment().getQuantity());
5397   };
5398 
5399   Address PtrOp0 = Address::invalid();
5400   Address PtrOp1 = Address::invalid();
5401   SmallVector<Value*, 4> Ops;
5402   bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
5403   unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
5404   for (unsigned i = 0, e = NumArgs; i != e; i++) {
5405     if (i == 0) {
5406       switch (BuiltinID) {
5407       case NEON::BI__builtin_neon_vld1_v:
5408       case NEON::BI__builtin_neon_vld1q_v:
5409       case NEON::BI__builtin_neon_vld1q_lane_v:
5410       case NEON::BI__builtin_neon_vld1_lane_v:
5411       case NEON::BI__builtin_neon_vld1_dup_v:
5412       case NEON::BI__builtin_neon_vld1q_dup_v:
5413       case NEON::BI__builtin_neon_vst1_v:
5414       case NEON::BI__builtin_neon_vst1q_v:
5415       case NEON::BI__builtin_neon_vst1q_lane_v:
5416       case NEON::BI__builtin_neon_vst1_lane_v:
5417       case NEON::BI__builtin_neon_vst2_v:
5418       case NEON::BI__builtin_neon_vst2q_v:
5419       case NEON::BI__builtin_neon_vst2_lane_v:
5420       case NEON::BI__builtin_neon_vst2q_lane_v:
5421       case NEON::BI__builtin_neon_vst3_v:
5422       case NEON::BI__builtin_neon_vst3q_v:
5423       case NEON::BI__builtin_neon_vst3_lane_v:
5424       case NEON::BI__builtin_neon_vst3q_lane_v:
5425       case NEON::BI__builtin_neon_vst4_v:
5426       case NEON::BI__builtin_neon_vst4q_v:
5427       case NEON::BI__builtin_neon_vst4_lane_v:
5428       case NEON::BI__builtin_neon_vst4q_lane_v:
5429         // Get the alignment for the argument in addition to the value;
5430         // we'll use it later.
5431         PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
5432         Ops.push_back(PtrOp0.getPointer());
5433         continue;
5434       }
5435     }
5436     if (i == 1) {
5437       switch (BuiltinID) {
5438       case NEON::BI__builtin_neon_vld2_v:
5439       case NEON::BI__builtin_neon_vld2q_v:
5440       case NEON::BI__builtin_neon_vld3_v:
5441       case NEON::BI__builtin_neon_vld3q_v:
5442       case NEON::BI__builtin_neon_vld4_v:
5443       case NEON::BI__builtin_neon_vld4q_v:
5444       case NEON::BI__builtin_neon_vld2_lane_v:
5445       case NEON::BI__builtin_neon_vld2q_lane_v:
5446       case NEON::BI__builtin_neon_vld3_lane_v:
5447       case NEON::BI__builtin_neon_vld3q_lane_v:
5448       case NEON::BI__builtin_neon_vld4_lane_v:
5449       case NEON::BI__builtin_neon_vld4q_lane_v:
5450       case NEON::BI__builtin_neon_vld2_dup_v:
5451       case NEON::BI__builtin_neon_vld3_dup_v:
5452       case NEON::BI__builtin_neon_vld4_dup_v:
5453         // Get the alignment for the argument in addition to the value;
5454         // we'll use it later.
5455         PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
5456         Ops.push_back(PtrOp1.getPointer());
5457         continue;
5458       }
5459     }
5460 
5461     if ((ICEArguments & (1 << i)) == 0) {
5462       Ops.push_back(EmitScalarExpr(E->getArg(i)));
5463     } else {
5464       // If this is required to be a constant, constant fold it so that we know
5465       // that the generated intrinsic gets a ConstantInt.
5466       llvm::APSInt Result;
5467       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
5468       assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
5469       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
5470     }
5471   }
5472 
5473   switch (BuiltinID) {
5474   default: break;
5475 
5476   case NEON::BI__builtin_neon_vget_lane_i8:
5477   case NEON::BI__builtin_neon_vget_lane_i16:
5478   case NEON::BI__builtin_neon_vget_lane_i32:
5479   case NEON::BI__builtin_neon_vget_lane_i64:
5480   case NEON::BI__builtin_neon_vget_lane_f32:
5481   case NEON::BI__builtin_neon_vgetq_lane_i8:
5482   case NEON::BI__builtin_neon_vgetq_lane_i16:
5483   case NEON::BI__builtin_neon_vgetq_lane_i32:
5484   case NEON::BI__builtin_neon_vgetq_lane_i64:
5485   case NEON::BI__builtin_neon_vgetq_lane_f32:
5486     return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
5487 
5488   case NEON::BI__builtin_neon_vset_lane_i8:
5489   case NEON::BI__builtin_neon_vset_lane_i16:
5490   case NEON::BI__builtin_neon_vset_lane_i32:
5491   case NEON::BI__builtin_neon_vset_lane_i64:
5492   case NEON::BI__builtin_neon_vset_lane_f32:
5493   case NEON::BI__builtin_neon_vsetq_lane_i8:
5494   case NEON::BI__builtin_neon_vsetq_lane_i16:
5495   case NEON::BI__builtin_neon_vsetq_lane_i32:
5496   case NEON::BI__builtin_neon_vsetq_lane_i64:
5497   case NEON::BI__builtin_neon_vsetq_lane_f32:
5498     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5499 
5500   case NEON::BI__builtin_neon_vsha1h_u32:
5501     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
5502                         "vsha1h");
5503   case NEON::BI__builtin_neon_vsha1cq_u32:
5504     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
5505                         "vsha1h");
5506   case NEON::BI__builtin_neon_vsha1pq_u32:
5507     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
5508                         "vsha1h");
5509   case NEON::BI__builtin_neon_vsha1mq_u32:
5510     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
5511                         "vsha1h");
5512 
5513   // The ARM _MoveToCoprocessor builtins put the input register value as
5514   // the first argument, but the LLVM intrinsic expects it as the third one.
5515   case ARM::BI_MoveToCoprocessor:
5516   case ARM::BI_MoveToCoprocessor2: {
5517     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
5518                                    Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
5519     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
5520                                   Ops[3], Ops[4], Ops[5]});
5521   }
5522   case ARM::BI_BitScanForward:
5523   case ARM::BI_BitScanForward64:
5524     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
5525   case ARM::BI_BitScanReverse:
5526   case ARM::BI_BitScanReverse64:
5527     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
5528 
5529   case ARM::BI_InterlockedAnd64:
5530     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
5531   case ARM::BI_InterlockedExchange64:
5532     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
5533   case ARM::BI_InterlockedExchangeAdd64:
5534     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
5535   case ARM::BI_InterlockedExchangeSub64:
5536     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
5537   case ARM::BI_InterlockedOr64:
5538     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
5539   case ARM::BI_InterlockedXor64:
5540     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
5541   case ARM::BI_InterlockedDecrement64:
5542     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
5543   case ARM::BI_InterlockedIncrement64:
5544     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
5545   }
5546 
5547   // Get the last argument, which specifies the vector type.
5548   assert(HasExtraArg);
5549   llvm::APSInt Result;
5550   const Expr *Arg = E->getArg(E->getNumArgs()-1);
5551   if (!Arg->isIntegerConstantExpr(Result, getContext()))
5552     return nullptr;
5553 
5554   if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
5555       BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
5556     // Determine the overloaded type of this builtin.
5557     llvm::Type *Ty;
5558     if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
5559       Ty = FloatTy;
5560     else
5561       Ty = DoubleTy;
5562 
5563     // Determine whether this is an unsigned conversion or not.
5564     bool usgn = Result.getZExtValue() == 1;
5565     unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
5566 
5567     // Call the appropriate intrinsic.
5568     Function *F = CGM.getIntrinsic(Int, Ty);
5569     return Builder.CreateCall(F, Ops, "vcvtr");
5570   }
5571 
5572   // Determine the type of this overloaded NEON intrinsic.
5573   NeonTypeFlags Type(Result.getZExtValue());
5574   bool usgn = Type.isUnsigned();
5575   bool rightShift = false;
5576 
5577   llvm::VectorType *VTy = GetNeonType(this, Type,
5578                                       getTarget().hasLegalHalfType());
5579   llvm::Type *Ty = VTy;
5580   if (!Ty)
5581     return nullptr;
5582 
5583   // Many NEON builtins have identical semantics and uses in ARM and
5584   // AArch64. Emit these in a single function.
5585   auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
5586   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
5587       IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
5588   if (Builtin)
5589     return EmitCommonNeonBuiltinExpr(
5590         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
5591         Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
5592 
5593   unsigned Int;
5594   switch (BuiltinID) {
5595   default: return nullptr;
5596   case NEON::BI__builtin_neon_vld1q_lane_v:
5597     // Handle 64-bit integer elements as a special case.  Use shuffles of
5598     // one-element vectors to avoid poor code for i64 in the backend.
5599     if (VTy->getElementType()->isIntegerTy(64)) {
5600       // Extract the other lane.
5601       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5602       uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
5603       Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
5604       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
5605       // Load the value as a one-element vector.
5606       Ty = llvm::VectorType::get(VTy->getElementType(), 1);
5607       llvm::Type *Tys[] = {Ty, Int8PtrTy};
5608       Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
5609       Value *Align = getAlignmentValue32(PtrOp0);
5610       Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
5611       // Combine them.
5612       uint32_t Indices[] = {1 - Lane, Lane};
5613       SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
5614       return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
5615     }
5616     LLVM_FALLTHROUGH;
5617   case NEON::BI__builtin_neon_vld1_lane_v: {
5618     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5619     PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
5620     Value *Ld = Builder.CreateLoad(PtrOp0);
5621     return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
5622   }
5623   case NEON::BI__builtin_neon_vld2_dup_v:
5624   case NEON::BI__builtin_neon_vld3_dup_v:
5625   case NEON::BI__builtin_neon_vld4_dup_v: {
5626     // Handle 64-bit elements as a special-case.  There is no "dup" needed.
5627     if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
5628       switch (BuiltinID) {
5629       case NEON::BI__builtin_neon_vld2_dup_v:
5630         Int = Intrinsic::arm_neon_vld2;
5631         break;
5632       case NEON::BI__builtin_neon_vld3_dup_v:
5633         Int = Intrinsic::arm_neon_vld3;
5634         break;
5635       case NEON::BI__builtin_neon_vld4_dup_v:
5636         Int = Intrinsic::arm_neon_vld4;
5637         break;
5638       default: llvm_unreachable("unknown vld_dup intrinsic?");
5639       }
5640       llvm::Type *Tys[] = {Ty, Int8PtrTy};
5641       Function *F = CGM.getIntrinsic(Int, Tys);
5642       llvm::Value *Align = getAlignmentValue32(PtrOp1);
5643       Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
5644       Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5645       Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5646       return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5647     }
5648     switch (BuiltinID) {
5649     case NEON::BI__builtin_neon_vld2_dup_v:
5650       Int = Intrinsic::arm_neon_vld2lane;
5651       break;
5652     case NEON::BI__builtin_neon_vld3_dup_v:
5653       Int = Intrinsic::arm_neon_vld3lane;
5654       break;
5655     case NEON::BI__builtin_neon_vld4_dup_v:
5656       Int = Intrinsic::arm_neon_vld4lane;
5657       break;
5658     default: llvm_unreachable("unknown vld_dup intrinsic?");
5659     }
5660     llvm::Type *Tys[] = {Ty, Int8PtrTy};
5661     Function *F = CGM.getIntrinsic(Int, Tys);
5662     llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
5663 
5664     SmallVector<Value*, 6> Args;
5665     Args.push_back(Ops[1]);
5666     Args.append(STy->getNumElements(), UndefValue::get(Ty));
5667 
5668     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
5669     Args.push_back(CI);
5670     Args.push_back(getAlignmentValue32(PtrOp1));
5671 
5672     Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
5673     // splat lane 0 to all elts in each vector of the result.
5674     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
5675       Value *Val = Builder.CreateExtractValue(Ops[1], i);
5676       Value *Elt = Builder.CreateBitCast(Val, Ty);
5677       Elt = EmitNeonSplat(Elt, CI);
5678       Elt = Builder.CreateBitCast(Elt, Val->getType());
5679       Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
5680     }
5681     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5682     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5683     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5684   }
5685   case NEON::BI__builtin_neon_vqrshrn_n_v:
5686     Int =
5687       usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
5688     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
5689                         1, true);
5690   case NEON::BI__builtin_neon_vqrshrun_n_v:
5691     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
5692                         Ops, "vqrshrun_n", 1, true);
5693   case NEON::BI__builtin_neon_vqshrn_n_v:
5694     Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
5695     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
5696                         1, true);
5697   case NEON::BI__builtin_neon_vqshrun_n_v:
5698     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
5699                         Ops, "vqshrun_n", 1, true);
5700   case NEON::BI__builtin_neon_vrecpe_v:
5701   case NEON::BI__builtin_neon_vrecpeq_v:
5702     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
5703                         Ops, "vrecpe");
5704   case NEON::BI__builtin_neon_vrshrn_n_v:
5705     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
5706                         Ops, "vrshrn_n", 1, true);
5707   case NEON::BI__builtin_neon_vrsra_n_v:
5708   case NEON::BI__builtin_neon_vrsraq_n_v:
5709     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5710     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5711     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
5712     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
5713     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
5714     return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
5715   case NEON::BI__builtin_neon_vsri_n_v:
5716   case NEON::BI__builtin_neon_vsriq_n_v:
5717     rightShift = true;
5718     LLVM_FALLTHROUGH;
5719   case NEON::BI__builtin_neon_vsli_n_v:
5720   case NEON::BI__builtin_neon_vsliq_n_v:
5721     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
5722     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
5723                         Ops, "vsli_n");
5724   case NEON::BI__builtin_neon_vsra_n_v:
5725   case NEON::BI__builtin_neon_vsraq_n_v:
5726     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5727     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
5728     return Builder.CreateAdd(Ops[0], Ops[1]);
5729   case NEON::BI__builtin_neon_vst1q_lane_v:
5730     // Handle 64-bit integer elements as a special case.  Use a shuffle to get
5731     // a one-element vector and avoid poor code for i64 in the backend.
5732     if (VTy->getElementType()->isIntegerTy(64)) {
5733       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5734       Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
5735       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
5736       Ops[2] = getAlignmentValue32(PtrOp0);
5737       llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
5738       return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
5739                                                  Tys), Ops);
5740     }
5741     LLVM_FALLTHROUGH;
5742   case NEON::BI__builtin_neon_vst1_lane_v: {
5743     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5744     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
5745     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5746     auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
5747     return St;
5748   }
5749   case NEON::BI__builtin_neon_vtbl1_v:
5750     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
5751                         Ops, "vtbl1");
5752   case NEON::BI__builtin_neon_vtbl2_v:
5753     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
5754                         Ops, "vtbl2");
5755   case NEON::BI__builtin_neon_vtbl3_v:
5756     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
5757                         Ops, "vtbl3");
5758   case NEON::BI__builtin_neon_vtbl4_v:
5759     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
5760                         Ops, "vtbl4");
5761   case NEON::BI__builtin_neon_vtbx1_v:
5762     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
5763                         Ops, "vtbx1");
5764   case NEON::BI__builtin_neon_vtbx2_v:
5765     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
5766                         Ops, "vtbx2");
5767   case NEON::BI__builtin_neon_vtbx3_v:
5768     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
5769                         Ops, "vtbx3");
5770   case NEON::BI__builtin_neon_vtbx4_v:
5771     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
5772                         Ops, "vtbx4");
5773   }
5774 }
5775 
5776 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
5777                                       const CallExpr *E,
5778                                       SmallVectorImpl<Value *> &Ops,
5779                                       llvm::Triple::ArchType Arch) {
5780   unsigned int Int = 0;
5781   const char *s = nullptr;
5782 
5783   switch (BuiltinID) {
5784   default:
5785     return nullptr;
5786   case NEON::BI__builtin_neon_vtbl1_v:
5787   case NEON::BI__builtin_neon_vqtbl1_v:
5788   case NEON::BI__builtin_neon_vqtbl1q_v:
5789   case NEON::BI__builtin_neon_vtbl2_v:
5790   case NEON::BI__builtin_neon_vqtbl2_v:
5791   case NEON::BI__builtin_neon_vqtbl2q_v:
5792   case NEON::BI__builtin_neon_vtbl3_v:
5793   case NEON::BI__builtin_neon_vqtbl3_v:
5794   case NEON::BI__builtin_neon_vqtbl3q_v:
5795   case NEON::BI__builtin_neon_vtbl4_v:
5796   case NEON::BI__builtin_neon_vqtbl4_v:
5797   case NEON::BI__builtin_neon_vqtbl4q_v:
5798     break;
5799   case NEON::BI__builtin_neon_vtbx1_v:
5800   case NEON::BI__builtin_neon_vqtbx1_v:
5801   case NEON::BI__builtin_neon_vqtbx1q_v:
5802   case NEON::BI__builtin_neon_vtbx2_v:
5803   case NEON::BI__builtin_neon_vqtbx2_v:
5804   case NEON::BI__builtin_neon_vqtbx2q_v:
5805   case NEON::BI__builtin_neon_vtbx3_v:
5806   case NEON::BI__builtin_neon_vqtbx3_v:
5807   case NEON::BI__builtin_neon_vqtbx3q_v:
5808   case NEON::BI__builtin_neon_vtbx4_v:
5809   case NEON::BI__builtin_neon_vqtbx4_v:
5810   case NEON::BI__builtin_neon_vqtbx4q_v:
5811     break;
5812   }
5813 
5814   assert(E->getNumArgs() >= 3);
5815 
5816   // Get the last argument, which specifies the vector type.
5817   llvm::APSInt Result;
5818   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
5819   if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
5820     return nullptr;
5821 
5822   // Determine the type of this overloaded NEON intrinsic.
5823   NeonTypeFlags Type(Result.getZExtValue());
5824   llvm::VectorType *Ty = GetNeonType(&CGF, Type);
5825   if (!Ty)
5826     return nullptr;
5827 
5828   CodeGen::CGBuilderTy &Builder = CGF.Builder;
5829 
5830   // AArch64 scalar builtins are not overloaded, they do not have an extra
5831   // argument that specifies the vector type, need to handle each case.
5832   switch (BuiltinID) {
5833   case NEON::BI__builtin_neon_vtbl1_v: {
5834     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
5835                               Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
5836                               "vtbl1");
5837   }
5838   case NEON::BI__builtin_neon_vtbl2_v: {
5839     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
5840                               Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
5841                               "vtbl1");
5842   }
5843   case NEON::BI__builtin_neon_vtbl3_v: {
5844     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
5845                               Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
5846                               "vtbl2");
5847   }
5848   case NEON::BI__builtin_neon_vtbl4_v: {
5849     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
5850                               Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
5851                               "vtbl2");
5852   }
5853   case NEON::BI__builtin_neon_vtbx1_v: {
5854     Value *TblRes =
5855         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
5856                            Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
5857 
5858     llvm::Constant *EightV = ConstantInt::get(Ty, 8);
5859     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
5860     CmpRes = Builder.CreateSExt(CmpRes, Ty);
5861 
5862     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5863     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5864     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5865   }
5866   case NEON::BI__builtin_neon_vtbx2_v: {
5867     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
5868                               Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
5869                               "vtbx1");
5870   }
5871   case NEON::BI__builtin_neon_vtbx3_v: {
5872     Value *TblRes =
5873         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
5874                            Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
5875 
5876     llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
5877     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
5878                                            TwentyFourV);
5879     CmpRes = Builder.CreateSExt(CmpRes, Ty);
5880 
5881     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5882     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5883     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5884   }
5885   case NEON::BI__builtin_neon_vtbx4_v: {
5886     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
5887                               Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
5888                               "vtbx2");
5889   }
5890   case NEON::BI__builtin_neon_vqtbl1_v:
5891   case NEON::BI__builtin_neon_vqtbl1q_v:
5892     Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
5893   case NEON::BI__builtin_neon_vqtbl2_v:
5894   case NEON::BI__builtin_neon_vqtbl2q_v: {
5895     Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
5896   case NEON::BI__builtin_neon_vqtbl3_v:
5897   case NEON::BI__builtin_neon_vqtbl3q_v:
5898     Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
5899   case NEON::BI__builtin_neon_vqtbl4_v:
5900   case NEON::BI__builtin_neon_vqtbl4q_v:
5901     Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
5902   case NEON::BI__builtin_neon_vqtbx1_v:
5903   case NEON::BI__builtin_neon_vqtbx1q_v:
5904     Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
5905   case NEON::BI__builtin_neon_vqtbx2_v:
5906   case NEON::BI__builtin_neon_vqtbx2q_v:
5907     Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
5908   case NEON::BI__builtin_neon_vqtbx3_v:
5909   case NEON::BI__builtin_neon_vqtbx3q_v:
5910     Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
5911   case NEON::BI__builtin_neon_vqtbx4_v:
5912   case NEON::BI__builtin_neon_vqtbx4q_v:
5913     Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
5914   }
5915   }
5916 
5917   if (!Int)
5918     return nullptr;
5919 
5920   Function *F = CGF.CGM.getIntrinsic(Int, Ty);
5921   return CGF.EmitNeonCall(F, Ops, s);
5922 }
5923 
5924 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
5925   llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
5926   Op = Builder.CreateBitCast(Op, Int16Ty);
5927   Value *V = UndefValue::get(VTy);
5928   llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
5929   Op = Builder.CreateInsertElement(V, Op, CI);
5930   return Op;
5931 }
5932 
5933 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
5934                                                const CallExpr *E,
5935                                                llvm::Triple::ArchType Arch) {
5936   unsigned HintID = static_cast<unsigned>(-1);
5937   switch (BuiltinID) {
5938   default: break;
5939   case AArch64::BI__builtin_arm_nop:
5940     HintID = 0;
5941     break;
5942   case AArch64::BI__builtin_arm_yield:
5943     HintID = 1;
5944     break;
5945   case AArch64::BI__builtin_arm_wfe:
5946     HintID = 2;
5947     break;
5948   case AArch64::BI__builtin_arm_wfi:
5949     HintID = 3;
5950     break;
5951   case AArch64::BI__builtin_arm_sev:
5952     HintID = 4;
5953     break;
5954   case AArch64::BI__builtin_arm_sevl:
5955     HintID = 5;
5956     break;
5957   }
5958 
5959   if (HintID != static_cast<unsigned>(-1)) {
5960     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
5961     return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
5962   }
5963 
5964   if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
5965     Value *Address         = EmitScalarExpr(E->getArg(0));
5966     Value *RW              = EmitScalarExpr(E->getArg(1));
5967     Value *CacheLevel      = EmitScalarExpr(E->getArg(2));
5968     Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
5969     Value *IsData          = EmitScalarExpr(E->getArg(4));
5970 
5971     Value *Locality = nullptr;
5972     if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
5973       // Temporal fetch, needs to convert cache level to locality.
5974       Locality = llvm::ConstantInt::get(Int32Ty,
5975         -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
5976     } else {
5977       // Streaming fetch.
5978       Locality = llvm::ConstantInt::get(Int32Ty, 0);
5979     }
5980 
5981     // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
5982     // PLDL3STRM or PLDL2STRM.
5983     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
5984     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
5985   }
5986 
5987   if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
5988     assert((getContext().getTypeSize(E->getType()) == 32) &&
5989            "rbit of unusual size!");
5990     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5991     return Builder.CreateCall(
5992         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5993   }
5994   if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
5995     assert((getContext().getTypeSize(E->getType()) == 64) &&
5996            "rbit of unusual size!");
5997     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5998     return Builder.CreateCall(
5999         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
6000   }
6001 
6002   if (BuiltinID == AArch64::BI__clear_cache) {
6003     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
6004     const FunctionDecl *FD = E->getDirectCallee();
6005     Value *Ops[2];
6006     for (unsigned i = 0; i < 2; i++)
6007       Ops[i] = EmitScalarExpr(E->getArg(i));
6008     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
6009     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
6010     StringRef Name = FD->getName();
6011     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
6012   }
6013 
6014   if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
6015       BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
6016       getContext().getTypeSize(E->getType()) == 128) {
6017     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
6018                                        ? Intrinsic::aarch64_ldaxp
6019                                        : Intrinsic::aarch64_ldxp);
6020 
6021     Value *LdPtr = EmitScalarExpr(E->getArg(0));
6022     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
6023                                     "ldxp");
6024 
6025     Value *Val0 = Builder.CreateExtractValue(Val, 1);
6026     Value *Val1 = Builder.CreateExtractValue(Val, 0);
6027     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
6028     Val0 = Builder.CreateZExt(Val0, Int128Ty);
6029     Val1 = Builder.CreateZExt(Val1, Int128Ty);
6030 
6031     Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
6032     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
6033     Val = Builder.CreateOr(Val, Val1);
6034     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
6035   } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
6036              BuiltinID == AArch64::BI__builtin_arm_ldaex) {
6037     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
6038 
6039     QualType Ty = E->getType();
6040     llvm::Type *RealResTy = ConvertType(Ty);
6041     llvm::Type *PtrTy = llvm::IntegerType::get(
6042         getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
6043     LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
6044 
6045     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
6046                                        ? Intrinsic::aarch64_ldaxr
6047                                        : Intrinsic::aarch64_ldxr,
6048                                    PtrTy);
6049     Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
6050 
6051     if (RealResTy->isPointerTy())
6052       return Builder.CreateIntToPtr(Val, RealResTy);
6053 
6054     llvm::Type *IntResTy = llvm::IntegerType::get(
6055         getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
6056     Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
6057     return Builder.CreateBitCast(Val, RealResTy);
6058   }
6059 
6060   if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
6061        BuiltinID == AArch64::BI__builtin_arm_stlex) &&
6062       getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
6063     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
6064                                        ? Intrinsic::aarch64_stlxp
6065                                        : Intrinsic::aarch64_stxp);
6066     llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
6067 
6068     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
6069     EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
6070 
6071     Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
6072     llvm::Value *Val = Builder.CreateLoad(Tmp);
6073 
6074     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
6075     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
6076     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
6077                                          Int8PtrTy);
6078     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
6079   }
6080 
6081   if (BuiltinID == AArch64::BI__builtin_arm_strex ||
6082       BuiltinID == AArch64::BI__builtin_arm_stlex) {
6083     Value *StoreVal = EmitScalarExpr(E->getArg(0));
6084     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
6085 
6086     QualType Ty = E->getArg(0)->getType();
6087     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
6088                                                  getContext().getTypeSize(Ty));
6089     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
6090 
6091     if (StoreVal->getType()->isPointerTy())
6092       StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
6093     else {
6094       llvm::Type *IntTy = llvm::IntegerType::get(
6095           getLLVMContext(),
6096           CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
6097       StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
6098       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
6099     }
6100 
6101     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
6102                                        ? Intrinsic::aarch64_stlxr
6103                                        : Intrinsic::aarch64_stxr,
6104                                    StoreAddr->getType());
6105     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
6106   }
6107 
6108   if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
6109     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
6110     return Builder.CreateCall(F);
6111   }
6112 
6113   // CRC32
6114   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
6115   switch (BuiltinID) {
6116   case AArch64::BI__builtin_arm_crc32b:
6117     CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
6118   case AArch64::BI__builtin_arm_crc32cb:
6119     CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
6120   case AArch64::BI__builtin_arm_crc32h:
6121     CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
6122   case AArch64::BI__builtin_arm_crc32ch:
6123     CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
6124   case AArch64::BI__builtin_arm_crc32w:
6125     CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
6126   case AArch64::BI__builtin_arm_crc32cw:
6127     CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
6128   case AArch64::BI__builtin_arm_crc32d:
6129     CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
6130   case AArch64::BI__builtin_arm_crc32cd:
6131     CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
6132   }
6133 
6134   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
6135     Value *Arg0 = EmitScalarExpr(E->getArg(0));
6136     Value *Arg1 = EmitScalarExpr(E->getArg(1));
6137     Function *F = CGM.getIntrinsic(CRCIntrinsicID);
6138 
6139     llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
6140     Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
6141 
6142     return Builder.CreateCall(F, {Arg0, Arg1});
6143   }
6144 
6145   if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
6146       BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
6147       BuiltinID == AArch64::BI__builtin_arm_rsrp ||
6148       BuiltinID == AArch64::BI__builtin_arm_wsr ||
6149       BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
6150       BuiltinID == AArch64::BI__builtin_arm_wsrp) {
6151 
6152     bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
6153                   BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
6154                   BuiltinID == AArch64::BI__builtin_arm_rsrp;
6155 
6156     bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
6157                             BuiltinID == AArch64::BI__builtin_arm_wsrp;
6158 
6159     bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
6160                    BuiltinID != AArch64::BI__builtin_arm_wsr;
6161 
6162     llvm::Type *ValueType;
6163     llvm::Type *RegisterType = Int64Ty;
6164     if (IsPointerBuiltin) {
6165       ValueType = VoidPtrTy;
6166     } else if (Is64Bit) {
6167       ValueType = Int64Ty;
6168     } else {
6169       ValueType = Int32Ty;
6170     }
6171 
6172     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
6173   }
6174 
6175   // Find out if any arguments are required to be integer constant
6176   // expressions.
6177   unsigned ICEArguments = 0;
6178   ASTContext::GetBuiltinTypeError Error;
6179   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
6180   assert(Error == ASTContext::GE_None && "Should not codegen an error");
6181 
6182   llvm::SmallVector<Value*, 4> Ops;
6183   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
6184     if ((ICEArguments & (1 << i)) == 0) {
6185       Ops.push_back(EmitScalarExpr(E->getArg(i)));
6186     } else {
6187       // If this is required to be a constant, constant fold it so that we know
6188       // that the generated intrinsic gets a ConstantInt.
6189       llvm::APSInt Result;
6190       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
6191       assert(IsConst && "Constant arg isn't actually constant?");
6192       (void)IsConst;
6193       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
6194     }
6195   }
6196 
6197   auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
6198   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
6199       SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
6200 
6201   if (Builtin) {
6202     Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
6203     Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
6204     assert(Result && "SISD intrinsic should have been handled");
6205     return Result;
6206   }
6207 
6208   llvm::APSInt Result;
6209   const Expr *Arg = E->getArg(E->getNumArgs()-1);
6210   NeonTypeFlags Type(0);
6211   if (Arg->isIntegerConstantExpr(Result, getContext()))
6212     // Determine the type of this overloaded NEON intrinsic.
6213     Type = NeonTypeFlags(Result.getZExtValue());
6214 
6215   bool usgn = Type.isUnsigned();
6216   bool quad = Type.isQuad();
6217 
6218   // Handle non-overloaded intrinsics first.
6219   switch (BuiltinID) {
6220   default: break;
6221   case NEON::BI__builtin_neon_vabsh_f16:
6222     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6223     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
6224   case NEON::BI__builtin_neon_vldrq_p128: {
6225     llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
6226     llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
6227     Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
6228     return Builder.CreateAlignedLoad(Int128Ty, Ptr,
6229                                      CharUnits::fromQuantity(16));
6230   }
6231   case NEON::BI__builtin_neon_vstrq_p128: {
6232     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
6233     Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
6234     return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
6235   }
6236   case NEON::BI__builtin_neon_vcvts_u32_f32:
6237   case NEON::BI__builtin_neon_vcvtd_u64_f64:
6238     usgn = true;
6239     LLVM_FALLTHROUGH;
6240   case NEON::BI__builtin_neon_vcvts_s32_f32:
6241   case NEON::BI__builtin_neon_vcvtd_s64_f64: {
6242     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6243     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
6244     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
6245     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
6246     Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
6247     if (usgn)
6248       return Builder.CreateFPToUI(Ops[0], InTy);
6249     return Builder.CreateFPToSI(Ops[0], InTy);
6250   }
6251   case NEON::BI__builtin_neon_vcvts_f32_u32:
6252   case NEON::BI__builtin_neon_vcvtd_f64_u64:
6253     usgn = true;
6254     LLVM_FALLTHROUGH;
6255   case NEON::BI__builtin_neon_vcvts_f32_s32:
6256   case NEON::BI__builtin_neon_vcvtd_f64_s64: {
6257     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6258     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
6259     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
6260     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
6261     Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
6262     if (usgn)
6263       return Builder.CreateUIToFP(Ops[0], FTy);
6264     return Builder.CreateSIToFP(Ops[0], FTy);
6265   }
6266   case NEON::BI__builtin_neon_vcvth_f16_u16:
6267   case NEON::BI__builtin_neon_vcvth_f16_u32:
6268   case NEON::BI__builtin_neon_vcvth_f16_u64:
6269     usgn = true;
6270     // FALL THROUGH
6271   case NEON::BI__builtin_neon_vcvth_f16_s16:
6272   case NEON::BI__builtin_neon_vcvth_f16_s32:
6273   case NEON::BI__builtin_neon_vcvth_f16_s64: {
6274     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6275     llvm::Type *FTy = HalfTy;
6276     llvm::Type *InTy;
6277     if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
6278       InTy = Int64Ty;
6279     else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
6280       InTy = Int32Ty;
6281     else
6282       InTy = Int16Ty;
6283     Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
6284     if (usgn)
6285       return Builder.CreateUIToFP(Ops[0], FTy);
6286     return Builder.CreateSIToFP(Ops[0], FTy);
6287   }
6288   case NEON::BI__builtin_neon_vcvth_u16_f16:
6289     usgn = true;
6290     // FALL THROUGH
6291   case NEON::BI__builtin_neon_vcvth_s16_f16: {
6292     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6293     Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
6294     if (usgn)
6295       return Builder.CreateFPToUI(Ops[0], Int16Ty);
6296     return Builder.CreateFPToSI(Ops[0], Int16Ty);
6297   }
6298   case NEON::BI__builtin_neon_vcvth_u32_f16:
6299     usgn = true;
6300     // FALL THROUGH
6301   case NEON::BI__builtin_neon_vcvth_s32_f16: {
6302     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6303     Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
6304     if (usgn)
6305       return Builder.CreateFPToUI(Ops[0], Int32Ty);
6306     return Builder.CreateFPToSI(Ops[0], Int32Ty);
6307   }
6308   case NEON::BI__builtin_neon_vcvth_u64_f16:
6309     usgn = true;
6310     // FALL THROUGH
6311   case NEON::BI__builtin_neon_vcvth_s64_f16: {
6312     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6313     Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
6314     if (usgn)
6315       return Builder.CreateFPToUI(Ops[0], Int64Ty);
6316     return Builder.CreateFPToSI(Ops[0], Int64Ty);
6317   }
6318   case NEON::BI__builtin_neon_vcvtah_u16_f16:
6319   case NEON::BI__builtin_neon_vcvtmh_u16_f16:
6320   case NEON::BI__builtin_neon_vcvtnh_u16_f16:
6321   case NEON::BI__builtin_neon_vcvtph_u16_f16:
6322   case NEON::BI__builtin_neon_vcvtah_s16_f16:
6323   case NEON::BI__builtin_neon_vcvtmh_s16_f16:
6324   case NEON::BI__builtin_neon_vcvtnh_s16_f16:
6325   case NEON::BI__builtin_neon_vcvtph_s16_f16: {
6326     unsigned Int;
6327     llvm::Type* InTy = Int32Ty;
6328     llvm::Type* FTy  = HalfTy;
6329     llvm::Type *Tys[2] = {InTy, FTy};
6330     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6331     switch (BuiltinID) {
6332     default: llvm_unreachable("missing builtin ID in switch!");
6333     case NEON::BI__builtin_neon_vcvtah_u16_f16:
6334       Int = Intrinsic::aarch64_neon_fcvtau; break;
6335     case NEON::BI__builtin_neon_vcvtmh_u16_f16:
6336       Int = Intrinsic::aarch64_neon_fcvtmu; break;
6337     case NEON::BI__builtin_neon_vcvtnh_u16_f16:
6338       Int = Intrinsic::aarch64_neon_fcvtnu; break;
6339     case NEON::BI__builtin_neon_vcvtph_u16_f16:
6340       Int = Intrinsic::aarch64_neon_fcvtpu; break;
6341     case NEON::BI__builtin_neon_vcvtah_s16_f16:
6342       Int = Intrinsic::aarch64_neon_fcvtas; break;
6343     case NEON::BI__builtin_neon_vcvtmh_s16_f16:
6344       Int = Intrinsic::aarch64_neon_fcvtms; break;
6345     case NEON::BI__builtin_neon_vcvtnh_s16_f16:
6346       Int = Intrinsic::aarch64_neon_fcvtns; break;
6347     case NEON::BI__builtin_neon_vcvtph_s16_f16:
6348       Int = Intrinsic::aarch64_neon_fcvtps; break;
6349     }
6350     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
6351     return Builder.CreateTrunc(Ops[0], Int16Ty);
6352   }
6353   case NEON::BI__builtin_neon_vcaleh_f16:
6354   case NEON::BI__builtin_neon_vcalth_f16:
6355   case NEON::BI__builtin_neon_vcageh_f16:
6356   case NEON::BI__builtin_neon_vcagth_f16: {
6357     unsigned Int;
6358     llvm::Type* InTy = Int32Ty;
6359     llvm::Type* FTy  = HalfTy;
6360     llvm::Type *Tys[2] = {InTy, FTy};
6361     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6362     switch (BuiltinID) {
6363     default: llvm_unreachable("missing builtin ID in switch!");
6364     case NEON::BI__builtin_neon_vcageh_f16:
6365       Int = Intrinsic::aarch64_neon_facge; break;
6366     case NEON::BI__builtin_neon_vcagth_f16:
6367       Int = Intrinsic::aarch64_neon_facgt; break;
6368     case NEON::BI__builtin_neon_vcaleh_f16:
6369       Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
6370     case NEON::BI__builtin_neon_vcalth_f16:
6371       Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
6372     }
6373     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
6374     return Builder.CreateTrunc(Ops[0], Int16Ty);
6375   }
6376   case NEON::BI__builtin_neon_vcvth_n_s16_f16:
6377   case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
6378     unsigned Int;
6379     llvm::Type* InTy = Int32Ty;
6380     llvm::Type* FTy  = HalfTy;
6381     llvm::Type *Tys[2] = {InTy, FTy};
6382     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6383     switch (BuiltinID) {
6384     default: llvm_unreachable("missing builtin ID in switch!");
6385     case NEON::BI__builtin_neon_vcvth_n_s16_f16:
6386       Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
6387     case NEON::BI__builtin_neon_vcvth_n_u16_f16:
6388       Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
6389     }
6390     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
6391     return Builder.CreateTrunc(Ops[0], Int16Ty);
6392   }
6393   case NEON::BI__builtin_neon_vcvth_n_f16_s16:
6394   case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
6395     unsigned Int;
6396     llvm::Type* FTy  = HalfTy;
6397     llvm::Type* InTy = Int32Ty;
6398     llvm::Type *Tys[2] = {FTy, InTy};
6399     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6400     switch (BuiltinID) {
6401     default: llvm_unreachable("missing builtin ID in switch!");
6402     case NEON::BI__builtin_neon_vcvth_n_f16_s16:
6403       Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
6404       Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
6405       break;
6406     case NEON::BI__builtin_neon_vcvth_n_f16_u16:
6407       Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
6408       Ops[0] = Builder.CreateZExt(Ops[0], InTy);
6409       break;
6410     }
6411     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
6412   }
6413   case NEON::BI__builtin_neon_vpaddd_s64: {
6414     llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
6415     Value *Vec = EmitScalarExpr(E->getArg(0));
    // The vector is v2i64, so make sure it's bitcast to that.
6417     Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
6418     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
6419     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
6420     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
6421     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
    // Pairwise addition of a v2i64 into a scalar i64.
6423     return Builder.CreateAdd(Op0, Op1, "vpaddd");
6424   }
6425   case NEON::BI__builtin_neon_vpaddd_f64: {
6426     llvm::Type *Ty =
6427       llvm::VectorType::get(DoubleTy, 2);
6428     Value *Vec = EmitScalarExpr(E->getArg(0));
6429     // The vector is v2f64, so make sure it's bitcast to that.
6430     Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
6431     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
6432     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
6433     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
6434     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
6435     // Pairwise addition of a v2f64 into a scalar f64.
6436     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
6437   }
6438   case NEON::BI__builtin_neon_vpadds_f32: {
6439     llvm::Type *Ty =
6440       llvm::VectorType::get(FloatTy, 2);
6441     Value *Vec = EmitScalarExpr(E->getArg(0));
6442     // The vector is v2f32, so make sure it's bitcast to that.
6443     Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
6444     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
6445     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
6446     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
6447     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
6448     // Pairwise addition of a v2f32 into a scalar f32.
6449     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
6450   }
6451   case NEON::BI__builtin_neon_vceqzd_s64:
6452   case NEON::BI__builtin_neon_vceqzd_f64:
6453   case NEON::BI__builtin_neon_vceqzs_f32:
6454   case NEON::BI__builtin_neon_vceqzh_f16:
6455     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6456     return EmitAArch64CompareBuiltinExpr(
6457         Ops[0], ConvertType(E->getCallReturnType(getContext())),
6458         ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
6459   case NEON::BI__builtin_neon_vcgezd_s64:
6460   case NEON::BI__builtin_neon_vcgezd_f64:
6461   case NEON::BI__builtin_neon_vcgezs_f32:
6462   case NEON::BI__builtin_neon_vcgezh_f16:
6463     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6464     return EmitAArch64CompareBuiltinExpr(
6465         Ops[0], ConvertType(E->getCallReturnType(getContext())),
6466         ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
6467   case NEON::BI__builtin_neon_vclezd_s64:
6468   case NEON::BI__builtin_neon_vclezd_f64:
6469   case NEON::BI__builtin_neon_vclezs_f32:
6470   case NEON::BI__builtin_neon_vclezh_f16:
6471     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6472     return EmitAArch64CompareBuiltinExpr(
6473         Ops[0], ConvertType(E->getCallReturnType(getContext())),
6474         ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
6475   case NEON::BI__builtin_neon_vcgtzd_s64:
6476   case NEON::BI__builtin_neon_vcgtzd_f64:
6477   case NEON::BI__builtin_neon_vcgtzs_f32:
6478   case NEON::BI__builtin_neon_vcgtzh_f16:
6479     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6480     return EmitAArch64CompareBuiltinExpr(
6481         Ops[0], ConvertType(E->getCallReturnType(getContext())),
6482         ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
6483   case NEON::BI__builtin_neon_vcltzd_s64:
6484   case NEON::BI__builtin_neon_vcltzd_f64:
6485   case NEON::BI__builtin_neon_vcltzs_f32:
6486   case NEON::BI__builtin_neon_vcltzh_f16:
6487     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6488     return EmitAArch64CompareBuiltinExpr(
6489         Ops[0], ConvertType(E->getCallReturnType(getContext())),
6490         ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
6491 
6492   case NEON::BI__builtin_neon_vceqzd_u64: {
6493     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6494     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
6495     Ops[0] =
6496         Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
6497     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
6498   }
6499   case NEON::BI__builtin_neon_vceqd_f64:
6500   case NEON::BI__builtin_neon_vcled_f64:
6501   case NEON::BI__builtin_neon_vcltd_f64:
6502   case NEON::BI__builtin_neon_vcged_f64:
6503   case NEON::BI__builtin_neon_vcgtd_f64: {
6504     llvm::CmpInst::Predicate P;
6505     switch (BuiltinID) {
6506     default: llvm_unreachable("missing builtin ID in switch!");
6507     case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
6508     case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
6509     case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
6510     case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
6511     case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
6512     }
6513     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6514     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6515     Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6516     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
6517     return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
6518   }
6519   case NEON::BI__builtin_neon_vceqs_f32:
6520   case NEON::BI__builtin_neon_vcles_f32:
6521   case NEON::BI__builtin_neon_vclts_f32:
6522   case NEON::BI__builtin_neon_vcges_f32:
6523   case NEON::BI__builtin_neon_vcgts_f32: {
6524     llvm::CmpInst::Predicate P;
6525     switch (BuiltinID) {
6526     default: llvm_unreachable("missing builtin ID in switch!");
6527     case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
6528     case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
6529     case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
6530     case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
6531     case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
6532     }
6533     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6534     Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
6535     Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
6536     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
6537     return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
6538   }
6539   case NEON::BI__builtin_neon_vceqh_f16:
6540   case NEON::BI__builtin_neon_vcleh_f16:
6541   case NEON::BI__builtin_neon_vclth_f16:
6542   case NEON::BI__builtin_neon_vcgeh_f16:
6543   case NEON::BI__builtin_neon_vcgth_f16: {
6544     llvm::CmpInst::Predicate P;
6545     switch (BuiltinID) {
6546     default: llvm_unreachable("missing builtin ID in switch!");
6547     case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
6548     case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
6549     case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
6550     case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
6551     case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
6552     }
6553     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6554     Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
6555     Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
6556     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
6557     return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
6558   }
6559   case NEON::BI__builtin_neon_vceqd_s64:
6560   case NEON::BI__builtin_neon_vceqd_u64:
6561   case NEON::BI__builtin_neon_vcgtd_s64:
6562   case NEON::BI__builtin_neon_vcgtd_u64:
6563   case NEON::BI__builtin_neon_vcltd_s64:
6564   case NEON::BI__builtin_neon_vcltd_u64:
6565   case NEON::BI__builtin_neon_vcged_u64:
6566   case NEON::BI__builtin_neon_vcged_s64:
6567   case NEON::BI__builtin_neon_vcled_u64:
6568   case NEON::BI__builtin_neon_vcled_s64: {
6569     llvm::CmpInst::Predicate P;
6570     switch (BuiltinID) {
6571     default: llvm_unreachable("missing builtin ID in switch!");
6572     case NEON::BI__builtin_neon_vceqd_s64:
6573     case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
6574     case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
6575     case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
6576     case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
6577     case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
6578     case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
6579     case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
6580     case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
6581     case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
6582     }
6583     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6584     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
6585     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6586     Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
6587     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
6588   }
6589   case NEON::BI__builtin_neon_vtstd_s64:
6590   case NEON::BI__builtin_neon_vtstd_u64: {
6591     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6592     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
6593     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6594     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
6595     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
6596                                 llvm::Constant::getNullValue(Int64Ty));
6597     return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
6598   }
6599   case NEON::BI__builtin_neon_vset_lane_i8:
6600   case NEON::BI__builtin_neon_vset_lane_i16:
6601   case NEON::BI__builtin_neon_vset_lane_i32:
6602   case NEON::BI__builtin_neon_vset_lane_i64:
6603   case NEON::BI__builtin_neon_vset_lane_f32:
6604   case NEON::BI__builtin_neon_vsetq_lane_i8:
6605   case NEON::BI__builtin_neon_vsetq_lane_i16:
6606   case NEON::BI__builtin_neon_vsetq_lane_i32:
6607   case NEON::BI__builtin_neon_vsetq_lane_i64:
6608   case NEON::BI__builtin_neon_vsetq_lane_f32:
6609     Ops.push_back(EmitScalarExpr(E->getArg(2)));
6610     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6611   case NEON::BI__builtin_neon_vset_lane_f64:
6612     // The vector type needs a cast for the v1f64 variant.
6613     Ops[1] = Builder.CreateBitCast(Ops[1],
6614                                    llvm::VectorType::get(DoubleTy, 1));
6615     Ops.push_back(EmitScalarExpr(E->getArg(2)));
6616     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6617   case NEON::BI__builtin_neon_vsetq_lane_f64:
6618     // The vector type needs a cast for the v2f64 variant.
6619     Ops[1] = Builder.CreateBitCast(Ops[1],
6620         llvm::VectorType::get(DoubleTy, 2));
6621     Ops.push_back(EmitScalarExpr(E->getArg(2)));
6622     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6623 
6624   case NEON::BI__builtin_neon_vget_lane_i8:
6625   case NEON::BI__builtin_neon_vdupb_lane_i8:
6626     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
6627     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6628                                         "vget_lane");
6629   case NEON::BI__builtin_neon_vgetq_lane_i8:
6630   case NEON::BI__builtin_neon_vdupb_laneq_i8:
6631     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
6632     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6633                                         "vgetq_lane");
6634   case NEON::BI__builtin_neon_vget_lane_i16:
6635   case NEON::BI__builtin_neon_vduph_lane_i16:
6636     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
6637     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6638                                         "vget_lane");
6639   case NEON::BI__builtin_neon_vgetq_lane_i16:
6640   case NEON::BI__builtin_neon_vduph_laneq_i16:
6641     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
6642     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6643                                         "vgetq_lane");
6644   case NEON::BI__builtin_neon_vget_lane_i32:
6645   case NEON::BI__builtin_neon_vdups_lane_i32:
6646     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
6647     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6648                                         "vget_lane");
6649   case NEON::BI__builtin_neon_vdups_lane_f32:
6650     Ops[0] = Builder.CreateBitCast(Ops[0],
6651         llvm::VectorType::get(FloatTy, 2));
6652     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6653                                         "vdups_lane");
6654   case NEON::BI__builtin_neon_vgetq_lane_i32:
6655   case NEON::BI__builtin_neon_vdups_laneq_i32:
6656     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
6657     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6658                                         "vgetq_lane");
6659   case NEON::BI__builtin_neon_vget_lane_i64:
6660   case NEON::BI__builtin_neon_vdupd_lane_i64:
6661     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
6662     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6663                                         "vget_lane");
6664   case NEON::BI__builtin_neon_vdupd_lane_f64:
6665     Ops[0] = Builder.CreateBitCast(Ops[0],
6666         llvm::VectorType::get(DoubleTy, 1));
6667     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6668                                         "vdupd_lane");
6669   case NEON::BI__builtin_neon_vgetq_lane_i64:
6670   case NEON::BI__builtin_neon_vdupd_laneq_i64:
6671     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
6672     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6673                                         "vgetq_lane");
6674   case NEON::BI__builtin_neon_vget_lane_f32:
6675     Ops[0] = Builder.CreateBitCast(Ops[0],
6676         llvm::VectorType::get(FloatTy, 2));
6677     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6678                                         "vget_lane");
6679   case NEON::BI__builtin_neon_vget_lane_f64:
6680     Ops[0] = Builder.CreateBitCast(Ops[0],
6681         llvm::VectorType::get(DoubleTy, 1));
6682     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6683                                         "vget_lane");
6684   case NEON::BI__builtin_neon_vgetq_lane_f32:
6685   case NEON::BI__builtin_neon_vdups_laneq_f32:
6686     Ops[0] = Builder.CreateBitCast(Ops[0],
6687         llvm::VectorType::get(FloatTy, 4));
6688     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6689                                         "vgetq_lane");
6690   case NEON::BI__builtin_neon_vgetq_lane_f64:
6691   case NEON::BI__builtin_neon_vdupd_laneq_f64:
6692     Ops[0] = Builder.CreateBitCast(Ops[0],
6693         llvm::VectorType::get(DoubleTy, 2));
6694     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
6695                                         "vgetq_lane");
6696   case NEON::BI__builtin_neon_vaddh_f16:
6697     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6698     return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
6699   case NEON::BI__builtin_neon_vsubh_f16:
6700     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6701     return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
6702   case NEON::BI__builtin_neon_vmulh_f16:
6703     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6704     return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
6705   case NEON::BI__builtin_neon_vdivh_f16:
6706     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6707     return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
6708   case NEON::BI__builtin_neon_vfmah_f16: {
6709     Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy);
6710     // NEON intrinsic puts accumulator first, unlike the LLVM fma.
6711     return Builder.CreateCall(F,
6712       {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
6713   }
6714   case NEON::BI__builtin_neon_vfmsh_f16: {
6715     Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy);
6716     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy);
6717     Value* Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), "vsubh");
6718     // NEON intrinsic puts accumulator first, unlike the LLVM fma.
6719     return Builder.CreateCall(F, {Sub, EmitScalarExpr(E->getArg(2)), Ops[0]});
6720   }
6721   case NEON::BI__builtin_neon_vaddd_s64:
6722   case NEON::BI__builtin_neon_vaddd_u64:
6723     return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
6724   case NEON::BI__builtin_neon_vsubd_s64:
6725   case NEON::BI__builtin_neon_vsubd_u64:
6726     return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
6727   case NEON::BI__builtin_neon_vqdmlalh_s16:
6728   case NEON::BI__builtin_neon_vqdmlslh_s16: {
6729     SmallVector<Value *, 2> ProductOps;
6730     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
6731     ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
6732     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
6733     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6734                           ProductOps, "vqdmlXl");
6735     Constant *CI = ConstantInt::get(SizeTy, 0);
6736     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6737 
6738     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
6739                                         ? Intrinsic::aarch64_neon_sqadd
6740                                         : Intrinsic::aarch64_neon_sqsub;
6741     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
6742   }
6743   case NEON::BI__builtin_neon_vqshlud_n_s64: {
6744     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6745     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
6746     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
6747                         Ops, "vqshlu_n");
6748   }
6749   case NEON::BI__builtin_neon_vqshld_n_u64:
6750   case NEON::BI__builtin_neon_vqshld_n_s64: {
6751     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
6752                                    ? Intrinsic::aarch64_neon_uqshl
6753                                    : Intrinsic::aarch64_neon_sqshl;
6754     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6755     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
6756     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
6757   }
6758   case NEON::BI__builtin_neon_vrshrd_n_u64:
6759   case NEON::BI__builtin_neon_vrshrd_n_s64: {
6760     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
6761                                    ? Intrinsic::aarch64_neon_urshl
6762                                    : Intrinsic::aarch64_neon_srshl;
6763     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6764     int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
6765     Ops[1] = ConstantInt::get(Int64Ty, -SV);
6766     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
6767   }
6768   case NEON::BI__builtin_neon_vrsrad_n_u64:
6769   case NEON::BI__builtin_neon_vrsrad_n_s64: {
6770     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
6771                                    ? Intrinsic::aarch64_neon_urshl
6772                                    : Intrinsic::aarch64_neon_srshl;
6773     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
6774     Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
6775     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
6776                                 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
6777     return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
6778   }
6779   case NEON::BI__builtin_neon_vshld_n_s64:
6780   case NEON::BI__builtin_neon_vshld_n_u64: {
6781     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6782     return Builder.CreateShl(
6783         Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
6784   }
6785   case NEON::BI__builtin_neon_vshrd_n_s64: {
6786     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6787     return Builder.CreateAShr(
6788         Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
6789                                                    Amt->getZExtValue())),
6790         "shrd_n");
6791   }
6792   case NEON::BI__builtin_neon_vshrd_n_u64: {
6793     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
6794     uint64_t ShiftAmt = Amt->getZExtValue();
6795     // Right-shifting an unsigned value by its size yields 0.
6796     if (ShiftAmt == 64)
6797       return ConstantInt::get(Int64Ty, 0);
6798     return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
6799                               "shrd_n");
6800   }
6801   case NEON::BI__builtin_neon_vsrad_n_s64: {
6802     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
6803     Ops[1] = Builder.CreateAShr(
6804         Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
6805                                                    Amt->getZExtValue())),
6806         "shrd_n");
6807     return Builder.CreateAdd(Ops[0], Ops[1]);
6808   }
6809   case NEON::BI__builtin_neon_vsrad_n_u64: {
6810     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
6811     uint64_t ShiftAmt = Amt->getZExtValue();
6812     // Right-shifting an unsigned value by its size yields 0.
6813     // As Op + 0 = Op, return Ops[0] directly.
6814     if (ShiftAmt == 64)
6815       return Ops[0];
6816     Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
6817                                 "shrd_n");
6818     return Builder.CreateAdd(Ops[0], Ops[1]);
6819   }
6820   case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
6821   case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
6822   case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
6823   case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
6824     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
6825                                           "lane");
6826     SmallVector<Value *, 2> ProductOps;
6827     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
6828     ProductOps.push_back(vectorWrapScalar16(Ops[2]));
6829     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
6830     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
6831                           ProductOps, "vqdmlXl");
6832     Constant *CI = ConstantInt::get(SizeTy, 0);
6833     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6834     Ops.pop_back();
6835 
6836     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
6837                        BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
6838                           ? Intrinsic::aarch64_neon_sqadd
6839                           : Intrinsic::aarch64_neon_sqsub;
6840     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
6841   }
6842   case NEON::BI__builtin_neon_vqdmlals_s32:
6843   case NEON::BI__builtin_neon_vqdmlsls_s32: {
6844     SmallVector<Value *, 2> ProductOps;
6845     ProductOps.push_back(Ops[1]);
6846     ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
6847     Ops[1] =
6848         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6849                      ProductOps, "vqdmlXl");
6850 
6851     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
6852                                         ? Intrinsic::aarch64_neon_sqadd
6853                                         : Intrinsic::aarch64_neon_sqsub;
6854     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
6855   }
6856   case NEON::BI__builtin_neon_vqdmlals_lane_s32:
6857   case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
6858   case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
6859   case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
6860     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
6861                                           "lane");
6862     SmallVector<Value *, 2> ProductOps;
6863     ProductOps.push_back(Ops[1]);
6864     ProductOps.push_back(Ops[2]);
6865     Ops[1] =
6866         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6867                      ProductOps, "vqdmlXl");
6868     Ops.pop_back();
6869 
6870     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
6871                        BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
6872                           ? Intrinsic::aarch64_neon_sqadd
6873                           : Intrinsic::aarch64_neon_sqsub;
6874     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
6875   }
6876   }
6877 
6878   llvm::VectorType *VTy = GetNeonType(this, Type);
6879   llvm::Type *Ty = VTy;
6880   if (!Ty)
6881     return nullptr;
6882 
6883   // Not all intrinsics handled by the common case work for AArch64 yet, so only
6884   // defer to common code if it's been added to our special map.
6885   Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
6886                                    AArch64SIMDIntrinsicsProvenSorted);
6887 
6888   if (Builtin)
6889     return EmitCommonNeonBuiltinExpr(
6890         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
6891         Builtin->NameHint, Builtin->TypeModifier, E, Ops,
6892         /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
6893 
6894   if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
6895     return V;
6896 
6897   unsigned Int;
6898   switch (BuiltinID) {
6899   default: return nullptr;
6900   case NEON::BI__builtin_neon_vbsl_v:
6901   case NEON::BI__builtin_neon_vbslq_v: {
6902     llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6903     Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
6904     Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
6905     Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
6906 
6907     Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
6908     Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
6909     Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
6910     return Builder.CreateBitCast(Ops[0], Ty);
6911   }
6912   case NEON::BI__builtin_neon_vfma_lane_v:
6913   case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
6914     // The ARM builtins (and instructions) have the addend as the first
6915     // operand, but the 'fma' intrinsics have it last. Swap it around here.
6916     Value *Addend = Ops[0];
6917     Value *Multiplicand = Ops[1];
6918     Value *LaneSource = Ops[2];
6919     Ops[0] = Multiplicand;
6920     Ops[1] = LaneSource;
6921     Ops[2] = Addend;
6922 
6923     // Now adjust things to handle the lane access.
6924     llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
6925       llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
6926       VTy;
6927     llvm::Constant *cst = cast<Constant>(Ops[3]);
6928     Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
6929     Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
6930     Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
6931 
6932     Ops.pop_back();
6933     Int = Intrinsic::fma;
6934     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
6935   }
6936   case NEON::BI__builtin_neon_vfma_laneq_v: {
6937     llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6938     // v1f64 fma should be mapped to Neon scalar f64 fma
6939     if (VTy && VTy->getElementType() == DoubleTy) {
6940       Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6941       Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6942       llvm::Type *VTy = GetNeonType(this,
6943         NeonTypeFlags(NeonTypeFlags::Float64, false, true));
6944       Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
6945       Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6946       Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
6947       Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
6948       return Builder.CreateBitCast(Result, Ty);
6949     }
6950     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6951     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6952     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6953 
6954     llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
6955                                             VTy->getNumElements() * 2);
6956     Ops[2] = Builder.CreateBitCast(Ops[2], STy);
6957     Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
6958                                                cast<ConstantInt>(Ops[3]));
6959     Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
6960 
6961     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
6962   }
6963   case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6964     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6965     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6966     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6967 
6968     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6969     Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
6970     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
6971   }
6972   case NEON::BI__builtin_neon_vfmah_lane_f16:
6973   case NEON::BI__builtin_neon_vfmas_lane_f32:
6974   case NEON::BI__builtin_neon_vfmah_laneq_f16:
6975   case NEON::BI__builtin_neon_vfmas_laneq_f32:
6976   case NEON::BI__builtin_neon_vfmad_lane_f64:
6977   case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6978     Ops.push_back(EmitScalarExpr(E->getArg(3)));
6979     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
6980     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6981     Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6982     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
6983   }
6984   case NEON::BI__builtin_neon_vmull_v:
6985     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6986     Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6987     if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6988     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
6989   case NEON::BI__builtin_neon_vmax_v:
6990   case NEON::BI__builtin_neon_vmaxq_v:
6991     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6992     Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6993     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6994     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
6995   case NEON::BI__builtin_neon_vmaxh_f16: {
6996     Ops.push_back(EmitScalarExpr(E->getArg(1)));
6997     Int = Intrinsic::aarch64_neon_fmax;
6998     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
6999   }
7000   case NEON::BI__builtin_neon_vmin_v:
7001   case NEON::BI__builtin_neon_vminq_v:
7002     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
7003     Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
7004     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
7005     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
7006   case NEON::BI__builtin_neon_vminh_f16: {
7007     Ops.push_back(EmitScalarExpr(E->getArg(1)));
7008     Int = Intrinsic::aarch64_neon_fmin;
7009     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
7010   }
7011   case NEON::BI__builtin_neon_vabd_v:
7012   case NEON::BI__builtin_neon_vabdq_v:
7013     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
7014     Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
7015     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
7016     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
7017   case NEON::BI__builtin_neon_vpadal_v:
7018   case NEON::BI__builtin_neon_vpadalq_v: {
7019     unsigned ArgElts = VTy->getNumElements();
7020     llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
7021     unsigned BitWidth = EltTy->getBitWidth();
7022     llvm::Type *ArgTy = llvm::VectorType::get(
7023         llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
7024     llvm::Type* Tys[2] = { VTy, ArgTy };
7025     Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
7026     SmallVector<llvm::Value*, 1> TmpOps;
7027     TmpOps.push_back(Ops[1]);
7028     Function *F = CGM.getIntrinsic(Int, Tys);
7029     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
7030     llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
7031     return Builder.CreateAdd(tmp, addend);
7032   }
7033   case NEON::BI__builtin_neon_vpmin_v:
7034   case NEON::BI__builtin_neon_vpminq_v:
7035     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
7036     Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
7037     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
7038     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
7039   case NEON::BI__builtin_neon_vpmax_v:
7040   case NEON::BI__builtin_neon_vpmaxq_v:
7041     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
7042     Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
7043     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
7044     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
7045   case NEON::BI__builtin_neon_vminnm_v:
7046   case NEON::BI__builtin_neon_vminnmq_v:
7047     Int = Intrinsic::aarch64_neon_fminnm;
7048     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
7049   case NEON::BI__builtin_neon_vminnmh_f16:
7050     Ops.push_back(EmitScalarExpr(E->getArg(1)));
7051     Int = Intrinsic::aarch64_neon_fminnm;
7052     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
7053   case NEON::BI__builtin_neon_vmaxnm_v:
7054   case NEON::BI__builtin_neon_vmaxnmq_v:
7055     Int = Intrinsic::aarch64_neon_fmaxnm;
7056     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
7057   case NEON::BI__builtin_neon_vmaxnmh_f16:
7058     Ops.push_back(EmitScalarExpr(E->getArg(1)));
7059     Int = Intrinsic::aarch64_neon_fmaxnm;
7060     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
7061   case NEON::BI__builtin_neon_vrecpss_f32: {
7062     Ops.push_back(EmitScalarExpr(E->getArg(1)));
7063     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
7064                         Ops, "vrecps");
7065   }
7066   case NEON::BI__builtin_neon_vrecpsd_f64:
7067     Ops.push_back(EmitScalarExpr(E->getArg(1)));
7068     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
7069                         Ops, "vrecps");
7070   case NEON::BI__builtin_neon_vrecpsh_f16:
7071     Ops.push_back(EmitScalarExpr(E->getArg(1)));
7072     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
7073                         Ops, "vrecps");
7074   case NEON::BI__builtin_neon_vqshrun_n_v:
7075     Int = Intrinsic::aarch64_neon_sqshrun;
7076     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
7077   case NEON::BI__builtin_neon_vqrshrun_n_v:
7078     Int = Intrinsic::aarch64_neon_sqrshrun;
7079     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
7080   case NEON::BI__builtin_neon_vqshrn_n_v:
7081     Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
7082     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
7083   case NEON::BI__builtin_neon_vrshrn_n_v:
7084     Int = Intrinsic::aarch64_neon_rshrn;
7085     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
7086   case NEON::BI__builtin_neon_vqrshrn_n_v:
7087     Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
7088     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
7089   case NEON::BI__builtin_neon_vrndah_f16: {
7090     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7091     Int = Intrinsic::round;
7092     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
7093   }
7094   case NEON::BI__builtin_neon_vrnda_v:
7095   case NEON::BI__builtin_neon_vrndaq_v: {
7096     Int = Intrinsic::round;
7097     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
7098   }
7099   case NEON::BI__builtin_neon_vrndih_f16: {
7100     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7101     Int = Intrinsic::nearbyint;
7102     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
7103   }
7104   case NEON::BI__builtin_neon_vrndi_v:
7105   case NEON::BI__builtin_neon_vrndiq_v: {
7106     Int = Intrinsic::nearbyint;
7107     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
7108   }
7109   case NEON::BI__builtin_neon_vrndmh_f16: {
7110     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7111     Int = Intrinsic::floor;
7112     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
7113   }
7114   case NEON::BI__builtin_neon_vrndm_v:
7115   case NEON::BI__builtin_neon_vrndmq_v: {
7116     Int = Intrinsic::floor;
7117     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
7118   }
7119   case NEON::BI__builtin_neon_vrndnh_f16: {
7120     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7121     Int = Intrinsic::aarch64_neon_frintn;
7122     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
7123   }
7124   case NEON::BI__builtin_neon_vrndn_v:
7125   case NEON::BI__builtin_neon_vrndnq_v: {
7126     Int = Intrinsic::aarch64_neon_frintn;
7127     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
7128   }
7129   case NEON::BI__builtin_neon_vrndph_f16: {
7130     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7131     Int = Intrinsic::ceil;
7132     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
7133   }
7134   case NEON::BI__builtin_neon_vrndp_v:
7135   case NEON::BI__builtin_neon_vrndpq_v: {
7136     Int = Intrinsic::ceil;
7137     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
7138   }
7139   case NEON::BI__builtin_neon_vrndxh_f16: {
7140     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7141     Int = Intrinsic::rint;
7142     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
7143   }
7144   case NEON::BI__builtin_neon_vrndx_v:
7145   case NEON::BI__builtin_neon_vrndxq_v: {
7146     Int = Intrinsic::rint;
7147     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
7148   }
7149   case NEON::BI__builtin_neon_vrndh_f16: {
7150     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7151     Int = Intrinsic::trunc;
7152     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
7153   }
7154   case NEON::BI__builtin_neon_vrnd_v:
7155   case NEON::BI__builtin_neon_vrndq_v: {
7156     Int = Intrinsic::trunc;
7157     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
7158   }
7159   case NEON::BI__builtin_neon_vcvt_f64_v:
7160   case NEON::BI__builtin_neon_vcvtq_f64_v:
7161     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7162     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
7163     return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
7164                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
7165   case NEON::BI__builtin_neon_vcvt_f64_f32: {
7166     assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
7167            "unexpected vcvt_f64_f32 builtin");
7168     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
7169     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
7170 
7171     return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
7172   }
7173   case NEON::BI__builtin_neon_vcvt_f32_f64: {
7174     assert(Type.getEltType() == NeonTypeFlags::Float32 &&
7175            "unexpected vcvt_f32_f64 builtin");
7176     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
7177     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
7178 
7179     return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
7180   }
7181   case NEON::BI__builtin_neon_vcvt_s32_v:
7182   case NEON::BI__builtin_neon_vcvt_u32_v:
7183   case NEON::BI__builtin_neon_vcvt_s64_v:
7184   case NEON::BI__builtin_neon_vcvt_u64_v:
7185 	case NEON::BI__builtin_neon_vcvt_s16_v:
7186 	case NEON::BI__builtin_neon_vcvt_u16_v:
7187   case NEON::BI__builtin_neon_vcvtq_s32_v:
7188   case NEON::BI__builtin_neon_vcvtq_u32_v:
7189   case NEON::BI__builtin_neon_vcvtq_s64_v:
7190   case NEON::BI__builtin_neon_vcvtq_u64_v:
7191 	case NEON::BI__builtin_neon_vcvtq_s16_v:
7192 	case NEON::BI__builtin_neon_vcvtq_u16_v: {
7193     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
7194     if (usgn)
7195       return Builder.CreateFPToUI(Ops[0], Ty);
7196     return Builder.CreateFPToSI(Ops[0], Ty);
7197   }
7198   case NEON::BI__builtin_neon_vcvta_s16_v:
7199   case NEON::BI__builtin_neon_vcvta_s32_v:
7200   case NEON::BI__builtin_neon_vcvtaq_s16_v:
7201   case NEON::BI__builtin_neon_vcvtaq_s32_v:
7202   case NEON::BI__builtin_neon_vcvta_u32_v:
7203   case NEON::BI__builtin_neon_vcvtaq_u16_v:
7204   case NEON::BI__builtin_neon_vcvtaq_u32_v:
7205   case NEON::BI__builtin_neon_vcvta_s64_v:
7206   case NEON::BI__builtin_neon_vcvtaq_s64_v:
7207   case NEON::BI__builtin_neon_vcvta_u64_v:
7208   case NEON::BI__builtin_neon_vcvtaq_u64_v: {
7209     Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
7210     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7211     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
7212   }
7213   case NEON::BI__builtin_neon_vcvtm_s16_v:
7214   case NEON::BI__builtin_neon_vcvtm_s32_v:
7215   case NEON::BI__builtin_neon_vcvtmq_s16_v:
7216   case NEON::BI__builtin_neon_vcvtmq_s32_v:
7217   case NEON::BI__builtin_neon_vcvtm_u16_v:
7218   case NEON::BI__builtin_neon_vcvtm_u32_v:
7219   case NEON::BI__builtin_neon_vcvtmq_u16_v:
7220   case NEON::BI__builtin_neon_vcvtmq_u32_v:
7221   case NEON::BI__builtin_neon_vcvtm_s64_v:
7222   case NEON::BI__builtin_neon_vcvtmq_s64_v:
7223   case NEON::BI__builtin_neon_vcvtm_u64_v:
7224   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
7225     Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
7226     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7227     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
7228   }
7229   case NEON::BI__builtin_neon_vcvtn_s16_v:
7230   case NEON::BI__builtin_neon_vcvtn_s32_v:
7231   case NEON::BI__builtin_neon_vcvtnq_s16_v:
7232   case NEON::BI__builtin_neon_vcvtnq_s32_v:
7233   case NEON::BI__builtin_neon_vcvtn_u16_v:
7234   case NEON::BI__builtin_neon_vcvtn_u32_v:
7235   case NEON::BI__builtin_neon_vcvtnq_u16_v:
7236   case NEON::BI__builtin_neon_vcvtnq_u32_v:
7237   case NEON::BI__builtin_neon_vcvtn_s64_v:
7238   case NEON::BI__builtin_neon_vcvtnq_s64_v:
7239   case NEON::BI__builtin_neon_vcvtn_u64_v:
7240   case NEON::BI__builtin_neon_vcvtnq_u64_v: {
7241     Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
7242     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7243     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
7244   }
7245   case NEON::BI__builtin_neon_vcvtp_s16_v:
7246   case NEON::BI__builtin_neon_vcvtp_s32_v:
7247   case NEON::BI__builtin_neon_vcvtpq_s16_v:
7248   case NEON::BI__builtin_neon_vcvtpq_s32_v:
7249   case NEON::BI__builtin_neon_vcvtp_u16_v:
7250   case NEON::BI__builtin_neon_vcvtp_u32_v:
7251   case NEON::BI__builtin_neon_vcvtpq_u16_v:
7252   case NEON::BI__builtin_neon_vcvtpq_u32_v:
7253   case NEON::BI__builtin_neon_vcvtp_s64_v:
7254   case NEON::BI__builtin_neon_vcvtpq_s64_v:
7255   case NEON::BI__builtin_neon_vcvtp_u64_v:
7256   case NEON::BI__builtin_neon_vcvtpq_u64_v: {
7257     Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
7258     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7259     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
7260   }
7261   case NEON::BI__builtin_neon_vmulx_v:
7262   case NEON::BI__builtin_neon_vmulxq_v: {
7263     Int = Intrinsic::aarch64_neon_fmulx;
7264     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
7265   }
7266   case NEON::BI__builtin_neon_vmulxh_lane_f16:
7267   case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
7268     // vmulx_lane should be mapped to Neon scalar mulx after
7269     // extracting the scalar element
7270     Ops.push_back(EmitScalarExpr(E->getArg(2)));
7271     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
7272     Ops.pop_back();
7273     Int = Intrinsic::aarch64_neon_fmulx;
7274     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
7275   }
7276   case NEON::BI__builtin_neon_vmul_lane_v:
7277   case NEON::BI__builtin_neon_vmul_laneq_v: {
7278     // v1f64 vmul_lane should be mapped to Neon scalar mul lane
7279     bool Quad = false;
7280     if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
7281       Quad = true;
7282     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
7283     llvm::Type *VTy = GetNeonType(this,
7284       NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
7285     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
7286     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
7287     Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
7288     return Builder.CreateBitCast(Result, Ty);
7289   }
7290   case NEON::BI__builtin_neon_vnegd_s64:
7291     return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
7292   case NEON::BI__builtin_neon_vnegh_f16:
7293     return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
7294   case NEON::BI__builtin_neon_vpmaxnm_v:
7295   case NEON::BI__builtin_neon_vpmaxnmq_v: {
7296     Int = Intrinsic::aarch64_neon_fmaxnmp;
7297     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
7298   }
7299   case NEON::BI__builtin_neon_vpminnm_v:
7300   case NEON::BI__builtin_neon_vpminnmq_v: {
7301     Int = Intrinsic::aarch64_neon_fminnmp;
7302     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
7303   }
7304   case NEON::BI__builtin_neon_vsqrth_f16: {
7305     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7306     Int = Intrinsic::sqrt;
7307     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
7308   }
7309   case NEON::BI__builtin_neon_vsqrt_v:
7310   case NEON::BI__builtin_neon_vsqrtq_v: {
7311     Int = Intrinsic::sqrt;
7312     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7313     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
7314   }
7315   case NEON::BI__builtin_neon_vrbit_v:
7316   case NEON::BI__builtin_neon_vrbitq_v: {
7317     Int = Intrinsic::aarch64_neon_rbit;
7318     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
7319   }
7320   case NEON::BI__builtin_neon_vaddv_u8:
7321     // FIXME: These are handled by the AArch64 scalar code.
7322     usgn = true;
7323     LLVM_FALLTHROUGH;
7324   case NEON::BI__builtin_neon_vaddv_s8: {
7325     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7326     Ty = Int32Ty;
7327     VTy = llvm::VectorType::get(Int8Ty, 8);
7328     llvm::Type *Tys[2] = { Ty, VTy };
7329     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7330     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
7331     return Builder.CreateTrunc(Ops[0], Int8Ty);
7332   }
7333   case NEON::BI__builtin_neon_vaddv_u16:
7334     usgn = true;
7335     LLVM_FALLTHROUGH;
7336   case NEON::BI__builtin_neon_vaddv_s16: {
7337     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7338     Ty = Int32Ty;
7339     VTy = llvm::VectorType::get(Int16Ty, 4);
7340     llvm::Type *Tys[2] = { Ty, VTy };
7341     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7342     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
7343     return Builder.CreateTrunc(Ops[0], Int16Ty);
7344   }
7345   case NEON::BI__builtin_neon_vaddvq_u8:
7346     usgn = true;
7347     LLVM_FALLTHROUGH;
7348   case NEON::BI__builtin_neon_vaddvq_s8: {
7349     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7350     Ty = Int32Ty;
7351     VTy = llvm::VectorType::get(Int8Ty, 16);
7352     llvm::Type *Tys[2] = { Ty, VTy };
7353     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7354     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
7355     return Builder.CreateTrunc(Ops[0], Int8Ty);
7356   }
7357   case NEON::BI__builtin_neon_vaddvq_u16:
7358     usgn = true;
7359     LLVM_FALLTHROUGH;
7360   case NEON::BI__builtin_neon_vaddvq_s16: {
7361     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
7362     Ty = Int32Ty;
7363     VTy = llvm::VectorType::get(Int16Ty, 8);
7364     llvm::Type *Tys[2] = { Ty, VTy };
7365     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7366     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
7367     return Builder.CreateTrunc(Ops[0], Int16Ty);
7368   }
7369   case NEON::BI__builtin_neon_vmaxv_u8: {
7370     Int = Intrinsic::aarch64_neon_umaxv;
7371     Ty = Int32Ty;
7372     VTy = llvm::VectorType::get(Int8Ty, 8);
7373     llvm::Type *Tys[2] = { Ty, VTy };
7374     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7375     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7376     return Builder.CreateTrunc(Ops[0], Int8Ty);
7377   }
7378   case NEON::BI__builtin_neon_vmaxv_u16: {
7379     Int = Intrinsic::aarch64_neon_umaxv;
7380     Ty = Int32Ty;
7381     VTy = llvm::VectorType::get(Int16Ty, 4);
7382     llvm::Type *Tys[2] = { Ty, VTy };
7383     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7384     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7385     return Builder.CreateTrunc(Ops[0], Int16Ty);
7386   }
7387   case NEON::BI__builtin_neon_vmaxvq_u8: {
7388     Int = Intrinsic::aarch64_neon_umaxv;
7389     Ty = Int32Ty;
7390     VTy = llvm::VectorType::get(Int8Ty, 16);
7391     llvm::Type *Tys[2] = { Ty, VTy };
7392     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7393     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7394     return Builder.CreateTrunc(Ops[0], Int8Ty);
7395   }
7396   case NEON::BI__builtin_neon_vmaxvq_u16: {
7397     Int = Intrinsic::aarch64_neon_umaxv;
7398     Ty = Int32Ty;
7399     VTy = llvm::VectorType::get(Int16Ty, 8);
7400     llvm::Type *Tys[2] = { Ty, VTy };
7401     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7402     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7403     return Builder.CreateTrunc(Ops[0], Int16Ty);
7404   }
7405   case NEON::BI__builtin_neon_vmaxv_s8: {
7406     Int = Intrinsic::aarch64_neon_smaxv;
7407     Ty = Int32Ty;
7408     VTy = llvm::VectorType::get(Int8Ty, 8);
7409     llvm::Type *Tys[2] = { Ty, VTy };
7410     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7411     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7412     return Builder.CreateTrunc(Ops[0], Int8Ty);
7413   }
7414   case NEON::BI__builtin_neon_vmaxv_s16: {
7415     Int = Intrinsic::aarch64_neon_smaxv;
7416     Ty = Int32Ty;
7417     VTy = llvm::VectorType::get(Int16Ty, 4);
7418     llvm::Type *Tys[2] = { Ty, VTy };
7419     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7420     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7421     return Builder.CreateTrunc(Ops[0], Int16Ty);
7422   }
7423   case NEON::BI__builtin_neon_vmaxvq_s8: {
7424     Int = Intrinsic::aarch64_neon_smaxv;
7425     Ty = Int32Ty;
7426     VTy = llvm::VectorType::get(Int8Ty, 16);
7427     llvm::Type *Tys[2] = { Ty, VTy };
7428     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7429     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7430     return Builder.CreateTrunc(Ops[0], Int8Ty);
7431   }
7432   case NEON::BI__builtin_neon_vmaxvq_s16: {
7433     Int = Intrinsic::aarch64_neon_smaxv;
7434     Ty = Int32Ty;
7435     VTy = llvm::VectorType::get(Int16Ty, 8);
7436     llvm::Type *Tys[2] = { Ty, VTy };
7437     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7438     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7439     return Builder.CreateTrunc(Ops[0], Int16Ty);
7440   }
7441   case NEON::BI__builtin_neon_vmaxv_f16: {
7442     Int = Intrinsic::aarch64_neon_fmaxv;
7443     Ty = HalfTy;
7444     VTy = llvm::VectorType::get(HalfTy, 4);
7445     llvm::Type *Tys[2] = { Ty, VTy };
7446     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7447     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7448     return Builder.CreateTrunc(Ops[0], HalfTy);
7449   }
7450   case NEON::BI__builtin_neon_vmaxvq_f16: {
7451     Int = Intrinsic::aarch64_neon_fmaxv;
7452     Ty = HalfTy;
7453     VTy = llvm::VectorType::get(HalfTy, 8);
7454     llvm::Type *Tys[2] = { Ty, VTy };
7455     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7456     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
7457     return Builder.CreateTrunc(Ops[0], HalfTy);
7458   }
7459   case NEON::BI__builtin_neon_vminv_u8: {
7460     Int = Intrinsic::aarch64_neon_uminv;
7461     Ty = Int32Ty;
7462     VTy = llvm::VectorType::get(Int8Ty, 8);
7463     llvm::Type *Tys[2] = { Ty, VTy };
7464     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7465     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7466     return Builder.CreateTrunc(Ops[0], Int8Ty);
7467   }
7468   case NEON::BI__builtin_neon_vminv_u16: {
7469     Int = Intrinsic::aarch64_neon_uminv;
7470     Ty = Int32Ty;
7471     VTy = llvm::VectorType::get(Int16Ty, 4);
7472     llvm::Type *Tys[2] = { Ty, VTy };
7473     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7474     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7475     return Builder.CreateTrunc(Ops[0], Int16Ty);
7476   }
7477   case NEON::BI__builtin_neon_vminvq_u8: {
7478     Int = Intrinsic::aarch64_neon_uminv;
7479     Ty = Int32Ty;
7480     VTy = llvm::VectorType::get(Int8Ty, 16);
7481     llvm::Type *Tys[2] = { Ty, VTy };
7482     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7483     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7484     return Builder.CreateTrunc(Ops[0], Int8Ty);
7485   }
7486   case NEON::BI__builtin_neon_vminvq_u16: {
7487     Int = Intrinsic::aarch64_neon_uminv;
7488     Ty = Int32Ty;
7489     VTy = llvm::VectorType::get(Int16Ty, 8);
7490     llvm::Type *Tys[2] = { Ty, VTy };
7491     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7492     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7493     return Builder.CreateTrunc(Ops[0], Int16Ty);
7494   }
7495   case NEON::BI__builtin_neon_vminv_s8: {
7496     Int = Intrinsic::aarch64_neon_sminv;
7497     Ty = Int32Ty;
7498     VTy = llvm::VectorType::get(Int8Ty, 8);
7499     llvm::Type *Tys[2] = { Ty, VTy };
7500     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7501     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7502     return Builder.CreateTrunc(Ops[0], Int8Ty);
7503   }
7504   case NEON::BI__builtin_neon_vminv_s16: {
7505     Int = Intrinsic::aarch64_neon_sminv;
7506     Ty = Int32Ty;
7507     VTy = llvm::VectorType::get(Int16Ty, 4);
7508     llvm::Type *Tys[2] = { Ty, VTy };
7509     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7510     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7511     return Builder.CreateTrunc(Ops[0], Int16Ty);
7512   }
7513   case NEON::BI__builtin_neon_vminvq_s8: {
7514     Int = Intrinsic::aarch64_neon_sminv;
7515     Ty = Int32Ty;
7516     VTy = llvm::VectorType::get(Int8Ty, 16);
7517     llvm::Type *Tys[2] = { Ty, VTy };
7518     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7519     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7520     return Builder.CreateTrunc(Ops[0], Int8Ty);
7521   }
7522   case NEON::BI__builtin_neon_vminvq_s16: {
7523     Int = Intrinsic::aarch64_neon_sminv;
7524     Ty = Int32Ty;
7525     VTy = llvm::VectorType::get(Int16Ty, 8);
7526     llvm::Type *Tys[2] = { Ty, VTy };
7527     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7528     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7529     return Builder.CreateTrunc(Ops[0], Int16Ty);
7530   }
7531   case NEON::BI__builtin_neon_vminv_f16: {
7532     Int = Intrinsic::aarch64_neon_fminv;
7533     Ty = HalfTy;
7534     VTy = llvm::VectorType::get(HalfTy, 4);
7535     llvm::Type *Tys[2] = { Ty, VTy };
7536     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7537     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7538     return Builder.CreateTrunc(Ops[0], HalfTy);
7539   }
7540   case NEON::BI__builtin_neon_vminvq_f16: {
7541     Int = Intrinsic::aarch64_neon_fminv;
7542     Ty = HalfTy;
7543     VTy = llvm::VectorType::get(HalfTy, 8);
7544     llvm::Type *Tys[2] = { Ty, VTy };
7545     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7546     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
7547     return Builder.CreateTrunc(Ops[0], HalfTy);
7548   }
7549   case NEON::BI__builtin_neon_vmaxnmv_f16: {
7550     Int = Intrinsic::aarch64_neon_fmaxnmv;
7551     Ty = HalfTy;
7552     VTy = llvm::VectorType::get(HalfTy, 4);
7553     llvm::Type *Tys[2] = { Ty, VTy };
7554     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7555     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
7556     return Builder.CreateTrunc(Ops[0], HalfTy);
7557   }
7558   case NEON::BI__builtin_neon_vmaxnmvq_f16: {
7559     Int = Intrinsic::aarch64_neon_fmaxnmv;
7560     Ty = HalfTy;
7561     VTy = llvm::VectorType::get(HalfTy, 8);
7562     llvm::Type *Tys[2] = { Ty, VTy };
7563     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7564     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
7565     return Builder.CreateTrunc(Ops[0], HalfTy);
7566   }
7567   case NEON::BI__builtin_neon_vminnmv_f16: {
7568     Int = Intrinsic::aarch64_neon_fminnmv;
7569     Ty = HalfTy;
7570     VTy = llvm::VectorType::get(HalfTy, 4);
7571     llvm::Type *Tys[2] = { Ty, VTy };
7572     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7573     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
7574     return Builder.CreateTrunc(Ops[0], HalfTy);
7575   }
7576   case NEON::BI__builtin_neon_vminnmvq_f16: {
7577     Int = Intrinsic::aarch64_neon_fminnmv;
7578     Ty = HalfTy;
7579     VTy = llvm::VectorType::get(HalfTy, 8);
7580     llvm::Type *Tys[2] = { Ty, VTy };
7581     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7582     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
7583     return Builder.CreateTrunc(Ops[0], HalfTy);
7584   }
7585   case NEON::BI__builtin_neon_vmul_n_f64: {
7586     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
7587     Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
7588     return Builder.CreateFMul(Ops[0], RHS);
7589   }
7590   case NEON::BI__builtin_neon_vaddlv_u8: {
7591     Int = Intrinsic::aarch64_neon_uaddlv;
7592     Ty = Int32Ty;
7593     VTy = llvm::VectorType::get(Int8Ty, 8);
7594     llvm::Type *Tys[2] = { Ty, VTy };
7595     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7596     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7597     return Builder.CreateTrunc(Ops[0], Int16Ty);
7598   }
7599   case NEON::BI__builtin_neon_vaddlv_u16: {
7600     Int = Intrinsic::aarch64_neon_uaddlv;
7601     Ty = Int32Ty;
7602     VTy = llvm::VectorType::get(Int16Ty, 4);
7603     llvm::Type *Tys[2] = { Ty, VTy };
7604     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7605     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7606   }
7607   case NEON::BI__builtin_neon_vaddlvq_u8: {
7608     Int = Intrinsic::aarch64_neon_uaddlv;
7609     Ty = Int32Ty;
7610     VTy = llvm::VectorType::get(Int8Ty, 16);
7611     llvm::Type *Tys[2] = { Ty, VTy };
7612     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7613     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7614     return Builder.CreateTrunc(Ops[0], Int16Ty);
7615   }
7616   case NEON::BI__builtin_neon_vaddlvq_u16: {
7617     Int = Intrinsic::aarch64_neon_uaddlv;
7618     Ty = Int32Ty;
7619     VTy = llvm::VectorType::get(Int16Ty, 8);
7620     llvm::Type *Tys[2] = { Ty, VTy };
7621     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7622     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7623   }
7624   case NEON::BI__builtin_neon_vaddlv_s8: {
7625     Int = Intrinsic::aarch64_neon_saddlv;
7626     Ty = Int32Ty;
7627     VTy = llvm::VectorType::get(Int8Ty, 8);
7628     llvm::Type *Tys[2] = { Ty, VTy };
7629     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7630     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7631     return Builder.CreateTrunc(Ops[0], Int16Ty);
7632   }
7633   case NEON::BI__builtin_neon_vaddlv_s16: {
7634     Int = Intrinsic::aarch64_neon_saddlv;
7635     Ty = Int32Ty;
7636     VTy = llvm::VectorType::get(Int16Ty, 4);
7637     llvm::Type *Tys[2] = { Ty, VTy };
7638     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7639     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7640   }
7641   case NEON::BI__builtin_neon_vaddlvq_s8: {
7642     Int = Intrinsic::aarch64_neon_saddlv;
7643     Ty = Int32Ty;
7644     VTy = llvm::VectorType::get(Int8Ty, 16);
7645     llvm::Type *Tys[2] = { Ty, VTy };
7646     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7647     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7648     return Builder.CreateTrunc(Ops[0], Int16Ty);
7649   }
7650   case NEON::BI__builtin_neon_vaddlvq_s16: {
7651     Int = Intrinsic::aarch64_neon_saddlv;
7652     Ty = Int32Ty;
7653     VTy = llvm::VectorType::get(Int16Ty, 8);
7654     llvm::Type *Tys[2] = { Ty, VTy };
7655     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7656     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
7657   }
7658   case NEON::BI__builtin_neon_vsri_n_v:
7659   case NEON::BI__builtin_neon_vsriq_n_v: {
7660     Int = Intrinsic::aarch64_neon_vsri;
7661     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
7662     return EmitNeonCall(Intrin, Ops, "vsri_n");
7663   }
7664   case NEON::BI__builtin_neon_vsli_n_v:
7665   case NEON::BI__builtin_neon_vsliq_n_v: {
7666     Int = Intrinsic::aarch64_neon_vsli;
7667     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
7668     return EmitNeonCall(Intrin, Ops, "vsli_n");
7669   }
7670   case NEON::BI__builtin_neon_vsra_n_v:
7671   case NEON::BI__builtin_neon_vsraq_n_v:
7672     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7673     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
7674     return Builder.CreateAdd(Ops[0], Ops[1]);
7675   case NEON::BI__builtin_neon_vrsra_n_v:
7676   case NEON::BI__builtin_neon_vrsraq_n_v: {
7677     Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
7678     SmallVector<llvm::Value*,2> TmpOps;
7679     TmpOps.push_back(Ops[1]);
7680     TmpOps.push_back(Ops[2]);
7681     Function* F = CGM.getIntrinsic(Int, Ty);
7682     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
7683     Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7684     return Builder.CreateAdd(Ops[0], tmp);
7685   }
7686     // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
7687     // of an Align parameter here.
7688   case NEON::BI__builtin_neon_vld1_x2_v:
7689   case NEON::BI__builtin_neon_vld1q_x2_v:
7690   case NEON::BI__builtin_neon_vld1_x3_v:
7691   case NEON::BI__builtin_neon_vld1q_x3_v:
7692   case NEON::BI__builtin_neon_vld1_x4_v:
7693   case NEON::BI__builtin_neon_vld1q_x4_v: {
7694     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
7695     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
7696     llvm::Type *Tys[2] = { VTy, PTy };
7697     unsigned Int;
7698     switch (BuiltinID) {
7699     case NEON::BI__builtin_neon_vld1_x2_v:
7700     case NEON::BI__builtin_neon_vld1q_x2_v:
7701       Int = Intrinsic::aarch64_neon_ld1x2;
7702       break;
7703     case NEON::BI__builtin_neon_vld1_x3_v:
7704     case NEON::BI__builtin_neon_vld1q_x3_v:
7705       Int = Intrinsic::aarch64_neon_ld1x3;
7706       break;
7707     case NEON::BI__builtin_neon_vld1_x4_v:
7708     case NEON::BI__builtin_neon_vld1q_x4_v:
7709       Int = Intrinsic::aarch64_neon_ld1x4;
7710       break;
7711     }
7712     Function *F = CGM.getIntrinsic(Int, Tys);
7713     Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
7714     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
7715     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7716     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7717   }
7718   case NEON::BI__builtin_neon_vst1_x2_v:
7719   case NEON::BI__builtin_neon_vst1q_x2_v:
7720   case NEON::BI__builtin_neon_vst1_x3_v:
7721   case NEON::BI__builtin_neon_vst1q_x3_v:
7722   case NEON::BI__builtin_neon_vst1_x4_v:
7723   case NEON::BI__builtin_neon_vst1q_x4_v: {
7724     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
7725     llvm::Type *Tys[2] = { VTy, PTy };
7726     unsigned Int;
7727     switch (BuiltinID) {
7728     case NEON::BI__builtin_neon_vst1_x2_v:
7729     case NEON::BI__builtin_neon_vst1q_x2_v:
7730       Int = Intrinsic::aarch64_neon_st1x2;
7731       break;
7732     case NEON::BI__builtin_neon_vst1_x3_v:
7733     case NEON::BI__builtin_neon_vst1q_x3_v:
7734       Int = Intrinsic::aarch64_neon_st1x3;
7735       break;
7736     case NEON::BI__builtin_neon_vst1_x4_v:
7737     case NEON::BI__builtin_neon_vst1q_x4_v:
7738       Int = Intrinsic::aarch64_neon_st1x4;
7739       break;
7740     }
7741     std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
7742     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
7743   }
7744   case NEON::BI__builtin_neon_vld1_v:
7745   case NEON::BI__builtin_neon_vld1q_v: {
7746     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
7747     auto Alignment = CharUnits::fromQuantity(
7748         BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16);
7749     return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment);
7750   }
7751   case NEON::BI__builtin_neon_vst1_v:
7752   case NEON::BI__builtin_neon_vst1q_v:
7753     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
7754     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
7755     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7756   case NEON::BI__builtin_neon_vld1_lane_v:
7757   case NEON::BI__builtin_neon_vld1q_lane_v: {
7758     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7759     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
7760     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7761     auto Alignment = CharUnits::fromQuantity(
7762         BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16);
7763     Ops[0] =
7764         Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
7765     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
7766   }
7767   case NEON::BI__builtin_neon_vld1_dup_v:
7768   case NEON::BI__builtin_neon_vld1q_dup_v: {
7769     Value *V = UndefValue::get(Ty);
7770     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
7771     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7772     auto Alignment = CharUnits::fromQuantity(
7773         BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 8 : 16);
7774     Ops[0] =
7775         Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
7776     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
7777     Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
7778     return EmitNeonSplat(Ops[0], CI);
7779   }
7780   case NEON::BI__builtin_neon_vst1_lane_v:
7781   case NEON::BI__builtin_neon_vst1q_lane_v:
7782     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7783     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
7784     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
7785     return Builder.CreateDefaultAlignedStore(Ops[1],
7786                                              Builder.CreateBitCast(Ops[0], Ty));
7787   case NEON::BI__builtin_neon_vld2_v:
7788   case NEON::BI__builtin_neon_vld2q_v: {
7789     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
7790     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
7791     llvm::Type *Tys[2] = { VTy, PTy };
7792     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
7793     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
7794     Ops[0] = Builder.CreateBitCast(Ops[0],
7795                 llvm::PointerType::getUnqual(Ops[1]->getType()));
7796     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7797   }
7798   case NEON::BI__builtin_neon_vld3_v:
7799   case NEON::BI__builtin_neon_vld3q_v: {
7800     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
7801     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
7802     llvm::Type *Tys[2] = { VTy, PTy };
7803     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
7804     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
7805     Ops[0] = Builder.CreateBitCast(Ops[0],
7806                 llvm::PointerType::getUnqual(Ops[1]->getType()));
7807     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7808   }
7809   case NEON::BI__builtin_neon_vld4_v:
7810   case NEON::BI__builtin_neon_vld4q_v: {
7811     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
7812     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
7813     llvm::Type *Tys[2] = { VTy, PTy };
7814     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
7815     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
7816     Ops[0] = Builder.CreateBitCast(Ops[0],
7817                 llvm::PointerType::getUnqual(Ops[1]->getType()));
7818     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7819   }
7820   case NEON::BI__builtin_neon_vld2_dup_v:
7821   case NEON::BI__builtin_neon_vld2q_dup_v: {
7822     llvm::Type *PTy =
7823       llvm::PointerType::getUnqual(VTy->getElementType());
7824     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
7825     llvm::Type *Tys[2] = { VTy, PTy };
7826     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
7827     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
7828     Ops[0] = Builder.CreateBitCast(Ops[0],
7829                 llvm::PointerType::getUnqual(Ops[1]->getType()));
7830     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7831   }
7832   case NEON::BI__builtin_neon_vld3_dup_v:
7833   case NEON::BI__builtin_neon_vld3q_dup_v: {
7834     llvm::Type *PTy =
7835       llvm::PointerType::getUnqual(VTy->getElementType());
7836     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
7837     llvm::Type *Tys[2] = { VTy, PTy };
7838     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
7839     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
7840     Ops[0] = Builder.CreateBitCast(Ops[0],
7841                 llvm::PointerType::getUnqual(Ops[1]->getType()));
7842     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7843   }
7844   case NEON::BI__builtin_neon_vld4_dup_v:
7845   case NEON::BI__builtin_neon_vld4q_dup_v: {
7846     llvm::Type *PTy =
7847       llvm::PointerType::getUnqual(VTy->getElementType());
7848     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
7849     llvm::Type *Tys[2] = { VTy, PTy };
7850     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
7851     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
7852     Ops[0] = Builder.CreateBitCast(Ops[0],
7853                 llvm::PointerType::getUnqual(Ops[1]->getType()));
7854     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7855   }
7856   case NEON::BI__builtin_neon_vld2_lane_v:
7857   case NEON::BI__builtin_neon_vld2q_lane_v: {
7858     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7859     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
7860     Ops.push_back(Ops[1]);
7861     Ops.erase(Ops.begin()+1);
7862     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7863     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7864     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
7865     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
7866     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
7867     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7868     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7869   }
7870   case NEON::BI__builtin_neon_vld3_lane_v:
7871   case NEON::BI__builtin_neon_vld3q_lane_v: {
7872     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7873     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
7874     Ops.push_back(Ops[1]);
7875     Ops.erase(Ops.begin()+1);
7876     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7877     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7878     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
7879     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
7880     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
7881     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
7882     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7883     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7884   }
7885   case NEON::BI__builtin_neon_vld4_lane_v:
7886   case NEON::BI__builtin_neon_vld4q_lane_v: {
7887     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
7888     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
7889     Ops.push_back(Ops[1]);
7890     Ops.erase(Ops.begin()+1);
7891     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7892     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7893     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
7894     Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
7895     Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
7896     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
7897     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
7898     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7899     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7900   }
7901   case NEON::BI__builtin_neon_vst2_v:
7902   case NEON::BI__builtin_neon_vst2q_v: {
7903     Ops.push_back(Ops[0]);
7904     Ops.erase(Ops.begin());
7905     llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
7906     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
7907                         Ops, "");
7908   }
7909   case NEON::BI__builtin_neon_vst2_lane_v:
7910   case NEON::BI__builtin_neon_vst2q_lane_v: {
7911     Ops.push_back(Ops[0]);
7912     Ops.erase(Ops.begin());
7913     Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
7914     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7915     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
7916                         Ops, "");
7917   }
7918   case NEON::BI__builtin_neon_vst3_v:
7919   case NEON::BI__builtin_neon_vst3q_v: {
7920     Ops.push_back(Ops[0]);
7921     Ops.erase(Ops.begin());
7922     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
7923     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
7924                         Ops, "");
7925   }
7926   case NEON::BI__builtin_neon_vst3_lane_v:
7927   case NEON::BI__builtin_neon_vst3q_lane_v: {
7928     Ops.push_back(Ops[0]);
7929     Ops.erase(Ops.begin());
7930     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
7931     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7932     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
7933                         Ops, "");
7934   }
7935   case NEON::BI__builtin_neon_vst4_v:
7936   case NEON::BI__builtin_neon_vst4q_v: {
7937     Ops.push_back(Ops[0]);
7938     Ops.erase(Ops.begin());
7939     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
7940     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
7941                         Ops, "");
7942   }
7943   case NEON::BI__builtin_neon_vst4_lane_v:
7944   case NEON::BI__builtin_neon_vst4q_lane_v: {
7945     Ops.push_back(Ops[0]);
7946     Ops.erase(Ops.begin());
7947     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
7948     llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
7949     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
7950                         Ops, "");
7951   }
7952   case NEON::BI__builtin_neon_vtrn_v:
7953   case NEON::BI__builtin_neon_vtrnq_v: {
7954     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
7955     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7956     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7957     Value *SV = nullptr;
7958 
7959     for (unsigned vi = 0; vi != 2; ++vi) {
7960       SmallVector<uint32_t, 16> Indices;
7961       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7962         Indices.push_back(i+vi);
7963         Indices.push_back(i+e+vi);
7964       }
7965       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7966       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
7967       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7968     }
7969     return SV;
7970   }
7971   case NEON::BI__builtin_neon_vuzp_v:
7972   case NEON::BI__builtin_neon_vuzpq_v: {
7973     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
7974     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7975     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7976     Value *SV = nullptr;
7977 
7978     for (unsigned vi = 0; vi != 2; ++vi) {
7979       SmallVector<uint32_t, 16> Indices;
7980       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7981         Indices.push_back(2*i+vi);
7982 
7983       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7984       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
7985       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7986     }
7987     return SV;
7988   }
7989   case NEON::BI__builtin_neon_vzip_v:
7990   case NEON::BI__builtin_neon_vzipq_v: {
7991     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
7992     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7993     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7994     Value *SV = nullptr;
7995 
7996     for (unsigned vi = 0; vi != 2; ++vi) {
7997       SmallVector<uint32_t, 16> Indices;
7998       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7999         Indices.push_back((i + vi*e) >> 1);
8000         Indices.push_back(((i + vi*e) >> 1)+e);
8001       }
8002       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8003       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
8004       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8005     }
8006     return SV;
8007   }
8008   case NEON::BI__builtin_neon_vqtbl1q_v: {
8009     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
8010                         Ops, "vtbl1");
8011   }
8012   case NEON::BI__builtin_neon_vqtbl2q_v: {
8013     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
8014                         Ops, "vtbl2");
8015   }
8016   case NEON::BI__builtin_neon_vqtbl3q_v: {
8017     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
8018                         Ops, "vtbl3");
8019   }
8020   case NEON::BI__builtin_neon_vqtbl4q_v: {
8021     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
8022                         Ops, "vtbl4");
8023   }
8024   case NEON::BI__builtin_neon_vqtbx1q_v: {
8025     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
8026                         Ops, "vtbx1");
8027   }
8028   case NEON::BI__builtin_neon_vqtbx2q_v: {
8029     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
8030                         Ops, "vtbx2");
8031   }
8032   case NEON::BI__builtin_neon_vqtbx3q_v: {
8033     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
8034                         Ops, "vtbx3");
8035   }
8036   case NEON::BI__builtin_neon_vqtbx4q_v: {
8037     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
8038                         Ops, "vtbx4");
8039   }
8040   case NEON::BI__builtin_neon_vsqadd_v:
8041   case NEON::BI__builtin_neon_vsqaddq_v: {
8042     Int = Intrinsic::aarch64_neon_usqadd;
8043     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
8044   }
8045   case NEON::BI__builtin_neon_vuqadd_v:
8046   case NEON::BI__builtin_neon_vuqaddq_v: {
8047     Int = Intrinsic::aarch64_neon_suqadd;
8048     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
8049   }
8050   }
8051 }
8052 
8053 llvm::Value *CodeGenFunction::
8054 BuildVector(ArrayRef<llvm::Value*> Ops) {
8055   assert((Ops.size() & (Ops.size() - 1)) == 0 &&
8056          "Not a power-of-two sized vector!");
8057   bool AllConstants = true;
8058   for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
8059     AllConstants &= isa<Constant>(Ops[i]);
8060 
8061   // If this is a constant vector, create a ConstantVector.
8062   if (AllConstants) {
8063     SmallVector<llvm::Constant*, 16> CstOps;
8064     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
8065       CstOps.push_back(cast<Constant>(Ops[i]));
8066     return llvm::ConstantVector::get(CstOps);
8067   }
8068 
8069   // Otherwise, insertelement the values to build the vector.
8070   Value *Result =
8071     llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
8072 
8073   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
8074     Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
8075 
8076   return Result;
8077 }
8078 
8079 // Convert the mask from an integer type to a vector of i1.
8080 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
8081                               unsigned NumElts) {
8082 
8083   llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
8084                          cast<IntegerType>(Mask->getType())->getBitWidth());
8085   Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
8086 
8087   // If we have less than 8 elements, then the starting mask was an i8 and
8088   // we need to extract down to the right number of elements.
8089   if (NumElts < 8) {
8090     uint32_t Indices[4];
8091     for (unsigned i = 0; i != NumElts; ++i)
8092       Indices[i] = i;
8093     MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
8094                                              makeArrayRef(Indices, NumElts),
8095                                              "extract");
8096   }
8097   return MaskVec;
8098 }
8099 
8100 static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
8101                                  SmallVectorImpl<Value *> &Ops,
8102                                  unsigned Align) {
8103   // Cast the pointer to right type.
8104   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
8105                                llvm::PointerType::getUnqual(Ops[1]->getType()));
8106 
8107   // If the mask is all ones just emit a regular store.
8108   if (const auto *C = dyn_cast<Constant>(Ops[2]))
8109     if (C->isAllOnesValue())
8110       return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align);
8111 
8112   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
8113                                    Ops[1]->getType()->getVectorNumElements());
8114 
8115   return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec);
8116 }
8117 
8118 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
8119                                 SmallVectorImpl<Value *> &Ops, unsigned Align) {
8120   // Cast the pointer to right type.
8121   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
8122                                llvm::PointerType::getUnqual(Ops[1]->getType()));
8123 
8124   // If the mask is all ones just emit a regular store.
8125   if (const auto *C = dyn_cast<Constant>(Ops[2]))
8126     if (C->isAllOnesValue())
8127       return CGF.Builder.CreateAlignedLoad(Ops[0], Align);
8128 
8129   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
8130                                    Ops[1]->getType()->getVectorNumElements());
8131 
8132   return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
8133 }
8134 
8135 static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
8136                               unsigned NumElts, SmallVectorImpl<Value *> &Ops,
8137                               bool InvertLHS = false) {
8138   Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
8139   Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
8140 
8141   if (InvertLHS)
8142     LHS = CGF.Builder.CreateNot(LHS);
8143 
8144   return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
8145                                   CGF.Builder.getIntNTy(std::max(NumElts, 8U)));
8146 }
8147 
8148 static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF,
8149                                         SmallVectorImpl<Value *> &Ops,
8150                                         llvm::Type *DstTy,
8151                                         unsigned SrcSizeInBits,
8152                                         unsigned Align) {
8153   // Load the subvector.
8154   Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align);
8155 
8156   // Create broadcast mask.
8157   unsigned NumDstElts = DstTy->getVectorNumElements();
8158   unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits();
8159 
8160   SmallVector<uint32_t, 8> Mask;
8161   for (unsigned i = 0; i != NumDstElts; i += NumSrcElts)
8162     for (unsigned j = 0; j != NumSrcElts; ++j)
8163       Mask.push_back(j);
8164 
8165   return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst");
8166 }
8167 
8168 static Value *EmitX86Select(CodeGenFunction &CGF,
8169                             Value *Mask, Value *Op0, Value *Op1) {
8170 
8171   // If the mask is all ones just return first argument.
8172   if (const auto *C = dyn_cast<Constant>(Mask))
8173     if (C->isAllOnesValue())
8174       return Op0;
8175 
8176   Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());
8177 
8178   return CGF.Builder.CreateSelect(Mask, Op0, Op1);
8179 }
8180 
8181 static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
8182                                          unsigned NumElts, Value *MaskIn) {
8183   if (MaskIn) {
8184     const auto *C = dyn_cast<Constant>(MaskIn);
8185     if (!C || !C->isAllOnesValue())
8186       Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
8187   }
8188 
8189   if (NumElts < 8) {
8190     uint32_t Indices[8];
8191     for (unsigned i = 0; i != NumElts; ++i)
8192       Indices[i] = i;
8193     for (unsigned i = NumElts; i != 8; ++i)
8194       Indices[i] = i % NumElts + NumElts;
8195     Cmp = CGF.Builder.CreateShuffleVector(
8196         Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
8197   }
8198 
8199   return CGF.Builder.CreateBitCast(Cmp,
8200                                    IntegerType::get(CGF.getLLVMContext(),
8201                                                     std::max(NumElts, 8U)));
8202 }
8203 
8204 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
8205                                    bool Signed, ArrayRef<Value *> Ops) {
8206   assert((Ops.size() == 2 || Ops.size() == 4) &&
8207          "Unexpected number of arguments");
8208   unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
8209   Value *Cmp;
8210 
8211   if (CC == 3) {
8212     Cmp = Constant::getNullValue(
8213                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
8214   } else if (CC == 7) {
8215     Cmp = Constant::getAllOnesValue(
8216                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
8217   } else {
8218     ICmpInst::Predicate Pred;
8219     switch (CC) {
8220     default: llvm_unreachable("Unknown condition code");
8221     case 0: Pred = ICmpInst::ICMP_EQ;  break;
8222     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
8223     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
8224     case 4: Pred = ICmpInst::ICMP_NE;  break;
8225     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
8226     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
8227     }
8228     Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
8229   }
8230 
8231   Value *MaskIn = nullptr;
8232   if (Ops.size() == 4)
8233     MaskIn = Ops[3];
8234 
8235   return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
8236 }
8237 
8238 static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
8239   Value *Zero = Constant::getNullValue(In->getType());
8240   return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
8241 }
8242 
8243 static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) {
8244 
8245   llvm::Type *Ty = Ops[0]->getType();
8246   Value *Zero = llvm::Constant::getNullValue(Ty);
8247   Value *Sub = CGF.Builder.CreateSub(Zero, Ops[0]);
8248   Value *Cmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Zero);
8249   Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Sub);
8250   if (Ops.size() == 1)
8251     return Res;
8252   return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
8253 }
8254 
8255 static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
8256                             ArrayRef<Value *> Ops) {
8257   Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
8258   Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
8259 
8260   if (Ops.size() == 2)
8261     return Res;
8262 
8263   assert(Ops.size() == 4);
8264   return EmitX86Select(CGF, Ops[3], Res, Ops[2]);
8265 }
8266 
8267 static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
8268                               llvm::Type *DstTy) {
8269   unsigned NumberOfElements = DstTy->getVectorNumElements();
8270   Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
8271   return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
8272 }
8273 
8274 Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
8275   const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
8276   StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
8277   return EmitX86CpuIs(CPUStr);
8278 }
8279 
8280 Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
8281 
8282   llvm::Type *Int32Ty = Builder.getInt32Ty();
8283 
8284   // Matching the struct layout from the compiler-rt/libgcc structure that is
8285   // filled in:
8286   // unsigned int __cpu_vendor;
8287   // unsigned int __cpu_type;
8288   // unsigned int __cpu_subtype;
8289   // unsigned int __cpu_features[1];
8290   llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
8291                                           llvm::ArrayType::get(Int32Ty, 1));
8292 
8293   // Grab the global __cpu_model.
8294   llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
8295 
8296   // Calculate the index needed to access the correct field based on the
8297   // range. Also adjust the expected value.
8298   unsigned Index;
8299   unsigned Value;
8300   std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
8301 #define X86_VENDOR(ENUM, STRING)                                               \
8302   .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
8303 #define X86_CPU_TYPE_COMPAT_WITH_ALIAS(ARCHNAME, ENUM, STR, ALIAS)             \
8304   .Cases(STR, ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
8305 #define X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR)                               \
8306   .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
8307 #define X86_CPU_SUBTYPE_COMPAT(ARCHNAME, ENUM, STR)                            \
8308   .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
8309 #include "llvm/Support/X86TargetParser.def"
8310                                .Default({0, 0});
8311   assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
8312 
8313   // Grab the appropriate field from __cpu_model.
8314   llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
8315                          ConstantInt::get(Int32Ty, Index)};
8316   llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs);
8317   CpuValue = Builder.CreateAlignedLoad(CpuValue, CharUnits::fromQuantity(4));
8318 
8319   // Check the value of the field against the requested value.
8320   return Builder.CreateICmpEQ(CpuValue,
8321                                   llvm::ConstantInt::get(Int32Ty, Value));
8322 }
8323 
8324 Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
8325   const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
8326   StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
8327   return EmitX86CpuSupports(FeatureStr);
8328 }
8329 
8330 Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
8331   // Processor features and mapping to processor feature value.
8332 
8333   uint32_t FeaturesMask = 0;
8334 
8335   for (const StringRef &FeatureStr : FeatureStrs) {
8336     unsigned Feature =
8337         StringSwitch<unsigned>(FeatureStr)
8338 #define X86_FEATURE_COMPAT(VAL, ENUM, STR) .Case(STR, VAL)
8339 #include "llvm/Support/X86TargetParser.def"
8340         ;
8341     FeaturesMask |= (1U << Feature);
8342   }
8343 
8344   // Matching the struct layout from the compiler-rt/libgcc structure that is
8345   // filled in:
8346   // unsigned int __cpu_vendor;
8347   // unsigned int __cpu_type;
8348   // unsigned int __cpu_subtype;
8349   // unsigned int __cpu_features[1];
8350   llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
8351                                           llvm::ArrayType::get(Int32Ty, 1));
8352 
8353   // Grab the global __cpu_model.
8354   llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
8355 
8356   // Grab the first (0th) element from the field __cpu_features off of the
8357   // global in the struct STy.
8358   Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 3),
8359                    ConstantInt::get(Int32Ty, 0)};
8360   Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
8361   Value *Features =
8362       Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4));
8363 
8364   // Check the value of the bit corresponding to the feature requested.
8365   Value *Bitset = Builder.CreateAnd(
8366       Features, llvm::ConstantInt::get(Int32Ty, FeaturesMask));
8367   return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
8368 }
8369 
8370 Value *CodeGenFunction::EmitX86CpuInit() {
8371   llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
8372                                                     /*Variadic*/ false);
8373   llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
8374   return Builder.CreateCall(Func);
8375 }
8376 
8377 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
8378                                            const CallExpr *E) {
8379   if (BuiltinID == X86::BI__builtin_cpu_is)
8380     return EmitX86CpuIs(E);
8381   if (BuiltinID == X86::BI__builtin_cpu_supports)
8382     return EmitX86CpuSupports(E);
8383   if (BuiltinID == X86::BI__builtin_cpu_init)
8384     return EmitX86CpuInit();
8385 
8386   SmallVector<Value*, 4> Ops;
8387 
8388   // Find out if any arguments are required to be integer constant expressions.
8389   unsigned ICEArguments = 0;
8390   ASTContext::GetBuiltinTypeError Error;
8391   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
8392   assert(Error == ASTContext::GE_None && "Should not codegen an error");
8393 
8394   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
8395     // If this is a normal argument, just emit it as a scalar.
8396     if ((ICEArguments & (1 << i)) == 0) {
8397       Ops.push_back(EmitScalarExpr(E->getArg(i)));
8398       continue;
8399     }
8400 
8401     // If this is required to be a constant, constant fold it so that we know
8402     // that the generated intrinsic gets a ConstantInt.
8403     llvm::APSInt Result;
8404     bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
8405     assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
8406     Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
8407   }
8408 
8409   // These exist so that the builtin that takes an immediate can be bounds
8410   // checked by clang to avoid passing bad immediates to the backend. Since
8411   // AVX has a larger immediate than SSE we would need separate builtins to
8412   // do the different bounds checking. Rather than create a clang specific
8413   // SSE only builtin, this implements eight separate builtins to match gcc
8414   // implementation.
8415   auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
8416     Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
8417     llvm::Function *F = CGM.getIntrinsic(ID);
8418     return Builder.CreateCall(F, Ops);
8419   };
8420 
8421   // For the vector forms of FP comparisons, translate the builtins directly to
8422   // IR.
8423   // TODO: The builtins could be removed if the SSE header files used vector
8424   // extension comparisons directly (vector ordered/unordered may need
8425   // additional support via __builtin_isnan()).
8426   auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
8427     Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
8428     llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
8429     llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
8430     Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
8431     return Builder.CreateBitCast(Sext, FPVecTy);
8432   };
8433 
8434   switch (BuiltinID) {
8435   default: return nullptr;
8436   case X86::BI_mm_prefetch: {
8437     Value *Address = Ops[0];
8438     ConstantInt *C = cast<ConstantInt>(Ops[1]);
8439     Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
8440     Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
8441     Value *Data = ConstantInt::get(Int32Ty, 1);
8442     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
8443     return Builder.CreateCall(F, {Address, RW, Locality, Data});
8444   }
8445   case X86::BI_mm_clflush: {
8446     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
8447                               Ops[0]);
8448   }
8449   case X86::BI_mm_lfence: {
8450     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
8451   }
8452   case X86::BI_mm_mfence: {
8453     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
8454   }
8455   case X86::BI_mm_sfence: {
8456     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
8457   }
8458   case X86::BI_mm_pause: {
8459     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
8460   }
8461   case X86::BI__rdtsc: {
8462     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
8463   }
8464   case X86::BI__builtin_ia32_undef128:
8465   case X86::BI__builtin_ia32_undef256:
8466   case X86::BI__builtin_ia32_undef512:
8467     // The x86 definition of "undef" is not the same as the LLVM definition
8468     // (PR32176). We leave optimizing away an unnecessary zero constant to the
8469     // IR optimizer and backend.
8470     // TODO: If we had a "freeze" IR instruction to generate a fixed undef
8471     // value, we should use that here instead of a zero.
8472     return llvm::Constant::getNullValue(ConvertType(E->getType()));
8473   case X86::BI__builtin_ia32_vec_init_v8qi:
8474   case X86::BI__builtin_ia32_vec_init_v4hi:
8475   case X86::BI__builtin_ia32_vec_init_v2si:
8476     return Builder.CreateBitCast(BuildVector(Ops),
8477                                  llvm::Type::getX86_MMXTy(getLLVMContext()));
8478   case X86::BI__builtin_ia32_vec_ext_v2si:
8479     return Builder.CreateExtractElement(Ops[0],
8480                                   llvm::ConstantInt::get(Ops[1]->getType(), 0));
8481   case X86::BI_mm_setcsr:
8482   case X86::BI__builtin_ia32_ldmxcsr: {
8483     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
8484     Builder.CreateStore(Ops[0], Tmp);
8485     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
8486                           Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
8487   }
8488   case X86::BI_mm_getcsr:
8489   case X86::BI__builtin_ia32_stmxcsr: {
8490     Address Tmp = CreateMemTemp(E->getType());
8491     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
8492                        Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
8493     return Builder.CreateLoad(Tmp, "stmxcsr");
8494   }
8495   case X86::BI__builtin_ia32_xsave:
8496   case X86::BI__builtin_ia32_xsave64:
8497   case X86::BI__builtin_ia32_xrstor:
8498   case X86::BI__builtin_ia32_xrstor64:
8499   case X86::BI__builtin_ia32_xsaveopt:
8500   case X86::BI__builtin_ia32_xsaveopt64:
8501   case X86::BI__builtin_ia32_xrstors:
8502   case X86::BI__builtin_ia32_xrstors64:
8503   case X86::BI__builtin_ia32_xsavec:
8504   case X86::BI__builtin_ia32_xsavec64:
8505   case X86::BI__builtin_ia32_xsaves:
8506   case X86::BI__builtin_ia32_xsaves64: {
8507     Intrinsic::ID ID;
8508 #define INTRINSIC_X86_XSAVE_ID(NAME) \
8509     case X86::BI__builtin_ia32_##NAME: \
8510       ID = Intrinsic::x86_##NAME; \
8511       break
8512     switch (BuiltinID) {
8513     default: llvm_unreachable("Unsupported intrinsic!");
8514     INTRINSIC_X86_XSAVE_ID(xsave);
8515     INTRINSIC_X86_XSAVE_ID(xsave64);
8516     INTRINSIC_X86_XSAVE_ID(xrstor);
8517     INTRINSIC_X86_XSAVE_ID(xrstor64);
8518     INTRINSIC_X86_XSAVE_ID(xsaveopt);
8519     INTRINSIC_X86_XSAVE_ID(xsaveopt64);
8520     INTRINSIC_X86_XSAVE_ID(xrstors);
8521     INTRINSIC_X86_XSAVE_ID(xrstors64);
8522     INTRINSIC_X86_XSAVE_ID(xsavec);
8523     INTRINSIC_X86_XSAVE_ID(xsavec64);
8524     INTRINSIC_X86_XSAVE_ID(xsaves);
8525     INTRINSIC_X86_XSAVE_ID(xsaves64);
8526     }
8527 #undef INTRINSIC_X86_XSAVE_ID
8528     Value *Mhi = Builder.CreateTrunc(
8529       Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
8530     Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
8531     Ops[1] = Mhi;
8532     Ops.push_back(Mlo);
8533     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
8534   }
8535   case X86::BI__builtin_ia32_storedqudi128_mask:
8536   case X86::BI__builtin_ia32_storedqusi128_mask:
8537   case X86::BI__builtin_ia32_storedquhi128_mask:
8538   case X86::BI__builtin_ia32_storedquqi128_mask:
8539   case X86::BI__builtin_ia32_storeupd128_mask:
8540   case X86::BI__builtin_ia32_storeups128_mask:
8541   case X86::BI__builtin_ia32_storedqudi256_mask:
8542   case X86::BI__builtin_ia32_storedqusi256_mask:
8543   case X86::BI__builtin_ia32_storedquhi256_mask:
8544   case X86::BI__builtin_ia32_storedquqi256_mask:
8545   case X86::BI__builtin_ia32_storeupd256_mask:
8546   case X86::BI__builtin_ia32_storeups256_mask:
8547   case X86::BI__builtin_ia32_storedqudi512_mask:
8548   case X86::BI__builtin_ia32_storedqusi512_mask:
8549   case X86::BI__builtin_ia32_storedquhi512_mask:
8550   case X86::BI__builtin_ia32_storedquqi512_mask:
8551   case X86::BI__builtin_ia32_storeupd512_mask:
8552   case X86::BI__builtin_ia32_storeups512_mask:
8553     return EmitX86MaskedStore(*this, Ops, 1);
8554 
8555   case X86::BI__builtin_ia32_storess128_mask:
8556   case X86::BI__builtin_ia32_storesd128_mask: {
8557     return EmitX86MaskedStore(*this, Ops, 16);
8558   }
8559   case X86::BI__builtin_ia32_vpopcntb_128:
8560   case X86::BI__builtin_ia32_vpopcntd_128:
8561   case X86::BI__builtin_ia32_vpopcntq_128:
8562   case X86::BI__builtin_ia32_vpopcntw_128:
8563   case X86::BI__builtin_ia32_vpopcntb_256:
8564   case X86::BI__builtin_ia32_vpopcntd_256:
8565   case X86::BI__builtin_ia32_vpopcntq_256:
8566   case X86::BI__builtin_ia32_vpopcntw_256:
8567   case X86::BI__builtin_ia32_vpopcntb_512:
8568   case X86::BI__builtin_ia32_vpopcntd_512:
8569   case X86::BI__builtin_ia32_vpopcntq_512:
8570   case X86::BI__builtin_ia32_vpopcntw_512: {
8571     llvm::Type *ResultType = ConvertType(E->getType());
8572     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
8573     return Builder.CreateCall(F, Ops);
8574   }
8575   case X86::BI__builtin_ia32_cvtmask2b128:
8576   case X86::BI__builtin_ia32_cvtmask2b256:
8577   case X86::BI__builtin_ia32_cvtmask2b512:
8578   case X86::BI__builtin_ia32_cvtmask2w128:
8579   case X86::BI__builtin_ia32_cvtmask2w256:
8580   case X86::BI__builtin_ia32_cvtmask2w512:
8581   case X86::BI__builtin_ia32_cvtmask2d128:
8582   case X86::BI__builtin_ia32_cvtmask2d256:
8583   case X86::BI__builtin_ia32_cvtmask2d512:
8584   case X86::BI__builtin_ia32_cvtmask2q128:
8585   case X86::BI__builtin_ia32_cvtmask2q256:
8586   case X86::BI__builtin_ia32_cvtmask2q512:
8587     return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
8588 
8589   case X86::BI__builtin_ia32_cvtb2mask128:
8590   case X86::BI__builtin_ia32_cvtb2mask256:
8591   case X86::BI__builtin_ia32_cvtb2mask512:
8592   case X86::BI__builtin_ia32_cvtw2mask128:
8593   case X86::BI__builtin_ia32_cvtw2mask256:
8594   case X86::BI__builtin_ia32_cvtw2mask512:
8595   case X86::BI__builtin_ia32_cvtd2mask128:
8596   case X86::BI__builtin_ia32_cvtd2mask256:
8597   case X86::BI__builtin_ia32_cvtd2mask512:
8598   case X86::BI__builtin_ia32_cvtq2mask128:
8599   case X86::BI__builtin_ia32_cvtq2mask256:
8600   case X86::BI__builtin_ia32_cvtq2mask512:
8601     return EmitX86ConvertToMask(*this, Ops[0]);
8602 
8603   case X86::BI__builtin_ia32_movdqa32store128_mask:
8604   case X86::BI__builtin_ia32_movdqa64store128_mask:
8605   case X86::BI__builtin_ia32_storeaps128_mask:
8606   case X86::BI__builtin_ia32_storeapd128_mask:
8607   case X86::BI__builtin_ia32_movdqa32store256_mask:
8608   case X86::BI__builtin_ia32_movdqa64store256_mask:
8609   case X86::BI__builtin_ia32_storeaps256_mask:
8610   case X86::BI__builtin_ia32_storeapd256_mask:
8611   case X86::BI__builtin_ia32_movdqa32store512_mask:
8612   case X86::BI__builtin_ia32_movdqa64store512_mask:
8613   case X86::BI__builtin_ia32_storeaps512_mask:
8614   case X86::BI__builtin_ia32_storeapd512_mask: {
8615     unsigned Align =
8616       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
8617     return EmitX86MaskedStore(*this, Ops, Align);
8618   }
8619   case X86::BI__builtin_ia32_loadups128_mask:
8620   case X86::BI__builtin_ia32_loadups256_mask:
8621   case X86::BI__builtin_ia32_loadups512_mask:
8622   case X86::BI__builtin_ia32_loadupd128_mask:
8623   case X86::BI__builtin_ia32_loadupd256_mask:
8624   case X86::BI__builtin_ia32_loadupd512_mask:
8625   case X86::BI__builtin_ia32_loaddquqi128_mask:
8626   case X86::BI__builtin_ia32_loaddquqi256_mask:
8627   case X86::BI__builtin_ia32_loaddquqi512_mask:
8628   case X86::BI__builtin_ia32_loaddquhi128_mask:
8629   case X86::BI__builtin_ia32_loaddquhi256_mask:
8630   case X86::BI__builtin_ia32_loaddquhi512_mask:
8631   case X86::BI__builtin_ia32_loaddqusi128_mask:
8632   case X86::BI__builtin_ia32_loaddqusi256_mask:
8633   case X86::BI__builtin_ia32_loaddqusi512_mask:
8634   case X86::BI__builtin_ia32_loaddqudi128_mask:
8635   case X86::BI__builtin_ia32_loaddqudi256_mask:
8636   case X86::BI__builtin_ia32_loaddqudi512_mask:
8637     return EmitX86MaskedLoad(*this, Ops, 1);
8638 
8639   case X86::BI__builtin_ia32_loadss128_mask:
8640   case X86::BI__builtin_ia32_loadsd128_mask:
8641     return EmitX86MaskedLoad(*this, Ops, 16);
8642 
8643   case X86::BI__builtin_ia32_loadaps128_mask:
8644   case X86::BI__builtin_ia32_loadaps256_mask:
8645   case X86::BI__builtin_ia32_loadaps512_mask:
8646   case X86::BI__builtin_ia32_loadapd128_mask:
8647   case X86::BI__builtin_ia32_loadapd256_mask:
8648   case X86::BI__builtin_ia32_loadapd512_mask:
8649   case X86::BI__builtin_ia32_movdqa32load128_mask:
8650   case X86::BI__builtin_ia32_movdqa32load256_mask:
8651   case X86::BI__builtin_ia32_movdqa32load512_mask:
8652   case X86::BI__builtin_ia32_movdqa64load128_mask:
8653   case X86::BI__builtin_ia32_movdqa64load256_mask:
8654   case X86::BI__builtin_ia32_movdqa64load512_mask: {
8655     unsigned Align =
8656       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
8657     return EmitX86MaskedLoad(*this, Ops, Align);
8658   }
8659 
8660   case X86::BI__builtin_ia32_vbroadcastf128_pd256:
8661   case X86::BI__builtin_ia32_vbroadcastf128_ps256: {
8662     llvm::Type *DstTy = ConvertType(E->getType());
8663     return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1);
8664   }
8665 
8666   case X86::BI__builtin_ia32_storehps:
8667   case X86::BI__builtin_ia32_storelps: {
8668     llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
8669     llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
8670 
8671     // cast val v2i64
8672     Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
8673 
8674     // extract (0, 1)
8675     unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
8676     llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
8677     Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
8678 
8679     // cast pointer to i64 & store
8680     Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
8681     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8682   }
8683   case X86::BI__builtin_ia32_palignr128:
8684   case X86::BI__builtin_ia32_palignr256:
8685   case X86::BI__builtin_ia32_palignr512_mask: {
8686     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
8687 
8688     unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
8689     assert(NumElts % 16 == 0);
8690 
8691     // If palignr is shifting the pair of vectors more than the size of two
8692     // lanes, emit zero.
8693     if (ShiftVal >= 32)
8694       return llvm::Constant::getNullValue(ConvertType(E->getType()));
8695 
8696     // If palignr is shifting the pair of input vectors more than one lane,
8697     // but less than two lanes, convert to shifting in zeroes.
8698     if (ShiftVal > 16) {
8699       ShiftVal -= 16;
8700       Ops[1] = Ops[0];
8701       Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
8702     }
8703 
8704     uint32_t Indices[64];
8705     // 256-bit palignr operates on 128-bit lanes so we need to handle that
8706     for (unsigned l = 0; l != NumElts; l += 16) {
8707       for (unsigned i = 0; i != 16; ++i) {
8708         unsigned Idx = ShiftVal + i;
8709         if (Idx >= 16)
8710           Idx += NumElts - 16; // End of lane, switch operand.
8711         Indices[l + i] = Idx + l;
8712       }
8713     }
8714 
8715     Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
8716                                                makeArrayRef(Indices, NumElts),
8717                                                "palignr");
8718 
8719     // If this isn't a masked builtin, just return the align operation.
8720     if (Ops.size() == 3)
8721       return Align;
8722 
8723     return EmitX86Select(*this, Ops[4], Align, Ops[3]);
8724   }
8725 
8726   case X86::BI__builtin_ia32_vperm2f128_pd256:
8727   case X86::BI__builtin_ia32_vperm2f128_ps256:
8728   case X86::BI__builtin_ia32_vperm2f128_si256:
8729   case X86::BI__builtin_ia32_permti256: {
8730     unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
8731     unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
8732 
8733     // This takes a very simple approach since there are two lanes and a
8734     // shuffle can have 2 inputs. So we reserve the first input for the first
8735     // lane and the second input for the second lane. This may result in
8736     // duplicate sources, but this can be dealt with in the backend.
8737 
8738     Value *OutOps[2];
8739     uint32_t Indices[8];
8740     for (unsigned l = 0; l != 2; ++l) {
8741       // Determine the source for this lane.
8742       if (Imm & (1 << ((l * 4) + 3)))
8743         OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
8744       else if (Imm & (1 << ((l * 4) + 1)))
8745         OutOps[l] = Ops[1];
8746       else
8747         OutOps[l] = Ops[0];
8748 
8749       for (unsigned i = 0; i != NumElts/2; ++i) {
8750         // Start with ith element of the source for this lane.
8751         unsigned Idx = (l * NumElts) + i;
8752         // If bit 0 of the immediate half is set, switch to the high half of
8753         // the source.
8754         if (Imm & (1 << (l * 4)))
8755           Idx += NumElts/2;
8756         Indices[(l * (NumElts/2)) + i] = Idx;
8757       }
8758     }
8759 
8760     return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
8761                                        makeArrayRef(Indices, NumElts),
8762                                        "vperm");
8763   }
8764 
8765   case X86::BI__builtin_ia32_movnti:
8766   case X86::BI__builtin_ia32_movnti64:
8767   case X86::BI__builtin_ia32_movntsd:
8768   case X86::BI__builtin_ia32_movntss: {
8769     llvm::MDNode *Node = llvm::MDNode::get(
8770         getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
8771 
8772     Value *Ptr = Ops[0];
8773     Value *Src = Ops[1];
8774 
8775     // Extract the 0'th element of the source vector.
8776     if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
8777         BuiltinID == X86::BI__builtin_ia32_movntss)
8778       Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
8779 
8780     // Convert the type of the pointer to a pointer to the stored type.
8781     Value *BC = Builder.CreateBitCast(
8782         Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");
8783 
8784     // Unaligned nontemporal store of the scalar value.
8785     StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
8786     SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
8787     SI->setAlignment(1);
8788     return SI;
8789   }
8790 
8791   case X86::BI__builtin_ia32_selectb_128:
8792   case X86::BI__builtin_ia32_selectb_256:
8793   case X86::BI__builtin_ia32_selectb_512:
8794   case X86::BI__builtin_ia32_selectw_128:
8795   case X86::BI__builtin_ia32_selectw_256:
8796   case X86::BI__builtin_ia32_selectw_512:
8797   case X86::BI__builtin_ia32_selectd_128:
8798   case X86::BI__builtin_ia32_selectd_256:
8799   case X86::BI__builtin_ia32_selectd_512:
8800   case X86::BI__builtin_ia32_selectq_128:
8801   case X86::BI__builtin_ia32_selectq_256:
8802   case X86::BI__builtin_ia32_selectq_512:
8803   case X86::BI__builtin_ia32_selectps_128:
8804   case X86::BI__builtin_ia32_selectps_256:
8805   case X86::BI__builtin_ia32_selectps_512:
8806   case X86::BI__builtin_ia32_selectpd_128:
8807   case X86::BI__builtin_ia32_selectpd_256:
8808   case X86::BI__builtin_ia32_selectpd_512:
8809     return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
8810   case X86::BI__builtin_ia32_cmpb128_mask:
8811   case X86::BI__builtin_ia32_cmpb256_mask:
8812   case X86::BI__builtin_ia32_cmpb512_mask:
8813   case X86::BI__builtin_ia32_cmpw128_mask:
8814   case X86::BI__builtin_ia32_cmpw256_mask:
8815   case X86::BI__builtin_ia32_cmpw512_mask:
8816   case X86::BI__builtin_ia32_cmpd128_mask:
8817   case X86::BI__builtin_ia32_cmpd256_mask:
8818   case X86::BI__builtin_ia32_cmpd512_mask:
8819   case X86::BI__builtin_ia32_cmpq128_mask:
8820   case X86::BI__builtin_ia32_cmpq256_mask:
8821   case X86::BI__builtin_ia32_cmpq512_mask: {
8822     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
8823     return EmitX86MaskedCompare(*this, CC, true, Ops);
8824   }
8825   case X86::BI__builtin_ia32_ucmpb128_mask:
8826   case X86::BI__builtin_ia32_ucmpb256_mask:
8827   case X86::BI__builtin_ia32_ucmpb512_mask:
8828   case X86::BI__builtin_ia32_ucmpw128_mask:
8829   case X86::BI__builtin_ia32_ucmpw256_mask:
8830   case X86::BI__builtin_ia32_ucmpw512_mask:
8831   case X86::BI__builtin_ia32_ucmpd128_mask:
8832   case X86::BI__builtin_ia32_ucmpd256_mask:
8833   case X86::BI__builtin_ia32_ucmpd512_mask:
8834   case X86::BI__builtin_ia32_ucmpq128_mask:
8835   case X86::BI__builtin_ia32_ucmpq256_mask:
8836   case X86::BI__builtin_ia32_ucmpq512_mask: {
8837     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
8838     return EmitX86MaskedCompare(*this, CC, false, Ops);
8839   }
8840 
8841   case X86::BI__builtin_ia32_kortestchi:
8842   case X86::BI__builtin_ia32_kortestzhi: {
8843     Value *Or = EmitX86MaskLogic(*this, Instruction::Or, 16, Ops);
8844     Value *C;
8845     if (BuiltinID == X86::BI__builtin_ia32_kortestchi)
8846       C = llvm::Constant::getAllOnesValue(Builder.getInt16Ty());
8847     else
8848       C = llvm::Constant::getNullValue(Builder.getInt16Ty());
8849     Value *Cmp = Builder.CreateICmpEQ(Or, C);
8850     return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
8851   }
8852 
8853   case X86::BI__builtin_ia32_kandhi:
8854     return EmitX86MaskLogic(*this, Instruction::And, 16, Ops);
8855   case X86::BI__builtin_ia32_kandnhi:
8856     return EmitX86MaskLogic(*this, Instruction::And, 16, Ops, true);
8857   case X86::BI__builtin_ia32_korhi:
8858     return EmitX86MaskLogic(*this, Instruction::Or, 16, Ops);
8859   case X86::BI__builtin_ia32_kxnorhi:
8860     return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops, true);
8861   case X86::BI__builtin_ia32_kxorhi:
8862     return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops);
8863   case X86::BI__builtin_ia32_knothi: {
8864     Ops[0] = getMaskVecValue(*this, Ops[0], 16);
8865     return Builder.CreateBitCast(Builder.CreateNot(Ops[0]),
8866                                  Builder.getInt16Ty());
8867   }
8868 
8869   case X86::BI__builtin_ia32_kunpckdi:
8870   case X86::BI__builtin_ia32_kunpcksi:
8871   case X86::BI__builtin_ia32_kunpckhi: {
8872     unsigned NumElts = Ops[0]->getType()->getScalarSizeInBits();
8873     Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
8874     Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
8875     uint32_t Indices[64];
8876     for (unsigned i = 0; i != NumElts; ++i)
8877       Indices[i] = i;
8878 
8879     // First extract half of each vector. This gives better codegen than
8880     // doing it in a single shuffle.
8881     LHS = Builder.CreateShuffleVector(LHS, LHS,
8882                                       makeArrayRef(Indices, NumElts / 2));
8883     RHS = Builder.CreateShuffleVector(RHS, RHS,
8884                                       makeArrayRef(Indices, NumElts / 2));
8885     // Concat the vectors.
8886     // NOTE: Operands are swapped to match the intrinsic definition.
8887     Value *Res = Builder.CreateShuffleVector(RHS, LHS,
8888                                              makeArrayRef(Indices, NumElts));
8889     return Builder.CreateBitCast(Res, Ops[0]->getType());
8890   }
8891 
8892   case X86::BI__builtin_ia32_vplzcntd_128_mask:
8893   case X86::BI__builtin_ia32_vplzcntd_256_mask:
8894   case X86::BI__builtin_ia32_vplzcntd_512_mask:
8895   case X86::BI__builtin_ia32_vplzcntq_128_mask:
8896   case X86::BI__builtin_ia32_vplzcntq_256_mask:
8897   case X86::BI__builtin_ia32_vplzcntq_512_mask: {
8898     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
8899     return EmitX86Select(*this, Ops[2],
8900                          Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}),
8901                          Ops[1]);
8902   }
8903 
8904   case X86::BI__builtin_ia32_pabsb128:
8905   case X86::BI__builtin_ia32_pabsw128:
8906   case X86::BI__builtin_ia32_pabsd128:
8907   case X86::BI__builtin_ia32_pabsb256:
8908   case X86::BI__builtin_ia32_pabsw256:
8909   case X86::BI__builtin_ia32_pabsd256:
8910   case X86::BI__builtin_ia32_pabsq128_mask:
8911   case X86::BI__builtin_ia32_pabsq256_mask:
8912   case X86::BI__builtin_ia32_pabsb512_mask:
8913   case X86::BI__builtin_ia32_pabsw512_mask:
8914   case X86::BI__builtin_ia32_pabsd512_mask:
8915   case X86::BI__builtin_ia32_pabsq512_mask:
8916     return EmitX86Abs(*this, Ops);
8917 
8918   case X86::BI__builtin_ia32_pmaxsb128:
8919   case X86::BI__builtin_ia32_pmaxsw128:
8920   case X86::BI__builtin_ia32_pmaxsd128:
8921   case X86::BI__builtin_ia32_pmaxsq128_mask:
8922   case X86::BI__builtin_ia32_pmaxsb256:
8923   case X86::BI__builtin_ia32_pmaxsw256:
8924   case X86::BI__builtin_ia32_pmaxsd256:
8925   case X86::BI__builtin_ia32_pmaxsq256_mask:
8926   case X86::BI__builtin_ia32_pmaxsb512_mask:
8927   case X86::BI__builtin_ia32_pmaxsw512_mask:
8928   case X86::BI__builtin_ia32_pmaxsd512_mask:
8929   case X86::BI__builtin_ia32_pmaxsq512_mask:
8930     return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops);
8931   case X86::BI__builtin_ia32_pmaxub128:
8932   case X86::BI__builtin_ia32_pmaxuw128:
8933   case X86::BI__builtin_ia32_pmaxud128:
8934   case X86::BI__builtin_ia32_pmaxuq128_mask:
8935   case X86::BI__builtin_ia32_pmaxub256:
8936   case X86::BI__builtin_ia32_pmaxuw256:
8937   case X86::BI__builtin_ia32_pmaxud256:
8938   case X86::BI__builtin_ia32_pmaxuq256_mask:
8939   case X86::BI__builtin_ia32_pmaxub512_mask:
8940   case X86::BI__builtin_ia32_pmaxuw512_mask:
8941   case X86::BI__builtin_ia32_pmaxud512_mask:
8942   case X86::BI__builtin_ia32_pmaxuq512_mask:
8943     return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops);
8944   case X86::BI__builtin_ia32_pminsb128:
8945   case X86::BI__builtin_ia32_pminsw128:
8946   case X86::BI__builtin_ia32_pminsd128:
8947   case X86::BI__builtin_ia32_pminsq128_mask:
8948   case X86::BI__builtin_ia32_pminsb256:
8949   case X86::BI__builtin_ia32_pminsw256:
8950   case X86::BI__builtin_ia32_pminsd256:
8951   case X86::BI__builtin_ia32_pminsq256_mask:
8952   case X86::BI__builtin_ia32_pminsb512_mask:
8953   case X86::BI__builtin_ia32_pminsw512_mask:
8954   case X86::BI__builtin_ia32_pminsd512_mask:
8955   case X86::BI__builtin_ia32_pminsq512_mask:
8956     return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops);
8957   case X86::BI__builtin_ia32_pminub128:
8958   case X86::BI__builtin_ia32_pminuw128:
8959   case X86::BI__builtin_ia32_pminud128:
8960   case X86::BI__builtin_ia32_pminuq128_mask:
8961   case X86::BI__builtin_ia32_pminub256:
8962   case X86::BI__builtin_ia32_pminuw256:
8963   case X86::BI__builtin_ia32_pminud256:
8964   case X86::BI__builtin_ia32_pminuq256_mask:
8965   case X86::BI__builtin_ia32_pminub512_mask:
8966   case X86::BI__builtin_ia32_pminuw512_mask:
8967   case X86::BI__builtin_ia32_pminud512_mask:
8968   case X86::BI__builtin_ia32_pminuq512_mask:
8969     return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops);
8970 
8971   // 3DNow!
8972   case X86::BI__builtin_ia32_pswapdsf:
8973   case X86::BI__builtin_ia32_pswapdsi: {
8974     llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
8975     Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
8976     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
8977     return Builder.CreateCall(F, Ops, "pswapd");
8978   }
8979   case X86::BI__builtin_ia32_rdrand16_step:
8980   case X86::BI__builtin_ia32_rdrand32_step:
8981   case X86::BI__builtin_ia32_rdrand64_step:
8982   case X86::BI__builtin_ia32_rdseed16_step:
8983   case X86::BI__builtin_ia32_rdseed32_step:
8984   case X86::BI__builtin_ia32_rdseed64_step: {
8985     Intrinsic::ID ID;
8986     switch (BuiltinID) {
8987     default: llvm_unreachable("Unsupported intrinsic!");
8988     case X86::BI__builtin_ia32_rdrand16_step:
8989       ID = Intrinsic::x86_rdrand_16;
8990       break;
8991     case X86::BI__builtin_ia32_rdrand32_step:
8992       ID = Intrinsic::x86_rdrand_32;
8993       break;
8994     case X86::BI__builtin_ia32_rdrand64_step:
8995       ID = Intrinsic::x86_rdrand_64;
8996       break;
8997     case X86::BI__builtin_ia32_rdseed16_step:
8998       ID = Intrinsic::x86_rdseed_16;
8999       break;
9000     case X86::BI__builtin_ia32_rdseed32_step:
9001       ID = Intrinsic::x86_rdseed_32;
9002       break;
9003     case X86::BI__builtin_ia32_rdseed64_step:
9004       ID = Intrinsic::x86_rdseed_64;
9005       break;
9006     }
9007 
9008     Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
9009     Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
9010                                       Ops[0]);
9011     return Builder.CreateExtractValue(Call, 1);
9012   }
9013 
9014   case X86::BI__builtin_ia32_cmpps128_mask:
9015   case X86::BI__builtin_ia32_cmpps256_mask:
9016   case X86::BI__builtin_ia32_cmpps512_mask:
9017   case X86::BI__builtin_ia32_cmppd128_mask:
9018   case X86::BI__builtin_ia32_cmppd256_mask:
9019   case X86::BI__builtin_ia32_cmppd512_mask: {
9020     unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
9021     Value *MaskIn = Ops[3];
9022     Ops.erase(&Ops[3]);
9023 
9024     Intrinsic::ID ID;
9025     switch (BuiltinID) {
9026     default: llvm_unreachable("Unsupported intrinsic!");
9027     case X86::BI__builtin_ia32_cmpps128_mask:
9028       ID = Intrinsic::x86_avx512_mask_cmp_ps_128;
9029       break;
9030     case X86::BI__builtin_ia32_cmpps256_mask:
9031       ID = Intrinsic::x86_avx512_mask_cmp_ps_256;
9032       break;
9033     case X86::BI__builtin_ia32_cmpps512_mask:
9034       ID = Intrinsic::x86_avx512_mask_cmp_ps_512;
9035       break;
9036     case X86::BI__builtin_ia32_cmppd128_mask:
9037       ID = Intrinsic::x86_avx512_mask_cmp_pd_128;
9038       break;
9039     case X86::BI__builtin_ia32_cmppd256_mask:
9040       ID = Intrinsic::x86_avx512_mask_cmp_pd_256;
9041       break;
9042     case X86::BI__builtin_ia32_cmppd512_mask:
9043       ID = Intrinsic::x86_avx512_mask_cmp_pd_512;
9044       break;
9045     }
9046 
9047     Value *Cmp = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
9048     return EmitX86MaskedCompareResult(*this, Cmp, NumElts, MaskIn);
9049   }
9050 
9051   // SSE packed comparison intrinsics
9052   case X86::BI__builtin_ia32_cmpeqps:
9053   case X86::BI__builtin_ia32_cmpeqpd:
9054     return getVectorFCmpIR(CmpInst::FCMP_OEQ);
9055   case X86::BI__builtin_ia32_cmpltps:
9056   case X86::BI__builtin_ia32_cmpltpd:
9057     return getVectorFCmpIR(CmpInst::FCMP_OLT);
9058   case X86::BI__builtin_ia32_cmpleps:
9059   case X86::BI__builtin_ia32_cmplepd:
9060     return getVectorFCmpIR(CmpInst::FCMP_OLE);
9061   case X86::BI__builtin_ia32_cmpunordps:
9062   case X86::BI__builtin_ia32_cmpunordpd:
9063     return getVectorFCmpIR(CmpInst::FCMP_UNO);
9064   case X86::BI__builtin_ia32_cmpneqps:
9065   case X86::BI__builtin_ia32_cmpneqpd:
9066     return getVectorFCmpIR(CmpInst::FCMP_UNE);
9067   case X86::BI__builtin_ia32_cmpnltps:
9068   case X86::BI__builtin_ia32_cmpnltpd:
9069     return getVectorFCmpIR(CmpInst::FCMP_UGE);
9070   case X86::BI__builtin_ia32_cmpnleps:
9071   case X86::BI__builtin_ia32_cmpnlepd:
9072     return getVectorFCmpIR(CmpInst::FCMP_UGT);
9073   case X86::BI__builtin_ia32_cmpordps:
9074   case X86::BI__builtin_ia32_cmpordpd:
9075     return getVectorFCmpIR(CmpInst::FCMP_ORD);
9076   case X86::BI__builtin_ia32_cmpps:
9077   case X86::BI__builtin_ia32_cmpps256:
9078   case X86::BI__builtin_ia32_cmppd:
9079   case X86::BI__builtin_ia32_cmppd256: {
9080     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
9081     // If this is one of the SSE immediates, we can use native IR.
9082     if (CC < 8) {
9083       FCmpInst::Predicate Pred;
9084       switch (CC) {
9085       case 0: Pred = FCmpInst::FCMP_OEQ; break;
9086       case 1: Pred = FCmpInst::FCMP_OLT; break;
9087       case 2: Pred = FCmpInst::FCMP_OLE; break;
9088       case 3: Pred = FCmpInst::FCMP_UNO; break;
9089       case 4: Pred = FCmpInst::FCMP_UNE; break;
9090       case 5: Pred = FCmpInst::FCMP_UGE; break;
9091       case 6: Pred = FCmpInst::FCMP_UGT; break;
9092       case 7: Pred = FCmpInst::FCMP_ORD; break;
9093       }
9094       return getVectorFCmpIR(Pred);
9095     }
9096 
9097     // We can't handle 8-31 immediates with native IR, so use the intrinsic,
9098     // except for predicates that always produce a constant result.
9099     Intrinsic::ID ID;
9100     switch (BuiltinID) {
9101     default: llvm_unreachable("Unsupported intrinsic!");
9102     case X86::BI__builtin_ia32_cmpps:
9103       ID = Intrinsic::x86_sse_cmp_ps;
9104       break;
9105     case X86::BI__builtin_ia32_cmpps256:
9106       // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
9107       // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
9108       if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
9109          Value *Constant = (CC == 0xf || CC == 0x1f) ?
9110                 llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) :
9111                 llvm::Constant::getNullValue(Builder.getInt32Ty());
9112          Value *Vec = Builder.CreateVectorSplat(
9113                         Ops[0]->getType()->getVectorNumElements(), Constant);
9114          return Builder.CreateBitCast(Vec, Ops[0]->getType());
9115       }
9116       ID = Intrinsic::x86_avx_cmp_ps_256;
9117       break;
9118     case X86::BI__builtin_ia32_cmppd:
9119       ID = Intrinsic::x86_sse2_cmp_pd;
9120       break;
9121     case X86::BI__builtin_ia32_cmppd256:
9122       // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
9123       // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
9124       if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
9125          Value *Constant = (CC == 0xf || CC == 0x1f) ?
9126                 llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) :
9127                 llvm::Constant::getNullValue(Builder.getInt64Ty());
9128          Value *Vec = Builder.CreateVectorSplat(
9129                         Ops[0]->getType()->getVectorNumElements(), Constant);
9130          return Builder.CreateBitCast(Vec, Ops[0]->getType());
9131       }
9132       ID = Intrinsic::x86_avx_cmp_pd_256;
9133       break;
9134     }
9135 
9136     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
9137   }
9138 
9139   // SSE scalar comparison intrinsics
9140   case X86::BI__builtin_ia32_cmpeqss:
9141     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
9142   case X86::BI__builtin_ia32_cmpltss:
9143     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
9144   case X86::BI__builtin_ia32_cmpless:
9145     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
9146   case X86::BI__builtin_ia32_cmpunordss:
9147     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
9148   case X86::BI__builtin_ia32_cmpneqss:
9149     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
9150   case X86::BI__builtin_ia32_cmpnltss:
9151     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
9152   case X86::BI__builtin_ia32_cmpnless:
9153     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
9154   case X86::BI__builtin_ia32_cmpordss:
9155     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
9156   case X86::BI__builtin_ia32_cmpeqsd:
9157     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
9158   case X86::BI__builtin_ia32_cmpltsd:
9159     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
9160   case X86::BI__builtin_ia32_cmplesd:
9161     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
9162   case X86::BI__builtin_ia32_cmpunordsd:
9163     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
9164   case X86::BI__builtin_ia32_cmpneqsd:
9165     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
9166   case X86::BI__builtin_ia32_cmpnltsd:
9167     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
9168   case X86::BI__builtin_ia32_cmpnlesd:
9169     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
9170   case X86::BI__builtin_ia32_cmpordsd:
9171     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
9172 
9173   case X86::BI__emul:
9174   case X86::BI__emulu: {
9175     llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
9176     bool isSigned = (BuiltinID == X86::BI__emul);
9177     Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
9178     Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
9179     return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
9180   }
9181   case X86::BI__mulh:
9182   case X86::BI__umulh:
9183   case X86::BI_mul128:
9184   case X86::BI_umul128: {
9185     llvm::Type *ResType = ConvertType(E->getType());
9186     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
9187 
9188     bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
9189     Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
9190     Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
9191 
9192     Value *MulResult, *HigherBits;
9193     if (IsSigned) {
9194       MulResult = Builder.CreateNSWMul(LHS, RHS);
9195       HigherBits = Builder.CreateAShr(MulResult, 64);
9196     } else {
9197       MulResult = Builder.CreateNUWMul(LHS, RHS);
9198       HigherBits = Builder.CreateLShr(MulResult, 64);
9199     }
9200     HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
9201 
9202     if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
9203       return HigherBits;
9204 
9205     Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
9206     Builder.CreateStore(HigherBits, HighBitsAddress);
9207     return Builder.CreateIntCast(MulResult, ResType, IsSigned);
9208   }
9209 
9210   case X86::BI__faststorefence: {
9211     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
9212                                llvm::SyncScope::System);
9213   }
9214   case X86::BI_ReadWriteBarrier:
9215   case X86::BI_ReadBarrier:
9216   case X86::BI_WriteBarrier: {
9217     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
9218                                llvm::SyncScope::SingleThread);
9219   }
9220   case X86::BI_BitScanForward:
9221   case X86::BI_BitScanForward64:
9222     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
9223   case X86::BI_BitScanReverse:
9224   case X86::BI_BitScanReverse64:
9225     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
9226 
9227   case X86::BI_InterlockedAnd64:
9228     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
9229   case X86::BI_InterlockedExchange64:
9230     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
9231   case X86::BI_InterlockedExchangeAdd64:
9232     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
9233   case X86::BI_InterlockedExchangeSub64:
9234     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
9235   case X86::BI_InterlockedOr64:
9236     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
9237   case X86::BI_InterlockedXor64:
9238     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
9239   case X86::BI_InterlockedDecrement64:
9240     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
9241   case X86::BI_InterlockedIncrement64:
9242     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
9243   case X86::BI_InterlockedCompareExchange128: {
9244     // InterlockedCompareExchange128 doesn't directly refer to 128bit ints,
9245     // instead it takes pointers to 64bit ints for Destination and
9246     // ComparandResult, and exchange is taken as two 64bit ints (high & low).
9247     // The previous value is written to ComparandResult, and success is
9248     // returned.
9249 
9250     llvm::Type *Int128Ty = Builder.getInt128Ty();
9251     llvm::Type *Int128PtrTy = Int128Ty->getPointerTo();
9252 
9253     Value *Destination =
9254         Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PtrTy);
9255     Value *ExchangeHigh128 =
9256         Builder.CreateZExt(EmitScalarExpr(E->getArg(1)), Int128Ty);
9257     Value *ExchangeLow128 =
9258         Builder.CreateZExt(EmitScalarExpr(E->getArg(2)), Int128Ty);
9259     Address ComparandResult(
9260         Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int128PtrTy),
9261         getContext().toCharUnitsFromBits(128));
9262 
9263     Value *Exchange = Builder.CreateOr(
9264         Builder.CreateShl(ExchangeHigh128, 64, "", false, false),
9265         ExchangeLow128);
9266 
9267     Value *Comparand = Builder.CreateLoad(ComparandResult);
9268 
9269     AtomicCmpXchgInst *CXI =
9270         Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
9271                                     AtomicOrdering::SequentiallyConsistent,
9272                                     AtomicOrdering::SequentiallyConsistent);
9273     CXI->setVolatile(true);
9274 
9275     // Write the result back to the inout pointer.
9276     Builder.CreateStore(Builder.CreateExtractValue(CXI, 0), ComparandResult);
9277 
9278     // Get the success boolean and zero extend it to i8.
9279     Value *Success = Builder.CreateExtractValue(CXI, 1);
9280     return Builder.CreateZExt(Success, ConvertType(E->getType()));
9281   }
9282 
9283   case X86::BI_AddressOfReturnAddress: {
9284     Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
9285     return Builder.CreateCall(F);
9286   }
9287   case X86::BI__stosb: {
9288     // We treat __stosb as a volatile memset - it may not generate "rep stosb"
9289     // instruction, but it will create a memset that won't be optimized away.
9290     return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true);
9291   }
9292   case X86::BI__ud2:
9293     // llvm.trap makes a ud2a instruction on x86.
9294     return EmitTrapCall(Intrinsic::trap);
9295   case X86::BI__int2c: {
9296     // This syscall signals a driver assertion failure in x86 NT kernels.
9297     llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
9298     llvm::InlineAsm *IA =
9299         llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true);
9300     llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
9301         getLLVMContext(), llvm::AttributeList::FunctionIndex,
9302         llvm::Attribute::NoReturn);
9303     CallSite CS = Builder.CreateCall(IA);
9304     CS.setAttributes(NoReturnAttr);
9305     return CS.getInstruction();
9306   }
9307   case X86::BI__readfsbyte:
9308   case X86::BI__readfsword:
9309   case X86::BI__readfsdword:
9310   case X86::BI__readfsqword: {
9311     llvm::Type *IntTy = ConvertType(E->getType());
9312     Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
9313                                         llvm::PointerType::get(IntTy, 257));
9314     LoadInst *Load = Builder.CreateAlignedLoad(
9315         IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
9316     Load->setVolatile(true);
9317     return Load;
9318   }
9319   case X86::BI__readgsbyte:
9320   case X86::BI__readgsword:
9321   case X86::BI__readgsdword:
9322   case X86::BI__readgsqword: {
9323     llvm::Type *IntTy = ConvertType(E->getType());
9324     Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
9325                                         llvm::PointerType::get(IntTy, 256));
9326     LoadInst *Load = Builder.CreateAlignedLoad(
9327         IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
9328     Load->setVolatile(true);
9329     return Load;
9330   }
9331   }
9332 }
9333 
9334 
9335 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
9336                                            const CallExpr *E) {
9337   SmallVector<Value*, 4> Ops;
9338 
9339   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
9340     Ops.push_back(EmitScalarExpr(E->getArg(i)));
9341 
9342   Intrinsic::ID ID = Intrinsic::not_intrinsic;
9343 
9344   switch (BuiltinID) {
9345   default: return nullptr;
9346 
9347   // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
9348   // call __builtin_readcyclecounter.
9349   case PPC::BI__builtin_ppc_get_timebase:
9350     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
9351 
9352   // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
9353   case PPC::BI__builtin_altivec_lvx:
9354   case PPC::BI__builtin_altivec_lvxl:
9355   case PPC::BI__builtin_altivec_lvebx:
9356   case PPC::BI__builtin_altivec_lvehx:
9357   case PPC::BI__builtin_altivec_lvewx:
9358   case PPC::BI__builtin_altivec_lvsl:
9359   case PPC::BI__builtin_altivec_lvsr:
9360   case PPC::BI__builtin_vsx_lxvd2x:
9361   case PPC::BI__builtin_vsx_lxvw4x:
9362   case PPC::BI__builtin_vsx_lxvd2x_be:
9363   case PPC::BI__builtin_vsx_lxvw4x_be:
9364   case PPC::BI__builtin_vsx_lxvl:
9365   case PPC::BI__builtin_vsx_lxvll:
9366   {
9367     if(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
9368        BuiltinID == PPC::BI__builtin_vsx_lxvll){
9369       Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
9370     }else {
9371       Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
9372       Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
9373       Ops.pop_back();
9374     }
9375 
9376     switch (BuiltinID) {
9377     default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
9378     case PPC::BI__builtin_altivec_lvx:
9379       ID = Intrinsic::ppc_altivec_lvx;
9380       break;
9381     case PPC::BI__builtin_altivec_lvxl:
9382       ID = Intrinsic::ppc_altivec_lvxl;
9383       break;
9384     case PPC::BI__builtin_altivec_lvebx:
9385       ID = Intrinsic::ppc_altivec_lvebx;
9386       break;
9387     case PPC::BI__builtin_altivec_lvehx:
9388       ID = Intrinsic::ppc_altivec_lvehx;
9389       break;
9390     case PPC::BI__builtin_altivec_lvewx:
9391       ID = Intrinsic::ppc_altivec_lvewx;
9392       break;
9393     case PPC::BI__builtin_altivec_lvsl:
9394       ID = Intrinsic::ppc_altivec_lvsl;
9395       break;
9396     case PPC::BI__builtin_altivec_lvsr:
9397       ID = Intrinsic::ppc_altivec_lvsr;
9398       break;
9399     case PPC::BI__builtin_vsx_lxvd2x:
9400       ID = Intrinsic::ppc_vsx_lxvd2x;
9401       break;
9402     case PPC::BI__builtin_vsx_lxvw4x:
9403       ID = Intrinsic::ppc_vsx_lxvw4x;
9404       break;
9405     case PPC::BI__builtin_vsx_lxvd2x_be:
9406       ID = Intrinsic::ppc_vsx_lxvd2x_be;
9407       break;
9408     case PPC::BI__builtin_vsx_lxvw4x_be:
9409       ID = Intrinsic::ppc_vsx_lxvw4x_be;
9410       break;
9411     case PPC::BI__builtin_vsx_lxvl:
9412       ID = Intrinsic::ppc_vsx_lxvl;
9413       break;
9414     case PPC::BI__builtin_vsx_lxvll:
9415       ID = Intrinsic::ppc_vsx_lxvll;
9416       break;
9417     }
9418     llvm::Function *F = CGM.getIntrinsic(ID);
9419     return Builder.CreateCall(F, Ops, "");
9420   }
9421 
9422   // vec_st, vec_xst_be
9423   case PPC::BI__builtin_altivec_stvx:
9424   case PPC::BI__builtin_altivec_stvxl:
9425   case PPC::BI__builtin_altivec_stvebx:
9426   case PPC::BI__builtin_altivec_stvehx:
9427   case PPC::BI__builtin_altivec_stvewx:
9428   case PPC::BI__builtin_vsx_stxvd2x:
9429   case PPC::BI__builtin_vsx_stxvw4x:
9430   case PPC::BI__builtin_vsx_stxvd2x_be:
9431   case PPC::BI__builtin_vsx_stxvw4x_be:
9432   case PPC::BI__builtin_vsx_stxvl:
9433   case PPC::BI__builtin_vsx_stxvll:
9434   {
9435     if(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
9436       BuiltinID == PPC::BI__builtin_vsx_stxvll ){
9437       Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
9438     }else {
9439       Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
9440       Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
9441       Ops.pop_back();
9442     }
9443 
9444     switch (BuiltinID) {
9445     default: llvm_unreachable("Unsupported st intrinsic!");
9446     case PPC::BI__builtin_altivec_stvx:
9447       ID = Intrinsic::ppc_altivec_stvx;
9448       break;
9449     case PPC::BI__builtin_altivec_stvxl:
9450       ID = Intrinsic::ppc_altivec_stvxl;
9451       break;
9452     case PPC::BI__builtin_altivec_stvebx:
9453       ID = Intrinsic::ppc_altivec_stvebx;
9454       break;
9455     case PPC::BI__builtin_altivec_stvehx:
9456       ID = Intrinsic::ppc_altivec_stvehx;
9457       break;
9458     case PPC::BI__builtin_altivec_stvewx:
9459       ID = Intrinsic::ppc_altivec_stvewx;
9460       break;
9461     case PPC::BI__builtin_vsx_stxvd2x:
9462       ID = Intrinsic::ppc_vsx_stxvd2x;
9463       break;
9464     case PPC::BI__builtin_vsx_stxvw4x:
9465       ID = Intrinsic::ppc_vsx_stxvw4x;
9466       break;
9467     case PPC::BI__builtin_vsx_stxvd2x_be:
9468       ID = Intrinsic::ppc_vsx_stxvd2x_be;
9469       break;
9470     case PPC::BI__builtin_vsx_stxvw4x_be:
9471       ID = Intrinsic::ppc_vsx_stxvw4x_be;
9472       break;
9473     case PPC::BI__builtin_vsx_stxvl:
9474       ID = Intrinsic::ppc_vsx_stxvl;
9475       break;
9476     case PPC::BI__builtin_vsx_stxvll:
9477       ID = Intrinsic::ppc_vsx_stxvll;
9478       break;
9479     }
9480     llvm::Function *F = CGM.getIntrinsic(ID);
9481     return Builder.CreateCall(F, Ops, "");
9482   }
9483   // Square root
9484   case PPC::BI__builtin_vsx_xvsqrtsp:
9485   case PPC::BI__builtin_vsx_xvsqrtdp: {
9486     llvm::Type *ResultType = ConvertType(E->getType());
9487     Value *X = EmitScalarExpr(E->getArg(0));
9488     ID = Intrinsic::sqrt;
9489     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
9490     return Builder.CreateCall(F, X);
9491   }
9492   // Count leading zeros
9493   case PPC::BI__builtin_altivec_vclzb:
9494   case PPC::BI__builtin_altivec_vclzh:
9495   case PPC::BI__builtin_altivec_vclzw:
9496   case PPC::BI__builtin_altivec_vclzd: {
9497     llvm::Type *ResultType = ConvertType(E->getType());
9498     Value *X = EmitScalarExpr(E->getArg(0));
9499     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
9500     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
9501     return Builder.CreateCall(F, {X, Undef});
9502   }
9503   case PPC::BI__builtin_altivec_vctzb:
9504   case PPC::BI__builtin_altivec_vctzh:
9505   case PPC::BI__builtin_altivec_vctzw:
9506   case PPC::BI__builtin_altivec_vctzd: {
9507     llvm::Type *ResultType = ConvertType(E->getType());
9508     Value *X = EmitScalarExpr(E->getArg(0));
9509     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
9510     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
9511     return Builder.CreateCall(F, {X, Undef});
9512   }
9513   case PPC::BI__builtin_altivec_vpopcntb:
9514   case PPC::BI__builtin_altivec_vpopcnth:
9515   case PPC::BI__builtin_altivec_vpopcntw:
9516   case PPC::BI__builtin_altivec_vpopcntd: {
9517     llvm::Type *ResultType = ConvertType(E->getType());
9518     Value *X = EmitScalarExpr(E->getArg(0));
9519     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
9520     return Builder.CreateCall(F, X);
9521   }
9522   // Copy sign
9523   case PPC::BI__builtin_vsx_xvcpsgnsp:
9524   case PPC::BI__builtin_vsx_xvcpsgndp: {
9525     llvm::Type *ResultType = ConvertType(E->getType());
9526     Value *X = EmitScalarExpr(E->getArg(0));
9527     Value *Y = EmitScalarExpr(E->getArg(1));
9528     ID = Intrinsic::copysign;
9529     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
9530     return Builder.CreateCall(F, {X, Y});
9531   }
9532   // Rounding/truncation
9533   case PPC::BI__builtin_vsx_xvrspip:
9534   case PPC::BI__builtin_vsx_xvrdpip:
9535   case PPC::BI__builtin_vsx_xvrdpim:
9536   case PPC::BI__builtin_vsx_xvrspim:
9537   case PPC::BI__builtin_vsx_xvrdpi:
9538   case PPC::BI__builtin_vsx_xvrspi:
9539   case PPC::BI__builtin_vsx_xvrdpic:
9540   case PPC::BI__builtin_vsx_xvrspic:
9541   case PPC::BI__builtin_vsx_xvrdpiz:
9542   case PPC::BI__builtin_vsx_xvrspiz: {
9543     llvm::Type *ResultType = ConvertType(E->getType());
9544     Value *X = EmitScalarExpr(E->getArg(0));
9545     if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
9546         BuiltinID == PPC::BI__builtin_vsx_xvrspim)
9547       ID = Intrinsic::floor;
9548     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
9549              BuiltinID == PPC::BI__builtin_vsx_xvrspi)
9550       ID = Intrinsic::round;
9551     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
9552              BuiltinID == PPC::BI__builtin_vsx_xvrspic)
9553       ID = Intrinsic::nearbyint;
9554     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
9555              BuiltinID == PPC::BI__builtin_vsx_xvrspip)
9556       ID = Intrinsic::ceil;
9557     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
9558              BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
9559       ID = Intrinsic::trunc;
9560     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
9561     return Builder.CreateCall(F, X);
9562   }
9563 
9564   // Absolute value
9565   case PPC::BI__builtin_vsx_xvabsdp:
9566   case PPC::BI__builtin_vsx_xvabssp: {
9567     llvm::Type *ResultType = ConvertType(E->getType());
9568     Value *X = EmitScalarExpr(E->getArg(0));
9569     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
9570     return Builder.CreateCall(F, X);
9571   }
9572 
9573   // FMA variations
9574   case PPC::BI__builtin_vsx_xvmaddadp:
9575   case PPC::BI__builtin_vsx_xvmaddasp:
9576   case PPC::BI__builtin_vsx_xvnmaddadp:
9577   case PPC::BI__builtin_vsx_xvnmaddasp:
9578   case PPC::BI__builtin_vsx_xvmsubadp:
9579   case PPC::BI__builtin_vsx_xvmsubasp:
9580   case PPC::BI__builtin_vsx_xvnmsubadp:
9581   case PPC::BI__builtin_vsx_xvnmsubasp: {
9582     llvm::Type *ResultType = ConvertType(E->getType());
9583     Value *X = EmitScalarExpr(E->getArg(0));
9584     Value *Y = EmitScalarExpr(E->getArg(1));
9585     Value *Z = EmitScalarExpr(E->getArg(2));
9586     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
9587     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
9588     switch (BuiltinID) {
9589       case PPC::BI__builtin_vsx_xvmaddadp:
9590       case PPC::BI__builtin_vsx_xvmaddasp:
9591         return Builder.CreateCall(F, {X, Y, Z});
9592       case PPC::BI__builtin_vsx_xvnmaddadp:
9593       case PPC::BI__builtin_vsx_xvnmaddasp:
9594         return Builder.CreateFSub(Zero,
9595                                   Builder.CreateCall(F, {X, Y, Z}), "sub");
9596       case PPC::BI__builtin_vsx_xvmsubadp:
9597       case PPC::BI__builtin_vsx_xvmsubasp:
9598         return Builder.CreateCall(F,
9599                                   {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
9600       case PPC::BI__builtin_vsx_xvnmsubadp:
9601       case PPC::BI__builtin_vsx_xvnmsubasp:
9602         Value *FsubRes =
9603           Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
9604         return Builder.CreateFSub(Zero, FsubRes, "sub");
9605     }
9606     llvm_unreachable("Unknown FMA operation");
9607     return nullptr; // Suppress no-return warning
9608   }
9609 
9610   case PPC::BI__builtin_vsx_insertword: {
9611     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
9612 
    // Third argument is a compile time constant int. It must be clamped
    // to the range [0, 12].
9615     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
9616     assert(ArgCI &&
9617            "Third arg to xxinsertw intrinsic must be constant integer");
9618     const int64_t MaxIndex = 12;
9619     int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
9620 
    // The builtin semantics don't exactly match the xxinsertw instruction's
9622     // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
9623     // word from the first argument, and inserts it in the second argument. The
9624     // instruction extracts the word from its second input register and inserts
9625     // it into its first input register, so swap the first and second arguments.
9626     std::swap(Ops[0], Ops[1]);
9627 
9628     // Need to cast the second argument from a vector of unsigned int to a
9629     // vector of long long.
9630     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
9631 
9632     if (getTarget().isLittleEndian()) {
9633       // Create a shuffle mask of (1, 0)
9634       Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
9635                                    ConstantInt::get(Int32Ty, 0)
9636                                  };
9637       Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
9638 
9639       // Reverse the double words in the vector we will extract from.
9640       Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
9641       Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask);
9642 
9643       // Reverse the index.
9644       Index = MaxIndex - Index;
9645     }
9646 
9647     // Intrinsic expects the first arg to be a vector of int.
9648     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
9649     Ops[2] = ConstantInt::getSigned(Int32Ty, Index);
9650     return Builder.CreateCall(F, Ops);
9651   }
9652 
9653   case PPC::BI__builtin_vsx_extractuword: {
9654     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
9655 
9656     // Intrinsic expects the first argument to be a vector of doublewords.
9657     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
9658 
9659     // The second argument is a compile time constant int that needs to
9660     // be clamped to the range [0, 12].
9661     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]);
9662     assert(ArgCI &&
9663            "Second Arg to xxextractuw intrinsic must be a constant integer!");
9664     const int64_t MaxIndex = 12;
9665     int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
9666 
9667     if (getTarget().isLittleEndian()) {
9668       // Reverse the index.
9669       Index = MaxIndex - Index;
9670       Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
9671 
9672       // Emit the call, then reverse the double words of the results vector.
9673       Value *Call = Builder.CreateCall(F, Ops);
9674 
9675       // Create a shuffle mask of (1, 0)
9676       Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
9677                                    ConstantInt::get(Int32Ty, 0)
9678                                  };
9679       Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
9680 
9681       Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask);
9682       return ShuffleCall;
9683     } else {
9684       Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
9685       return Builder.CreateCall(F, Ops);
9686     }
9687   }
9688 
9689   case PPC::BI__builtin_vsx_xxpermdi: {
9690     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
9691     assert(ArgCI && "Third arg must be constant integer!");
9692 
9693     unsigned Index = ArgCI->getZExtValue();
9694     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
9695     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
9696 
9697     // Element zero comes from the first input vector and element one comes from
9698     // the second. The element indices within each vector are numbered in big
9699     // endian order so the shuffle mask must be adjusted for this on little
9700     // endian platforms (i.e. index is complemented and source vector reversed).
9701     unsigned ElemIdx0;
9702     unsigned ElemIdx1;
9703     if (getTarget().isLittleEndian()) {
9704       ElemIdx0 = (~Index & 1) + 2;
9705       ElemIdx1 = (~Index & 2) >> 1;
9706     } else { // BigEndian
9707       ElemIdx0 = (Index & 2) >> 1;
9708       ElemIdx1 = 2 + (Index & 1);
9709     }
9710 
9711     Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0),
9712                                 ConstantInt::get(Int32Ty, ElemIdx1)};
9713     Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
9714 
9715     Value *ShuffleCall =
9716         Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
9717     QualType BIRetType = E->getType();
9718     auto RetTy = ConvertType(BIRetType);
9719     return Builder.CreateBitCast(ShuffleCall, RetTy);
9720   }
9721 
9722   case PPC::BI__builtin_vsx_xxsldwi: {
9723     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
9724     assert(ArgCI && "Third argument must be a compile time constant");
9725     unsigned Index = ArgCI->getZExtValue() & 0x3;
9726     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
9727     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4));
9728 
9729     // Create a shuffle mask
9730     unsigned ElemIdx0;
9731     unsigned ElemIdx1;
9732     unsigned ElemIdx2;
9733     unsigned ElemIdx3;
9734     if (getTarget().isLittleEndian()) {
9735       // Little endian element N comes from element 8+N-Index of the
9736       // concatenated wide vector (of course, using modulo arithmetic on
9737       // the total number of elements).
9738       ElemIdx0 = (8 - Index) % 8;
9739       ElemIdx1 = (9 - Index) % 8;
9740       ElemIdx2 = (10 - Index) % 8;
9741       ElemIdx3 = (11 - Index) % 8;
9742     } else {
9743       // Big endian ElemIdx<N> = Index + N
9744       ElemIdx0 = Index;
9745       ElemIdx1 = Index + 1;
9746       ElemIdx2 = Index + 2;
9747       ElemIdx3 = Index + 3;
9748     }
9749 
9750     Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0),
9751                                 ConstantInt::get(Int32Ty, ElemIdx1),
9752                                 ConstantInt::get(Int32Ty, ElemIdx2),
9753                                 ConstantInt::get(Int32Ty, ElemIdx3)};
9754 
9755     Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
9756     Value *ShuffleCall =
9757         Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
9758     QualType BIRetType = E->getType();
9759     auto RetTy = ConvertType(BIRetType);
9760     return Builder.CreateBitCast(ShuffleCall, RetTy);
9761   }
9762   }
9763 }
9764 
9765 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
9766                                               const CallExpr *E) {
9767   switch (BuiltinID) {
9768   case AMDGPU::BI__builtin_amdgcn_div_scale:
9769   case AMDGPU::BI__builtin_amdgcn_div_scalef: {
9770     // Translate from the intrinsics's struct return to the builtin's out
9771     // argument.
9772 
9773     Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
9774 
9775     llvm::Value *X = EmitScalarExpr(E->getArg(0));
9776     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
9777     llvm::Value *Z = EmitScalarExpr(E->getArg(2));
9778 
9779     llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
9780                                            X->getType());
9781 
9782     llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
9783 
9784     llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
9785     llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
9786 
9787     llvm::Type *RealFlagType
9788       = FlagOutPtr.getPointer()->getType()->getPointerElementType();
9789 
9790     llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
9791     Builder.CreateStore(FlagExt, FlagOutPtr);
9792     return Result;
9793   }
9794   case AMDGPU::BI__builtin_amdgcn_div_fmas:
9795   case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
9796     llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
9797     llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
9798     llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
9799     llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
9800 
9801     llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
9802                                       Src0->getType());
9803     llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
9804     return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
9805   }
9806 
9807   case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
9808     return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
9809   case AMDGPU::BI__builtin_amdgcn_mov_dpp: {
9810     llvm::SmallVector<llvm::Value *, 5> Args;
9811     for (unsigned I = 0; I != 5; ++I)
9812       Args.push_back(EmitScalarExpr(E->getArg(I)));
9813     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp,
9814                                     Args[0]->getType());
9815     return Builder.CreateCall(F, Args);
9816   }
9817   case AMDGPU::BI__builtin_amdgcn_div_fixup:
9818   case AMDGPU::BI__builtin_amdgcn_div_fixupf:
9819   case AMDGPU::BI__builtin_amdgcn_div_fixuph:
9820     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
9821   case AMDGPU::BI__builtin_amdgcn_trig_preop:
9822   case AMDGPU::BI__builtin_amdgcn_trig_preopf:
9823     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
9824   case AMDGPU::BI__builtin_amdgcn_rcp:
9825   case AMDGPU::BI__builtin_amdgcn_rcpf:
9826   case AMDGPU::BI__builtin_amdgcn_rcph:
9827     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
9828   case AMDGPU::BI__builtin_amdgcn_rsq:
9829   case AMDGPU::BI__builtin_amdgcn_rsqf:
9830   case AMDGPU::BI__builtin_amdgcn_rsqh:
9831     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
9832   case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
9833   case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
9834     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
9835   case AMDGPU::BI__builtin_amdgcn_sinf:
9836   case AMDGPU::BI__builtin_amdgcn_sinh:
9837     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
9838   case AMDGPU::BI__builtin_amdgcn_cosf:
9839   case AMDGPU::BI__builtin_amdgcn_cosh:
9840     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
9841   case AMDGPU::BI__builtin_amdgcn_log_clampf:
9842     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
9843   case AMDGPU::BI__builtin_amdgcn_ldexp:
9844   case AMDGPU::BI__builtin_amdgcn_ldexpf:
9845   case AMDGPU::BI__builtin_amdgcn_ldexph:
9846     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
9847   case AMDGPU::BI__builtin_amdgcn_frexp_mant:
9848   case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
9849   case AMDGPU::BI__builtin_amdgcn_frexp_manth:
9850     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
9851   case AMDGPU::BI__builtin_amdgcn_frexp_exp:
9852   case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
9853     Value *Src0 = EmitScalarExpr(E->getArg(0));
9854     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
9855                                 { Builder.getInt32Ty(), Src0->getType() });
9856     return Builder.CreateCall(F, Src0);
9857   }
9858   case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
9859     Value *Src0 = EmitScalarExpr(E->getArg(0));
9860     Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
9861                                 { Builder.getInt16Ty(), Src0->getType() });
9862     return Builder.CreateCall(F, Src0);
9863   }
9864   case AMDGPU::BI__builtin_amdgcn_fract:
9865   case AMDGPU::BI__builtin_amdgcn_fractf:
9866   case AMDGPU::BI__builtin_amdgcn_fracth:
9867     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
9868   case AMDGPU::BI__builtin_amdgcn_lerp:
9869     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
9870   case AMDGPU::BI__builtin_amdgcn_uicmp:
9871   case AMDGPU::BI__builtin_amdgcn_uicmpl:
9872   case AMDGPU::BI__builtin_amdgcn_sicmp:
9873   case AMDGPU::BI__builtin_amdgcn_sicmpl:
9874     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp);
9875   case AMDGPU::BI__builtin_amdgcn_fcmp:
9876   case AMDGPU::BI__builtin_amdgcn_fcmpf:
9877     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp);
9878   case AMDGPU::BI__builtin_amdgcn_class:
9879   case AMDGPU::BI__builtin_amdgcn_classf:
9880   case AMDGPU::BI__builtin_amdgcn_classh:
9881     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
9882   case AMDGPU::BI__builtin_amdgcn_fmed3f:
9883   case AMDGPU::BI__builtin_amdgcn_fmed3h:
9884     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
9885   case AMDGPU::BI__builtin_amdgcn_read_exec: {
9886     CallInst *CI = cast<CallInst>(
9887       EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
9888     CI->setConvergent();
9889     return CI;
9890   }
9891   case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
9892   case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
9893     StringRef RegName = BuiltinID == AMDGPU::BI__builtin_amdgcn_read_exec_lo ?
9894       "exec_lo" : "exec_hi";
9895     CallInst *CI = cast<CallInst>(
9896       EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, true, RegName));
9897     CI->setConvergent();
9898     return CI;
9899   }
9900 
9901   // amdgcn workitem
9902   case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
9903     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
9904   case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
9905     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
9906   case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
9907     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
9908 
9909   // r600 intrinsics
9910   case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
9911   case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
9912     return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
9913   case AMDGPU::BI__builtin_r600_read_tidig_x:
9914     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
9915   case AMDGPU::BI__builtin_r600_read_tidig_y:
9916     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
9917   case AMDGPU::BI__builtin_r600_read_tidig_z:
9918     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
9919   default:
9920     return nullptr;
9921   }
9922 }
9923 
9924 /// Handle a SystemZ function in which the final argument is a pointer
9925 /// to an int that receives the post-instruction CC value.  At the LLVM level
9926 /// this is represented as a function that returns a {result, cc} pair.
9927 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
9928                                          unsigned IntrinsicID,
9929                                          const CallExpr *E) {
9930   unsigned NumArgs = E->getNumArgs() - 1;
9931   SmallVector<Value *, 8> Args(NumArgs);
9932   for (unsigned I = 0; I < NumArgs; ++I)
9933     Args[I] = CGF.EmitScalarExpr(E->getArg(I));
9934   Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
9935   Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
9936   Value *Call = CGF.Builder.CreateCall(F, Args);
9937   Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
9938   CGF.Builder.CreateStore(CC, CCPtr);
9939   return CGF.Builder.CreateExtractValue(Call, 0);
9940 }
9941 
9942 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
9943                                                const CallExpr *E) {
9944   switch (BuiltinID) {
9945   case SystemZ::BI__builtin_tbegin: {
9946     Value *TDB = EmitScalarExpr(E->getArg(0));
9947     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
9948     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
9949     return Builder.CreateCall(F, {TDB, Control});
9950   }
9951   case SystemZ::BI__builtin_tbegin_nofloat: {
9952     Value *TDB = EmitScalarExpr(E->getArg(0));
9953     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
9954     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
9955     return Builder.CreateCall(F, {TDB, Control});
9956   }
9957   case SystemZ::BI__builtin_tbeginc: {
9958     Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
9959     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
9960     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
9961     return Builder.CreateCall(F, {TDB, Control});
9962   }
9963   case SystemZ::BI__builtin_tabort: {
9964     Value *Data = EmitScalarExpr(E->getArg(0));
9965     Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
9966     return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
9967   }
9968   case SystemZ::BI__builtin_non_tx_store: {
9969     Value *Address = EmitScalarExpr(E->getArg(0));
9970     Value *Data = EmitScalarExpr(E->getArg(1));
9971     Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
9972     return Builder.CreateCall(F, {Data, Address});
9973   }
9974 
9975   // Vector builtins.  Note that most vector builtins are mapped automatically
9976   // to target-specific LLVM intrinsics.  The ones handled specially here can
9977   // be represented via standard LLVM IR, which is preferable to enable common
9978   // LLVM optimizations.
9979 
9980   case SystemZ::BI__builtin_s390_vpopctb:
9981   case SystemZ::BI__builtin_s390_vpopcth:
9982   case SystemZ::BI__builtin_s390_vpopctf:
9983   case SystemZ::BI__builtin_s390_vpopctg: {
9984     llvm::Type *ResultType = ConvertType(E->getType());
9985     Value *X = EmitScalarExpr(E->getArg(0));
9986     Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
9987     return Builder.CreateCall(F, X);
9988   }
9989 
9990   case SystemZ::BI__builtin_s390_vclzb:
9991   case SystemZ::BI__builtin_s390_vclzh:
9992   case SystemZ::BI__builtin_s390_vclzf:
9993   case SystemZ::BI__builtin_s390_vclzg: {
9994     llvm::Type *ResultType = ConvertType(E->getType());
9995     Value *X = EmitScalarExpr(E->getArg(0));
9996     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
9997     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
9998     return Builder.CreateCall(F, {X, Undef});
9999   }
10000 
10001   case SystemZ::BI__builtin_s390_vctzb:
10002   case SystemZ::BI__builtin_s390_vctzh:
10003   case SystemZ::BI__builtin_s390_vctzf:
10004   case SystemZ::BI__builtin_s390_vctzg: {
10005     llvm::Type *ResultType = ConvertType(E->getType());
10006     Value *X = EmitScalarExpr(E->getArg(0));
10007     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
10008     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
10009     return Builder.CreateCall(F, {X, Undef});
10010   }
10011 
10012   case SystemZ::BI__builtin_s390_vfsqsb:
10013   case SystemZ::BI__builtin_s390_vfsqdb: {
10014     llvm::Type *ResultType = ConvertType(E->getType());
10015     Value *X = EmitScalarExpr(E->getArg(0));
10016     Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
10017     return Builder.CreateCall(F, X);
10018   }
10019   case SystemZ::BI__builtin_s390_vfmasb:
10020   case SystemZ::BI__builtin_s390_vfmadb: {
10021     llvm::Type *ResultType = ConvertType(E->getType());
10022     Value *X = EmitScalarExpr(E->getArg(0));
10023     Value *Y = EmitScalarExpr(E->getArg(1));
10024     Value *Z = EmitScalarExpr(E->getArg(2));
10025     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
10026     return Builder.CreateCall(F, {X, Y, Z});
10027   }
10028   case SystemZ::BI__builtin_s390_vfmssb:
10029   case SystemZ::BI__builtin_s390_vfmsdb: {
10030     llvm::Type *ResultType = ConvertType(E->getType());
10031     Value *X = EmitScalarExpr(E->getArg(0));
10032     Value *Y = EmitScalarExpr(E->getArg(1));
10033     Value *Z = EmitScalarExpr(E->getArg(2));
10034     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
10035     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
10036     return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
10037   }
10038   case SystemZ::BI__builtin_s390_vfnmasb:
10039   case SystemZ::BI__builtin_s390_vfnmadb: {
10040     llvm::Type *ResultType = ConvertType(E->getType());
10041     Value *X = EmitScalarExpr(E->getArg(0));
10042     Value *Y = EmitScalarExpr(E->getArg(1));
10043     Value *Z = EmitScalarExpr(E->getArg(2));
10044     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
10045     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
10046     return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, Z}), "sub");
10047   }
10048   case SystemZ::BI__builtin_s390_vfnmssb:
10049   case SystemZ::BI__builtin_s390_vfnmsdb: {
10050     llvm::Type *ResultType = ConvertType(E->getType());
10051     Value *X = EmitScalarExpr(E->getArg(0));
10052     Value *Y = EmitScalarExpr(E->getArg(1));
10053     Value *Z = EmitScalarExpr(E->getArg(2));
10054     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
10055     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
10056     Value *NegZ = Builder.CreateFSub(Zero, Z, "sub");
10057     return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, NegZ}));
10058   }
10059   case SystemZ::BI__builtin_s390_vflpsb:
10060   case SystemZ::BI__builtin_s390_vflpdb: {
10061     llvm::Type *ResultType = ConvertType(E->getType());
10062     Value *X = EmitScalarExpr(E->getArg(0));
10063     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
10064     return Builder.CreateCall(F, X);
10065   }
10066   case SystemZ::BI__builtin_s390_vflnsb:
10067   case SystemZ::BI__builtin_s390_vflndb: {
10068     llvm::Type *ResultType = ConvertType(E->getType());
10069     Value *X = EmitScalarExpr(E->getArg(0));
10070     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
10071     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
10072     return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
10073   }
10074   case SystemZ::BI__builtin_s390_vfisb:
10075   case SystemZ::BI__builtin_s390_vfidb: {
10076     llvm::Type *ResultType = ConvertType(E->getType());
10077     Value *X = EmitScalarExpr(E->getArg(0));
10078     // Constant-fold the M4 and M5 mask arguments.
10079     llvm::APSInt M4, M5;
10080     bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
10081     bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
10082     assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
10083     (void)IsConstM4; (void)IsConstM5;
10084     // Check whether this instance can be represented via a LLVM standard
10085     // intrinsic.  We only support some combinations of M4 and M5.
10086     Intrinsic::ID ID = Intrinsic::not_intrinsic;
10087     switch (M4.getZExtValue()) {
10088     default: break;
10089     case 0:  // IEEE-inexact exception allowed
10090       switch (M5.getZExtValue()) {
10091       default: break;
10092       case 0: ID = Intrinsic::rint; break;
10093       }
10094       break;
10095     case 4:  // IEEE-inexact exception suppressed
10096       switch (M5.getZExtValue()) {
10097       default: break;
10098       case 0: ID = Intrinsic::nearbyint; break;
10099       case 1: ID = Intrinsic::round; break;
10100       case 5: ID = Intrinsic::trunc; break;
10101       case 6: ID = Intrinsic::ceil; break;
10102       case 7: ID = Intrinsic::floor; break;
10103       }
10104       break;
10105     }
10106     if (ID != Intrinsic::not_intrinsic) {
10107       Function *F = CGM.getIntrinsic(ID, ResultType);
10108       return Builder.CreateCall(F, X);
10109     }
10110     switch (BuiltinID) {
10111       case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
10112       case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
10113       default: llvm_unreachable("Unknown BuiltinID");
10114     }
10115     Function *F = CGM.getIntrinsic(ID);
10116     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
10117     Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
10118     return Builder.CreateCall(F, {X, M4Value, M5Value});
10119   }
10120   case SystemZ::BI__builtin_s390_vfmaxsb:
10121   case SystemZ::BI__builtin_s390_vfmaxdb: {
10122     llvm::Type *ResultType = ConvertType(E->getType());
10123     Value *X = EmitScalarExpr(E->getArg(0));
10124     Value *Y = EmitScalarExpr(E->getArg(1));
10125     // Constant-fold the M4 mask argument.
10126     llvm::APSInt M4;
10127     bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
10128     assert(IsConstM4 && "Constant arg isn't actually constant?");
10129     (void)IsConstM4;
10130     // Check whether this instance can be represented via a LLVM standard
10131     // intrinsic.  We only support some values of M4.
10132     Intrinsic::ID ID = Intrinsic::not_intrinsic;
10133     switch (M4.getZExtValue()) {
10134     default: break;
10135     case 4: ID = Intrinsic::maxnum; break;
10136     }
10137     if (ID != Intrinsic::not_intrinsic) {
10138       Function *F = CGM.getIntrinsic(ID, ResultType);
10139       return Builder.CreateCall(F, {X, Y});
10140     }
10141     switch (BuiltinID) {
10142       case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
10143       case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
10144       default: llvm_unreachable("Unknown BuiltinID");
10145     }
10146     Function *F = CGM.getIntrinsic(ID);
10147     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
10148     return Builder.CreateCall(F, {X, Y, M4Value});
10149   }
10150   case SystemZ::BI__builtin_s390_vfminsb:
10151   case SystemZ::BI__builtin_s390_vfmindb: {
10152     llvm::Type *ResultType = ConvertType(E->getType());
10153     Value *X = EmitScalarExpr(E->getArg(0));
10154     Value *Y = EmitScalarExpr(E->getArg(1));
10155     // Constant-fold the M4 mask argument.
10156     llvm::APSInt M4;
10157     bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
10158     assert(IsConstM4 && "Constant arg isn't actually constant?");
10159     (void)IsConstM4;
10160     // Check whether this instance can be represented via a LLVM standard
10161     // intrinsic.  We only support some values of M4.
10162     Intrinsic::ID ID = Intrinsic::not_intrinsic;
10163     switch (M4.getZExtValue()) {
10164     default: break;
10165     case 4: ID = Intrinsic::minnum; break;
10166     }
10167     if (ID != Intrinsic::not_intrinsic) {
10168       Function *F = CGM.getIntrinsic(ID, ResultType);
10169       return Builder.CreateCall(F, {X, Y});
10170     }
10171     switch (BuiltinID) {
10172       case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
10173       case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
10174       default: llvm_unreachable("Unknown BuiltinID");
10175     }
10176     Function *F = CGM.getIntrinsic(ID);
10177     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
10178     return Builder.CreateCall(F, {X, Y, M4Value});
10179   }
10180 
  // Vector intrinsics that output the post-instruction CC value.
10182 
10183 #define INTRINSIC_WITH_CC(NAME) \
10184     case SystemZ::BI__builtin_##NAME: \
10185       return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
10186 
10187   INTRINSIC_WITH_CC(s390_vpkshs);
10188   INTRINSIC_WITH_CC(s390_vpksfs);
10189   INTRINSIC_WITH_CC(s390_vpksgs);
10190 
10191   INTRINSIC_WITH_CC(s390_vpklshs);
10192   INTRINSIC_WITH_CC(s390_vpklsfs);
10193   INTRINSIC_WITH_CC(s390_vpklsgs);
10194 
10195   INTRINSIC_WITH_CC(s390_vceqbs);
10196   INTRINSIC_WITH_CC(s390_vceqhs);
10197   INTRINSIC_WITH_CC(s390_vceqfs);
10198   INTRINSIC_WITH_CC(s390_vceqgs);
10199 
10200   INTRINSIC_WITH_CC(s390_vchbs);
10201   INTRINSIC_WITH_CC(s390_vchhs);
10202   INTRINSIC_WITH_CC(s390_vchfs);
10203   INTRINSIC_WITH_CC(s390_vchgs);
10204 
10205   INTRINSIC_WITH_CC(s390_vchlbs);
10206   INTRINSIC_WITH_CC(s390_vchlhs);
10207   INTRINSIC_WITH_CC(s390_vchlfs);
10208   INTRINSIC_WITH_CC(s390_vchlgs);
10209 
10210   INTRINSIC_WITH_CC(s390_vfaebs);
10211   INTRINSIC_WITH_CC(s390_vfaehs);
10212   INTRINSIC_WITH_CC(s390_vfaefs);
10213 
10214   INTRINSIC_WITH_CC(s390_vfaezbs);
10215   INTRINSIC_WITH_CC(s390_vfaezhs);
10216   INTRINSIC_WITH_CC(s390_vfaezfs);
10217 
10218   INTRINSIC_WITH_CC(s390_vfeebs);
10219   INTRINSIC_WITH_CC(s390_vfeehs);
10220   INTRINSIC_WITH_CC(s390_vfeefs);
10221 
10222   INTRINSIC_WITH_CC(s390_vfeezbs);
10223   INTRINSIC_WITH_CC(s390_vfeezhs);
10224   INTRINSIC_WITH_CC(s390_vfeezfs);
10225 
10226   INTRINSIC_WITH_CC(s390_vfenebs);
10227   INTRINSIC_WITH_CC(s390_vfenehs);
10228   INTRINSIC_WITH_CC(s390_vfenefs);
10229 
10230   INTRINSIC_WITH_CC(s390_vfenezbs);
10231   INTRINSIC_WITH_CC(s390_vfenezhs);
10232   INTRINSIC_WITH_CC(s390_vfenezfs);
10233 
10234   INTRINSIC_WITH_CC(s390_vistrbs);
10235   INTRINSIC_WITH_CC(s390_vistrhs);
10236   INTRINSIC_WITH_CC(s390_vistrfs);
10237 
10238   INTRINSIC_WITH_CC(s390_vstrcbs);
10239   INTRINSIC_WITH_CC(s390_vstrchs);
10240   INTRINSIC_WITH_CC(s390_vstrcfs);
10241 
10242   INTRINSIC_WITH_CC(s390_vstrczbs);
10243   INTRINSIC_WITH_CC(s390_vstrczhs);
10244   INTRINSIC_WITH_CC(s390_vstrczfs);
10245 
10246   INTRINSIC_WITH_CC(s390_vfcesbs);
10247   INTRINSIC_WITH_CC(s390_vfcedbs);
10248   INTRINSIC_WITH_CC(s390_vfchsbs);
10249   INTRINSIC_WITH_CC(s390_vfchdbs);
10250   INTRINSIC_WITH_CC(s390_vfchesbs);
10251   INTRINSIC_WITH_CC(s390_vfchedbs);
10252 
10253   INTRINSIC_WITH_CC(s390_vftcisb);
10254   INTRINSIC_WITH_CC(s390_vftcidb);
10255 
10256 #undef INTRINSIC_WITH_CC
10257 
10258   default:
10259     return nullptr;
10260   }
10261 }
10262 
/// EmitNVPTXBuiltinExpr - Emit LLVM IR for a call to an NVPTX target builtin.
///
/// \param BuiltinID one of the NVPTX::BI__* builtin identifiers.
/// \param E the call expression; its arguments are emitted as the operands.
/// \returns the emitted value, or nullptr when \p BuiltinID is not handled
/// here, when a WMMA layout/satf argument is not an integer constant
/// expression, or when the MMA layout value lies outside [0, 3].
Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
                                             const CallExpr *E) {
  // Emits a call to the ldg intrinsic IntrinsicID. The intrinsic is overloaded
  // on the pointee and pointer types of argument 0 and is passed the pointer
  // together with the natural alignment of the pointee type as an i32
  // constant.
  auto MakeLdg = [&](unsigned IntrinsicID) {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    clang::CharUnits Align =
        getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
    return Builder.CreateCall(
        CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
                                       Ptr->getType()}),
        {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
  };
  // Emits a call to the two-operand scoped atomic intrinsic IntrinsicID. The
  // intrinsic is overloaded on the pointee and pointer types of argument 0 and
  // is passed the pointer and argument 1.
  auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
                                       Ptr->getType()}),
        {Ptr, EmitScalarExpr(E->getArg(1))});
  };
  switch (BuiltinID) {
  // Integer atomics: each group is forwarded to MakeBinaryAtomicValue with the
  // matching AtomicRMWInst operation.
  case NVPTX::BI__nvvm_atom_add_gen_i:
  case NVPTX::BI__nvvm_atom_add_gen_l:
  case NVPTX::BI__nvvm_atom_add_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);

  case NVPTX::BI__nvvm_atom_sub_gen_i:
  case NVPTX::BI__nvvm_atom_sub_gen_l:
  case NVPTX::BI__nvvm_atom_sub_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);

  case NVPTX::BI__nvvm_atom_and_gen_i:
  case NVPTX::BI__nvvm_atom_and_gen_l:
  case NVPTX::BI__nvvm_atom_and_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);

  case NVPTX::BI__nvvm_atom_or_gen_i:
  case NVPTX::BI__nvvm_atom_or_gen_l:
  case NVPTX::BI__nvvm_atom_or_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);

  case NVPTX::BI__nvvm_atom_xor_gen_i:
  case NVPTX::BI__nvvm_atom_xor_gen_l:
  case NVPTX::BI__nvvm_atom_xor_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);

  case NVPTX::BI__nvvm_atom_xchg_gen_i:
  case NVPTX::BI__nvvm_atom_xchg_gen_l:
  case NVPTX::BI__nvvm_atom_xchg_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);

  // Signed and unsigned max/min builtins select different AtomicRMWInst
  // operations (Max/Min vs. UMax/UMin).
  case NVPTX::BI__nvvm_atom_max_gen_i:
  case NVPTX::BI__nvvm_atom_max_gen_l:
  case NVPTX::BI__nvvm_atom_max_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);

  case NVPTX::BI__nvvm_atom_max_gen_ui:
  case NVPTX::BI__nvvm_atom_max_gen_ul:
  case NVPTX::BI__nvvm_atom_max_gen_ull:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);

  case NVPTX::BI__nvvm_atom_min_gen_i:
  case NVPTX::BI__nvvm_atom_min_gen_l:
  case NVPTX::BI__nvvm_atom_min_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);

  case NVPTX::BI__nvvm_atom_min_gen_ui:
  case NVPTX::BI__nvvm_atom_min_gen_ul:
  case NVPTX::BI__nvvm_atom_min_gen_ull:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);

  case NVPTX::BI__nvvm_atom_cas_gen_i:
  case NVPTX::BI__nvvm_atom_cas_gen_l:
  case NVPTX::BI__nvvm_atom_cas_gen_ll:
    // __nvvm_atom_cas_gen_* should return the old value rather than the
    // success flag.
    return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);

  case NVPTX::BI__nvvm_atom_add_gen_f: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    Value *Val = EmitScalarExpr(E->getArg(1));
    // atomicrmw only deals with integer arguments so we need to use
    // LLVM's nvvm_atomic_load_add_f32 intrinsic for that.
    Value *FnALAF32 =
        CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
    return Builder.CreateCall(FnALAF32, {Ptr, Val});
  }

  case NVPTX::BI__nvvm_atom_add_gen_d: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    Value *Val = EmitScalarExpr(E->getArg(1));
    // atomicrmw only deals with integer arguments, so we need to use
    // LLVM's nvvm_atomic_load_add_f64 intrinsic.
    Value *FnALAF64 =
        CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f64, Ptr->getType());
    return Builder.CreateCall(FnALAF64, {Ptr, Val});
  }

  // Increment/decrement are emitted through dedicated intrinsics that are
  // overloaded on the pointer type only.
  case NVPTX::BI__nvvm_atom_inc_gen_ui: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    Value *Val = EmitScalarExpr(E->getArg(1));
    Value *FnALI32 =
        CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
    return Builder.CreateCall(FnALI32, {Ptr, Val});
  }

  case NVPTX::BI__nvvm_atom_dec_gen_ui: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    Value *Val = EmitScalarExpr(E->getArg(1));
    Value *FnALD32 =
        CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
    return Builder.CreateCall(FnALD32, {Ptr, Val});
  }

  // Integer-typed ldg builtins all share the nvvm_ldg_global_i intrinsic.
  case NVPTX::BI__nvvm_ldg_c:
  case NVPTX::BI__nvvm_ldg_c2:
  case NVPTX::BI__nvvm_ldg_c4:
  case NVPTX::BI__nvvm_ldg_s:
  case NVPTX::BI__nvvm_ldg_s2:
  case NVPTX::BI__nvvm_ldg_s4:
  case NVPTX::BI__nvvm_ldg_i:
  case NVPTX::BI__nvvm_ldg_i2:
  case NVPTX::BI__nvvm_ldg_i4:
  case NVPTX::BI__nvvm_ldg_l:
  case NVPTX::BI__nvvm_ldg_ll:
  case NVPTX::BI__nvvm_ldg_ll2:
  case NVPTX::BI__nvvm_ldg_uc:
  case NVPTX::BI__nvvm_ldg_uc2:
  case NVPTX::BI__nvvm_ldg_uc4:
  case NVPTX::BI__nvvm_ldg_us:
  case NVPTX::BI__nvvm_ldg_us2:
  case NVPTX::BI__nvvm_ldg_us4:
  case NVPTX::BI__nvvm_ldg_ui:
  case NVPTX::BI__nvvm_ldg_ui2:
  case NVPTX::BI__nvvm_ldg_ui4:
  case NVPTX::BI__nvvm_ldg_ul:
  case NVPTX::BI__nvvm_ldg_ull:
  case NVPTX::BI__nvvm_ldg_ull2:
    // PTX Interoperability section 2.2: "For a vector with an even number of
    // elements, its alignment is set to number of elements times the alignment
    // of its member: n*alignof(t)."
    return MakeLdg(Intrinsic::nvvm_ldg_global_i);
  // Floating-point ldg builtins share the nvvm_ldg_global_f intrinsic.
  case NVPTX::BI__nvvm_ldg_f:
  case NVPTX::BI__nvvm_ldg_f2:
  case NVPTX::BI__nvvm_ldg_f4:
  case NVPTX::BI__nvvm_ldg_d:
  case NVPTX::BI__nvvm_ldg_d2:
    return MakeLdg(Intrinsic::nvvm_ldg_global_f);

  // Scoped (_cta / _sys) two-operand atomics: every builtin group maps onto
  // the correspondingly named intrinsic through MakeScopedAtomic. Signed and
  // unsigned max/min builtins share the same intrinsic here.
  case NVPTX::BI__nvvm_atom_cta_add_gen_i:
  case NVPTX::BI__nvvm_atom_cta_add_gen_l:
  case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta);
  case NVPTX::BI__nvvm_atom_sys_add_gen_i:
  case NVPTX::BI__nvvm_atom_sys_add_gen_l:
  case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys);
  case NVPTX::BI__nvvm_atom_cta_add_gen_f:
  case NVPTX::BI__nvvm_atom_cta_add_gen_d:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta);
  case NVPTX::BI__nvvm_atom_sys_add_gen_f:
  case NVPTX::BI__nvvm_atom_sys_add_gen_d:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys);
  case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
  case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
  case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta);
  case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
  case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
  case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys);
  case NVPTX::BI__nvvm_atom_cta_max_gen_i:
  case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
  case NVPTX::BI__nvvm_atom_cta_max_gen_l:
  case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
  case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
  case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta);
  case NVPTX::BI__nvvm_atom_sys_max_gen_i:
  case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
  case NVPTX::BI__nvvm_atom_sys_max_gen_l:
  case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
  case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
  case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys);
  case NVPTX::BI__nvvm_atom_cta_min_gen_i:
  case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
  case NVPTX::BI__nvvm_atom_cta_min_gen_l:
  case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
  case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
  case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta);
  case NVPTX::BI__nvvm_atom_sys_min_gen_i:
  case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
  case NVPTX::BI__nvvm_atom_sys_min_gen_l:
  case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
  case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
  case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys);
  case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta);
  case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta);
  case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys);
  case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys);
  case NVPTX::BI__nvvm_atom_cta_and_gen_i:
  case NVPTX::BI__nvvm_atom_cta_and_gen_l:
  case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta);
  case NVPTX::BI__nvvm_atom_sys_and_gen_i:
  case NVPTX::BI__nvvm_atom_sys_and_gen_l:
  case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys);
  case NVPTX::BI__nvvm_atom_cta_or_gen_i:
  case NVPTX::BI__nvvm_atom_cta_or_gen_l:
  case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta);
  case NVPTX::BI__nvvm_atom_sys_or_gen_i:
  case NVPTX::BI__nvvm_atom_sys_or_gen_l:
  case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys);
  case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
  case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
  case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta);
  case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
  case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
  case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys);
  // Scoped compare-and-swap passes three operands (the pointer plus arguments
  // 1 and 2), so it is emitted directly instead of via MakeScopedAtomic.
  case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
  case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
  case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(
            Intrinsic::nvvm_atomic_cas_gen_i_cta,
            {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
        {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
  }
  case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
  case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
  case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(
            Intrinsic::nvvm_atomic_cas_gen_i_sys,
            {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
        {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
  }
  case NVPTX::BI__nvvm_match_all_sync_i32p:
  case NVPTX::BI__nvvm_match_all_sync_i64p: {
    Value *Mask = EmitScalarExpr(E->getArg(0));
    Value *Val = EmitScalarExpr(E->getArg(1));
    Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
    Value *ResultPair = Builder.CreateCall(
        CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
                             ? Intrinsic::nvvm_match_all_sync_i32p
                             : Intrinsic::nvvm_match_all_sync_i64p),
        {Mask, Val});
    // Element 1 of the returned pair is zero-extended to the pointee type of
    // argument 2 and stored through that pointer; element 0 is the value of
    // the builtin call itself.
    Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
                                     PredOutPtr.getElementType());
    Builder.CreateStore(Pred, PredOutPtr);
    return Builder.CreateExtractValue(ResultPair, 0);
  }
  // WMMA fragment loads: (dst, src, ldm, layout).
  case NVPTX::BI__hmma_m16n16k16_ld_a:
  case NVPTX::BI__hmma_m16n16k16_ld_b:
  case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
  case NVPTX::BI__hmma_m16n16k16_ld_c_f32: {
    Address Dst = EmitPointerWithAlignment(E->getArg(0));
    Value *Src = EmitScalarExpr(E->getArg(1));
    Value *Ldm = EmitScalarExpr(E->getArg(2));
    // The layout selector must be an integer constant expression; otherwise
    // report the builtin as unhandled.
    llvm::APSInt isColMajorArg;
    if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext()))
      return nullptr;
    // Any non-zero layout value selects the _col_stride intrinsic.
    bool isColMajor = isColMajorArg.getSExtValue();
    unsigned IID;
    // Number of elements of the intrinsic's result that are copied to Dst.
    unsigned NumResults;
    switch (BuiltinID) {
    case NVPTX::BI__hmma_m16n16k16_ld_a:
      IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride
                       : Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride;
      NumResults = 8;
      break;
    case NVPTX::BI__hmma_m16n16k16_ld_b:
      IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride
                       : Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride;
      NumResults = 8;
      break;
    case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
      IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride
                       : Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride;
      NumResults = 4;
      break;
    case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
      IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride
                       : Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride;
      NumResults = 8;
      break;
    default:
      llvm_unreachable("Unexpected builtin ID.");
    }
    Value *Result =
        Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});

    // Save returned values: element i of the result is bitcast to Dst's
    // element type and stored at Dst[i] with 4-byte alignment.
    for (unsigned i = 0; i < NumResults; ++i) {
      Builder.CreateAlignedStore(
          Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
                                Dst.getElementType()),
          Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)),
          CharUnits::fromQuantity(4));
    }
    return Result;
  }

  // WMMA fragment stores: (dst, src, ldm, layout).
  case NVPTX::BI__hmma_m16n16k16_st_c_f16:
  case NVPTX::BI__hmma_m16n16k16_st_c_f32: {
    Value *Dst = EmitScalarExpr(E->getArg(0));
    Address Src = EmitPointerWithAlignment(E->getArg(1));
    Value *Ldm = EmitScalarExpr(E->getArg(2));
    // The layout selector must be an integer constant expression; otherwise
    // report the builtin as unhandled.
    llvm::APSInt isColMajorArg;
    if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext()))
      return nullptr;
    bool isColMajor = isColMajorArg.getSExtValue();
    unsigned IID;
    // Number of elements loaded from Src and passed to the intrinsic; the f16
    // variant overrides the default of 8 with 4.
    unsigned NumResults = 8;
    // PTX Instructions (and LLVM intrinsics) are defined for slice _d_, yet
    // for some reason nvcc builtins use _c_.
    switch (BuiltinID) {
    case NVPTX::BI__hmma_m16n16k16_st_c_f16:
      IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride
                       : Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride;
      NumResults = 4;
      break;
    case NVPTX::BI__hmma_m16n16k16_st_c_f32:
      IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride
                       : Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride;
      break;
    default:
      llvm_unreachable("Unexpected builtin ID.");
    }
    Function *Intrinsic = CGM.getIntrinsic(IID, Dst->getType());
    // Parameter 0 is the destination pointer; parameter 1 is the first data
    // operand, whose type every loaded element is bitcast to.
    llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
    // Operand list: Dst, the NumResults loaded elements, then Ldm.
    SmallVector<Value *, 10> Values = {Dst};
    for (unsigned i = 0; i < NumResults; ++i) {
      Value *V = Builder.CreateAlignedLoad(
          Builder.CreateGEP(Src.getPointer(), llvm::ConstantInt::get(IntTy, i)),
          CharUnits::fromQuantity(4));
      Values.push_back(Builder.CreateBitCast(V, ParamType));
    }
    Values.push_back(Ldm);
    Value *Result = Builder.CreateCall(Intrinsic, Values);
    return Result;
  }

  // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
  // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
  case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
  case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
  case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
  case NVPTX::BI__hmma_m16n16k16_mma_f16f32: {
    Address Dst = EmitPointerWithAlignment(E->getArg(0));
    Address SrcA = EmitPointerWithAlignment(E->getArg(1));
    Address SrcB = EmitPointerWithAlignment(E->getArg(2));
    Address SrcC = EmitPointerWithAlignment(E->getArg(3));
    // Both layout and satf must be integer constant expressions, and layout
    // must lie in [0, 3]; otherwise the builtin is reported as unhandled.
    llvm::APSInt LayoutArg;
    if (!E->getArg(4)->isIntegerConstantExpr(LayoutArg, getContext()))
      return nullptr;
    int Layout = LayoutArg.getSExtValue();
    if (Layout < 0 || Layout > 3)
      return nullptr;
    llvm::APSInt SatfArg;
    if (!E->getArg(5)->isIntegerConstantExpr(SatfArg, getContext()))
      return nullptr;
    // Any non-zero satf value selects the _satfinite variant.
    bool Satf = SatfArg.getSExtValue();

    // clang-format off
#define MMA_VARIANTS(type) {{                                        \
      Intrinsic::nvvm_wmma_m16n16k16_mma_row_row_##type,             \
      Intrinsic::nvvm_wmma_m16n16k16_mma_row_row_##type##_satfinite, \
      Intrinsic::nvvm_wmma_m16n16k16_mma_row_col_##type,             \
      Intrinsic::nvvm_wmma_m16n16k16_mma_row_col_##type##_satfinite, \
      Intrinsic::nvvm_wmma_m16n16k16_mma_col_row_##type,             \
      Intrinsic::nvvm_wmma_m16n16k16_mma_col_row_##type##_satfinite, \
      Intrinsic::nvvm_wmma_m16n16k16_mma_col_col_##type,             \
      Intrinsic::nvvm_wmma_m16n16k16_mma_col_col_##type##_satfinite  \
    }}
    // clang-format on

    // The variant table is ordered row_row, row_col, col_row, col_col, each
    // entry immediately followed by its _satfinite counterpart, so the wanted
    // intrinsic sits at index Layout * 2 + Satf.
    auto getMMAIntrinsic = [Layout, Satf](std::array<unsigned, 8> Variants) {
      unsigned Index = Layout * 2 + Satf;
      assert(Index < 8);
      return Variants[Index];
    };
    unsigned IID;
    // Number of elements loaded from SrcC and stored to Dst, respectively.
    unsigned NumEltsC;
    unsigned NumEltsD;
    switch (BuiltinID) {
    case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
      IID = getMMAIntrinsic(MMA_VARIANTS(f16_f16));
      NumEltsC = 4;
      NumEltsD = 4;
      break;
    case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
      IID = getMMAIntrinsic(MMA_VARIANTS(f32_f16));
      NumEltsC = 4;
      NumEltsD = 8;
      break;
    case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
      IID = getMMAIntrinsic(MMA_VARIANTS(f16_f32));
      NumEltsC = 8;
      NumEltsD = 4;
      break;
    case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
      IID = getMMAIntrinsic(MMA_VARIANTS(f32_f32));
      NumEltsC = 8;
      NumEltsD = 8;
      break;
    default:
      llvm_unreachable("Unexpected builtin ID.");
    }
#undef MMA_VARIANTS

    // Operand list: 8 elements of A, 8 elements of B, then NumEltsC elements
    // of C. Every element is loaded with 4-byte alignment and bitcast to the
    // type of the intrinsic parameter it feeds.
    SmallVector<Value *, 24> Values;
    Function *Intrinsic = CGM.getIntrinsic(IID);
    llvm::Type *ABType = Intrinsic->getFunctionType()->getParamType(0);
    // Load A
    for (unsigned i = 0; i < 8; ++i) {
      Value *V = Builder.CreateAlignedLoad(
          Builder.CreateGEP(SrcA.getPointer(),
                            llvm::ConstantInt::get(IntTy, i)),
          CharUnits::fromQuantity(4));
      Values.push_back(Builder.CreateBitCast(V, ABType));
    }
    // Load B
    for (unsigned i = 0; i < 8; ++i) {
      Value *V = Builder.CreateAlignedLoad(
          Builder.CreateGEP(SrcB.getPointer(),
                            llvm::ConstantInt::get(IntTy, i)),
          CharUnits::fromQuantity(4));
      Values.push_back(Builder.CreateBitCast(V, ABType));
    }
    // Load C
    // Parameters 0-15 are filled by the A and B elements above, so parameter
    // 16 is the first C operand.
    llvm::Type *CType = Intrinsic->getFunctionType()->getParamType(16);
    for (unsigned i = 0; i < NumEltsC; ++i) {
      Value *V = Builder.CreateAlignedLoad(
          Builder.CreateGEP(SrcC.getPointer(),
                            llvm::ConstantInt::get(IntTy, i)),
          CharUnits::fromQuantity(4));
      Values.push_back(Builder.CreateBitCast(V, CType));
    }
    Value *Result = Builder.CreateCall(Intrinsic, Values);
    // Store the first NumEltsD elements of the result to Dst, bitcast to
    // Dst's element type.
    llvm::Type *DType = Dst.getElementType();
    for (unsigned i = 0; i < NumEltsD; ++i)
      Builder.CreateAlignedStore(
          Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
          Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)),
          CharUnits::fromQuantity(4));
    return Result;
  }
  default:
    // Not an NVPTX builtin handled by this function.
    return nullptr;
  }
}
10727 
10728 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
10729                                                    const CallExpr *E) {
10730   switch (BuiltinID) {
10731   case WebAssembly::BI__builtin_wasm_mem_size: {
10732     llvm::Type *ResultType = ConvertType(E->getType());
10733     Value *I = EmitScalarExpr(E->getArg(0));
10734     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_mem_size, ResultType);
10735     return Builder.CreateCall(Callee, I);
10736   }
10737   case WebAssembly::BI__builtin_wasm_mem_grow: {
10738     llvm::Type *ResultType = ConvertType(E->getType());
10739     Value *Args[] = {
10740       EmitScalarExpr(E->getArg(0)),
10741       EmitScalarExpr(E->getArg(1))
10742     };
10743     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_mem_grow, ResultType);
10744     return Builder.CreateCall(Callee, Args);
10745   }
10746   case WebAssembly::BI__builtin_wasm_current_memory: {
10747     llvm::Type *ResultType = ConvertType(E->getType());
10748     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
10749     return Builder.CreateCall(Callee);
10750   }
10751   case WebAssembly::BI__builtin_wasm_grow_memory: {
10752     Value *X = EmitScalarExpr(E->getArg(0));
10753     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
10754     return Builder.CreateCall(Callee, X);
10755   }
10756   case WebAssembly::BI__builtin_wasm_throw: {
10757     Value *Tag = EmitScalarExpr(E->getArg(0));
10758     Value *Obj = EmitScalarExpr(E->getArg(1));
10759     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
10760     return Builder.CreateCall(Callee, {Tag, Obj});
10761   }
10762   case WebAssembly::BI__builtin_wasm_rethrow: {
10763     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
10764     return Builder.CreateCall(Callee);
10765   }
10766 
10767   default:
10768     return nullptr;
10769   }
10770 }
10771 
10772 Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
10773                                                const CallExpr *E) {
10774   SmallVector<llvm::Value *, 4> Ops;
10775   Intrinsic::ID ID = Intrinsic::not_intrinsic;
10776 
10777   auto MakeCircLd = [&](unsigned IntID, bool HasImm = true) {
10778     // The base pointer is passed by address, so it needs to be loaded.
10779     Address BP = EmitPointerWithAlignment(E->getArg(0));
10780     BP = Address(Builder.CreateBitCast(BP.getPointer(), Int8PtrPtrTy),
10781                  BP.getAlignment());
10782     llvm::Value *Base = Builder.CreateLoad(BP);
10783     // Operands are Base, Increment, Modifier, Start.
10784     if (HasImm)
10785       Ops = { Base, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)),
10786               EmitScalarExpr(E->getArg(3)) };
10787     else
10788       Ops = { Base, EmitScalarExpr(E->getArg(1)),
10789               EmitScalarExpr(E->getArg(2)) };
10790 
10791     llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
10792     llvm::Value *NewBase = Builder.CreateExtractValue(Result, 1);
10793     llvm::Value *LV = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
10794                                             NewBase->getType()->getPointerTo());
10795     Address Dest = EmitPointerWithAlignment(E->getArg(0));
10796     // The intrinsic generates two results. The new value for the base pointer
10797     // needs to be stored.
10798     Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
10799     return Builder.CreateExtractValue(Result, 0);
10800   };
10801 
10802   auto MakeCircSt = [&](unsigned IntID, bool HasImm = true) {
10803     // The base pointer is passed by address, so it needs to be loaded.
10804     Address BP = EmitPointerWithAlignment(E->getArg(0));
10805     BP = Address(Builder.CreateBitCast(BP.getPointer(), Int8PtrPtrTy),
10806                  BP.getAlignment());
10807     llvm::Value *Base = Builder.CreateLoad(BP);
10808     // Operands are Base, Increment, Modifier, Value, Start.
10809     if (HasImm)
10810       Ops = { Base, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)),
10811               EmitScalarExpr(E->getArg(3)), EmitScalarExpr(E->getArg(4)) };
10812     else
10813       Ops = { Base, EmitScalarExpr(E->getArg(1)),
10814               EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)) };
10815 
10816     llvm::Value *NewBase = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
10817     llvm::Value *LV = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
10818                                             NewBase->getType()->getPointerTo());
10819     Address Dest = EmitPointerWithAlignment(E->getArg(0));
10820     // The intrinsic generates one result, which is the new value for the base
10821     // pointer. It needs to be stored.
10822     return Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
10823   };
10824 
10825   // Handle the conversion of bit-reverse load intrinsics to bit code.
10826   // The intrinsic call after this function only reads from memory and the
10827   // write to memory is dealt by the store instruction.
10828   auto MakeBrevLd = [&](unsigned IntID, llvm::Type *DestTy) {
10829     // The intrinsic generates one result, which is the new value for the base
10830     // pointer. It needs to be returned. The result of the load instruction is
10831     // passed to intrinsic by address, so the value needs to be stored.
10832     llvm::Value *BaseAddress =
10833         Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);
10834 
10835     // Expressions like &(*pt++) will be incremented per evaluation.
10836     // EmitPointerWithAlignment and EmitScalarExpr evaluates the expression
10837     // per call.
10838     Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
10839     DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), Int8PtrTy),
10840                        DestAddr.getAlignment());
10841     llvm::Value *DestAddress = DestAddr.getPointer();
10842 
10843     // Operands are Base, Dest, Modifier.
10844     // The intrinsic format in LLVM IR is defined as
10845     // { ValueType, i8* } (i8*, i32).
10846     Ops = {BaseAddress, EmitScalarExpr(E->getArg(2))};
10847 
10848     llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
10849     // The value needs to be stored as the variable is passed by reference.
10850     llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);
10851 
10852     // The store needs to be truncated to fit the destination type.
10853     // While i32 and i64 are natively supported on Hexagon, i8 and i16 needs
10854     // to be handled with stores of respective destination type.
10855     DestVal = Builder.CreateTrunc(DestVal, DestTy);
10856 
10857     llvm::Value *DestForStore =
10858         Builder.CreateBitCast(DestAddress, DestVal->getType()->getPointerTo());
10859     Builder.CreateAlignedStore(DestVal, DestForStore, DestAddr.getAlignment());
10860     // The updated value of the base pointer is returned.
10861     return Builder.CreateExtractValue(Result, 1);
10862   };
10863 
10864   switch (BuiltinID) {
10865   case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
10866   case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B: {
10867     Address Dest = EmitPointerWithAlignment(E->getArg(2));
10868     unsigned Size;
10869     if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vaddcarry) {
10870       Size = 512;
10871       ID = Intrinsic::hexagon_V6_vaddcarry;
10872     } else {
10873       Size = 1024;
10874       ID = Intrinsic::hexagon_V6_vaddcarry_128B;
10875     }
10876     Dest = Builder.CreateBitCast(Dest,
10877         llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0));
10878     LoadInst *QLd = Builder.CreateLoad(Dest);
10879     Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd };
10880     llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
10881     llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1);
10882     llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)),
10883                                               Vprd->getType()->getPointerTo(0));
10884     Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment());
10885     return Builder.CreateExtractValue(Result, 0);
10886   }
10887   case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
10888   case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
10889     Address Dest = EmitPointerWithAlignment(E->getArg(2));
10890     unsigned Size;
10891     if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vsubcarry) {
10892       Size = 512;
10893       ID = Intrinsic::hexagon_V6_vsubcarry;
10894     } else {
10895       Size = 1024;
10896       ID = Intrinsic::hexagon_V6_vsubcarry_128B;
10897     }
10898     Dest = Builder.CreateBitCast(Dest,
10899         llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0));
10900     LoadInst *QLd = Builder.CreateLoad(Dest);
10901     Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd };
10902     llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
10903     llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1);
10904     llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)),
10905                                               Vprd->getType()->getPointerTo(0));
10906     Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment());
10907     return Builder.CreateExtractValue(Result, 0);
10908   }
10909   case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
10910     return MakeCircLd(Intrinsic::hexagon_L2_loadrub_pci);
10911   case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
10912     return MakeCircLd(Intrinsic::hexagon_L2_loadrb_pci);
10913   case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
10914     return MakeCircLd(Intrinsic::hexagon_L2_loadruh_pci);
10915   case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
10916     return MakeCircLd(Intrinsic::hexagon_L2_loadrh_pci);
10917   case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
10918     return MakeCircLd(Intrinsic::hexagon_L2_loadri_pci);
10919   case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
10920     return MakeCircLd(Intrinsic::hexagon_L2_loadrd_pci);
10921   case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
10922     return MakeCircLd(Intrinsic::hexagon_L2_loadrub_pcr, /*HasImm=*/false);
10923   case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
10924     return MakeCircLd(Intrinsic::hexagon_L2_loadrb_pcr, /*HasImm=*/false);
10925   case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
10926     return MakeCircLd(Intrinsic::hexagon_L2_loadruh_pcr, /*HasImm=*/false);
10927   case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
10928     return MakeCircLd(Intrinsic::hexagon_L2_loadrh_pcr, /*HasImm=*/false);
10929   case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
10930     return MakeCircLd(Intrinsic::hexagon_L2_loadri_pcr, /*HasImm=*/false);
10931   case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
10932     return MakeCircLd(Intrinsic::hexagon_L2_loadrd_pcr, /*HasImm=*/false);
10933   case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
10934     return MakeCircSt(Intrinsic::hexagon_S2_storerb_pci);
10935   case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
10936     return MakeCircSt(Intrinsic::hexagon_S2_storerh_pci);
10937   case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
10938     return MakeCircSt(Intrinsic::hexagon_S2_storerf_pci);
10939   case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
10940     return MakeCircSt(Intrinsic::hexagon_S2_storeri_pci);
10941   case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
10942     return MakeCircSt(Intrinsic::hexagon_S2_storerd_pci);
10943   case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
10944     return MakeCircSt(Intrinsic::hexagon_S2_storerb_pcr, /*HasImm=*/false);
10945   case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
10946     return MakeCircSt(Intrinsic::hexagon_S2_storerh_pcr, /*HasImm=*/false);
10947   case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
10948     return MakeCircSt(Intrinsic::hexagon_S2_storerf_pcr, /*HasImm=*/false);
10949   case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
10950     return MakeCircSt(Intrinsic::hexagon_S2_storeri_pcr, /*HasImm=*/false);
10951   case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
10952     return MakeCircSt(Intrinsic::hexagon_S2_storerd_pcr, /*HasImm=*/false);
10953   case Hexagon::BI__builtin_brev_ldub:
10954     return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
10955   case Hexagon::BI__builtin_brev_ldb:
10956     return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
10957   case Hexagon::BI__builtin_brev_lduh:
10958     return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
10959   case Hexagon::BI__builtin_brev_ldh:
10960     return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
10961   case Hexagon::BI__builtin_brev_ldw:
10962     return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
10963   case Hexagon::BI__builtin_brev_ldd:
10964     return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
10965   default:
10966     break;
10967   } // switch
10968 
10969   return nullptr;
10970 }
10971