1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This contains code to emit Builtin calls as LLVM code.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGCXXABI.h"
14 #include "CGObjCRuntime.h"
15 #include "CGOpenCLRuntime.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "CodeGenModule.h"
19 #include "ConstantEmitter.h"
20 #include "PatternInit.h"
21 #include "TargetInfo.h"
22 #include "clang/AST/ASTContext.h"
23 #include "clang/AST/Decl.h"
24 #include "clang/AST/OSLog.h"
25 #include "clang/Basic/TargetBuiltins.h"
26 #include "clang/Basic/TargetInfo.h"
27 #include "clang/CodeGen/CGFunctionInfo.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/StringExtras.h"
30 #include "llvm/IR/DataLayout.h"
31 #include "llvm/IR/InlineAsm.h"
32 #include "llvm/IR/Intrinsics.h"
33 #include "llvm/IR/MDBuilder.h"
34 #include "llvm/Support/ConvertUTF.h"
35 #include "llvm/Support/ScopedPrinter.h"
36 #include "llvm/Support/TargetParser.h"
37 #include <sstream>
38 
39 using namespace clang;
40 using namespace CodeGen;
41 using namespace llvm;
42 
/// Clamp \p Value into the interval bounded by \p Low and \p High.
///
/// The lower bound is applied first and the upper bound second, so when the
/// bounds are inverted (Low > High) the result is \p High.
static int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
  const int64_t AtLeastLow = Value < Low ? Low : Value;
  return High < AtLeastLow ? High : AtLeastLow;
}
47 
48 static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, unsigned AlignmentInBytes) {
49   ConstantInt *Byte;
50   switch (CGF.getLangOpts().getTrivialAutoVarInit()) {
51   case LangOptions::TrivialAutoVarInitKind::Uninitialized:
52     // Nothing to initialize.
53     return;
54   case LangOptions::TrivialAutoVarInitKind::Zero:
55     Byte = CGF.Builder.getInt8(0x00);
56     break;
57   case LangOptions::TrivialAutoVarInitKind::Pattern: {
58     llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext());
59     Byte = llvm::dyn_cast<llvm::ConstantInt>(
60         initializationPatternFor(CGF.CGM, Int8));
61     break;
62   }
63   }
64   CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
65 }
66 
67 /// getBuiltinLibFunction - Given a builtin id for a function like
68 /// "__builtin_fabsf", return a Function* for "fabsf".
69 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
70                                                      unsigned BuiltinID) {
71   assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
72 
73   // Get the name, skip over the __builtin_ prefix (if necessary).
74   StringRef Name;
75   GlobalDecl D(FD);
76 
77   // If the builtin has been declared explicitly with an assembler label,
78   // use the mangled name. This differs from the plain label on platforms
79   // that prefix labels.
80   if (FD->hasAttr<AsmLabelAttr>())
81     Name = getMangledName(D);
82   else
83     Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
84 
85   llvm::FunctionType *Ty =
86     cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
87 
88   return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
89 }
90 
91 /// Emit the conversions required to turn the given value into an
92 /// integer of the given size.
93 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
94                         QualType T, llvm::IntegerType *IntType) {
95   V = CGF.EmitToMemory(V, T);
96 
97   if (V->getType()->isPointerTy())
98     return CGF.Builder.CreatePtrToInt(V, IntType);
99 
100   assert(V->getType() == IntType);
101   return V;
102 }
103 
104 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
105                           QualType T, llvm::Type *ResultType) {
106   V = CGF.EmitFromMemory(V, T);
107 
108   if (ResultType->isPointerTy())
109     return CGF.Builder.CreateIntToPtr(V, ResultType);
110 
111   assert(V->getType() == ResultType);
112   return V;
113 }
114 
115 /// Utility to insert an atomic instruction based on Intrinsic::ID
116 /// and the expression node.
117 static Value *MakeBinaryAtomicValue(
118     CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
119     AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
120   QualType T = E->getType();
121   assert(E->getArg(0)->getType()->isPointerType());
122   assert(CGF.getContext().hasSameUnqualifiedType(T,
123                                   E->getArg(0)->getType()->getPointeeType()));
124   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
125 
126   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
127   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
128 
129   llvm::IntegerType *IntType =
130     llvm::IntegerType::get(CGF.getLLVMContext(),
131                            CGF.getContext().getTypeSize(T));
132   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
133 
134   llvm::Value *Args[2];
135   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
136   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
137   llvm::Type *ValueType = Args[1]->getType();
138   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
139 
140   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
141       Kind, Args[0], Args[1], Ordering);
142   return EmitFromInt(CGF, Result, T, ValueType);
143 }
144 
145 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
146   Value *Val = CGF.EmitScalarExpr(E->getArg(0));
147   Value *Address = CGF.EmitScalarExpr(E->getArg(1));
148 
149   // Convert the type of the pointer to a pointer to the stored type.
150   Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
151   Value *BC = CGF.Builder.CreateBitCast(
152       Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
153   LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
154   LV.setNontemporal(true);
155   CGF.EmitStoreOfScalar(Val, LV, false);
156   return nullptr;
157 }
158 
159 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
160   Value *Address = CGF.EmitScalarExpr(E->getArg(0));
161 
162   LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
163   LV.setNontemporal(true);
164   return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
165 }
166 
167 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
168                                llvm::AtomicRMWInst::BinOp Kind,
169                                const CallExpr *E) {
170   return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
171 }
172 
173 /// Utility to insert an atomic instruction based Intrinsic::ID and
174 /// the expression node, where the return value is the result of the
175 /// operation.
176 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
177                                    llvm::AtomicRMWInst::BinOp Kind,
178                                    const CallExpr *E,
179                                    Instruction::BinaryOps Op,
180                                    bool Invert = false) {
181   QualType T = E->getType();
182   assert(E->getArg(0)->getType()->isPointerType());
183   assert(CGF.getContext().hasSameUnqualifiedType(T,
184                                   E->getArg(0)->getType()->getPointeeType()));
185   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
186 
187   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
188   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
189 
190   llvm::IntegerType *IntType =
191     llvm::IntegerType::get(CGF.getLLVMContext(),
192                            CGF.getContext().getTypeSize(T));
193   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
194 
195   llvm::Value *Args[2];
196   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
197   llvm::Type *ValueType = Args[1]->getType();
198   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
199   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
200 
201   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
202       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
203   Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
204   if (Invert)
205     Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
206                                      llvm::ConstantInt::get(IntType, -1));
207   Result = EmitFromInt(CGF, Result, T, ValueType);
208   return RValue::get(Result);
209 }
210 
211 /// Utility to insert an atomic cmpxchg instruction.
212 ///
213 /// @param CGF The current codegen function.
214 /// @param E   Builtin call expression to convert to cmpxchg.
215 ///            arg0 - address to operate on
216 ///            arg1 - value to compare with
217 ///            arg2 - new value
218 /// @param ReturnBool Specifies whether to return success flag of
219 ///                   cmpxchg result or the old value.
220 ///
221 /// @returns result of cmpxchg, according to ReturnBool
222 ///
223 /// Note: In order to lower Microsoft's _InterlockedCompareExchange* intrinsics
224 /// invoke the function EmitAtomicCmpXchgForMSIntrin.
225 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
226                                      bool ReturnBool) {
227   QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
228   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
229   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
230 
231   llvm::IntegerType *IntType = llvm::IntegerType::get(
232       CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
233   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
234 
235   Value *Args[3];
236   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
237   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
238   llvm::Type *ValueType = Args[1]->getType();
239   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
240   Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
241 
242   Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
243       Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
244       llvm::AtomicOrdering::SequentiallyConsistent);
245   if (ReturnBool)
246     // Extract boolean success flag and zext it to int.
247     return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
248                                   CGF.ConvertType(E->getType()));
249   else
250     // Extract old value and emit it using the same type as compare value.
251     return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
252                        ValueType);
253 }
254 
255 /// This function should be invoked to emit atomic cmpxchg for Microsoft's
256 /// _InterlockedCompareExchange* intrinsics which have the following signature:
257 /// T _InterlockedCompareExchange(T volatile *Destination,
258 ///                               T Exchange,
259 ///                               T Comparand);
260 ///
261 /// Whereas the llvm 'cmpxchg' instruction has the following syntax:
262 /// cmpxchg *Destination, Comparand, Exchange.
263 /// So we need to swap Comparand and Exchange when invoking
264 /// CreateAtomicCmpXchg. That is the reason we could not use the above utility
265 /// function MakeAtomicCmpXchgValue since it expects the arguments to be
266 /// already swapped.
267 
268 static
269 Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
270     AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
271   assert(E->getArg(0)->getType()->isPointerType());
272   assert(CGF.getContext().hasSameUnqualifiedType(
273       E->getType(), E->getArg(0)->getType()->getPointeeType()));
274   assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
275                                                  E->getArg(1)->getType()));
276   assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
277                                                  E->getArg(2)->getType()));
278 
279   auto *Destination = CGF.EmitScalarExpr(E->getArg(0));
280   auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
281   auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));
282 
283   // For Release ordering, the failure ordering should be Monotonic.
284   auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
285                          AtomicOrdering::Monotonic :
286                          SuccessOrdering;
287 
288   auto *Result = CGF.Builder.CreateAtomicCmpXchg(
289                    Destination, Comparand, Exchange,
290                    SuccessOrdering, FailureOrdering);
291   Result->setVolatile(true);
292   return CGF.Builder.CreateExtractValue(Result, 0);
293 }
294 
295 static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
296     AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
297   assert(E->getArg(0)->getType()->isPointerType());
298 
299   auto *IntTy = CGF.ConvertType(E->getType());
300   auto *Result = CGF.Builder.CreateAtomicRMW(
301                    AtomicRMWInst::Add,
302                    CGF.EmitScalarExpr(E->getArg(0)),
303                    ConstantInt::get(IntTy, 1),
304                    Ordering);
305   return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
306 }
307 
308 static Value *EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E,
309     AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
310   assert(E->getArg(0)->getType()->isPointerType());
311 
312   auto *IntTy = CGF.ConvertType(E->getType());
313   auto *Result = CGF.Builder.CreateAtomicRMW(
314                    AtomicRMWInst::Sub,
315                    CGF.EmitScalarExpr(E->getArg(0)),
316                    ConstantInt::get(IntTy, 1),
317                    Ordering);
318   return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
319 }
320 
321 // Build a plain volatile load.
322 static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) {
323   Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
324   QualType ElTy = E->getArg(0)->getType()->getPointeeType();
325   CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy);
326   llvm::Type *ITy =
327       llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8);
328   Ptr = CGF.Builder.CreateBitCast(Ptr, ITy->getPointerTo());
329   llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(Ptr, LoadSize);
330   Load->setVolatile(true);
331   return Load;
332 }
333 
334 // Build a plain volatile store.
335 static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
336   Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
337   Value *Value = CGF.EmitScalarExpr(E->getArg(1));
338   QualType ElTy = E->getArg(0)->getType()->getPointeeType();
339   CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy);
340   llvm::Type *ITy =
341       llvm::IntegerType::get(CGF.getLLVMContext(), StoreSize.getQuantity() * 8);
342   Ptr = CGF.Builder.CreateBitCast(Ptr, ITy->getPointerTo());
343   llvm::StoreInst *Store =
344       CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize);
345   Store->setVolatile(true);
346   return Store;
347 }
348 
349 // Emit a simple mangled intrinsic that has 1 argument and a return type
350 // matching the argument type.
351 static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
352                                const CallExpr *E,
353                                unsigned IntrinsicID) {
354   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
355 
356   Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
357   return CGF.Builder.CreateCall(F, Src0);
358 }
359 
360 // Emit an intrinsic that has 2 operands of the same type as its result.
361 static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
362                                 const CallExpr *E,
363                                 unsigned IntrinsicID) {
364   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
365   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
366 
367   Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
368   return CGF.Builder.CreateCall(F, { Src0, Src1 });
369 }
370 
371 // Emit an intrinsic that has 3 operands of the same type as its result.
372 static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
373                                  const CallExpr *E,
374                                  unsigned IntrinsicID) {
375   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
376   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
377   llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
378 
379   Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
380   return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
381 }
382 
383 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
384 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
385                                const CallExpr *E,
386                                unsigned IntrinsicID) {
387   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
388   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
389 
390   Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
391   return CGF.Builder.CreateCall(F, {Src0, Src1});
392 }
393 
394 /// EmitFAbs - Emit a call to @llvm.fabs().
395 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
396   Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
397   llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
398   Call->setDoesNotAccessMemory();
399   return Call;
400 }
401 
402 /// Emit the computation of the sign bit for a floating point value. Returns
403 /// the i1 sign bit value.
404 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
405   LLVMContext &C = CGF.CGM.getLLVMContext();
406 
407   llvm::Type *Ty = V->getType();
408   int Width = Ty->getPrimitiveSizeInBits();
409   llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
410   V = CGF.Builder.CreateBitCast(V, IntTy);
411   if (Ty->isPPC_FP128Ty()) {
412     // We want the sign bit of the higher-order double. The bitcast we just
413     // did works as if the double-double was stored to memory and then
414     // read as an i128. The "store" will put the higher-order double in the
415     // lower address in both little- and big-Endian modes, but the "load"
416     // will treat those bits as a different part of the i128: the low bits in
417     // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
418     // we need to shift the high bits down to the low before truncating.
419     Width >>= 1;
420     if (CGF.getTarget().isBigEndian()) {
421       Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
422       V = CGF.Builder.CreateLShr(V, ShiftCst);
423     }
424     // We are truncating value in order to extract the higher-order
425     // double, which we will be using to extract the sign from.
426     IntTy = llvm::IntegerType::get(C, Width);
427     V = CGF.Builder.CreateTrunc(V, IntTy);
428   }
429   Value *Zero = llvm::Constant::getNullValue(IntTy);
430   return CGF.Builder.CreateICmpSLT(V, Zero);
431 }
432 
433 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
434                               const CallExpr *E, llvm::Constant *calleeValue) {
435   CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
436   return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
437 }
438 
439 /// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
440 /// depending on IntrinsicID.
441 ///
442 /// \arg CGF The current codegen function.
443 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
444 /// \arg X The first argument to the llvm.*.with.overflow.*.
445 /// \arg Y The second argument to the llvm.*.with.overflow.*.
446 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
447 /// \returns The result (i.e. sum/product) returned by the intrinsic.
448 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
449                                           const llvm::Intrinsic::ID IntrinsicID,
450                                           llvm::Value *X, llvm::Value *Y,
451                                           llvm::Value *&Carry) {
452   // Make sure we have integers of the same width.
453   assert(X->getType() == Y->getType() &&
454          "Arguments must be the same type. (Did you forget to make sure both "
455          "arguments have the same integer width?)");
456 
457   Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
458   llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
459   Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
460   return CGF.Builder.CreateExtractValue(Tmp, 0);
461 }
462 
463 static Value *emitRangedBuiltin(CodeGenFunction &CGF,
464                                 unsigned IntrinsicID,
465                                 int low, int high) {
466     llvm::MDBuilder MDHelper(CGF.getLLVMContext());
467     llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
468     Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
469     llvm::Instruction *Call = CGF.Builder.CreateCall(F);
470     Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
471     return Call;
472 }
473 
namespace {
/// The width and the signedness of an integer type.
struct WidthAndSignedness {
  /// Width of the type.
  unsigned Width;
  /// Whether the type is signed.
  bool Signed;
};
} // namespace
480 
481 static WidthAndSignedness
482 getIntegerWidthAndSignedness(const clang::ASTContext &context,
483                              const clang::QualType Type) {
484   assert(Type->isIntegerType() && "Given type is not an integer.");
485   unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
486   bool Signed = Type->isSignedIntegerType();
487   return {Width, Signed};
488 }
489 
490 // Given one or more integer types, this function produces an integer type that
491 // encompasses them: any value in one of the given types could be expressed in
492 // the encompassing type.
493 static struct WidthAndSignedness
494 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
495   assert(Types.size() > 0 && "Empty list of types.");
496 
497   // If any of the given types is signed, we must return a signed type.
498   bool Signed = false;
499   for (const auto &Type : Types) {
500     Signed |= Type.Signed;
501   }
502 
503   // The encompassing type must have a width greater than or equal to the width
504   // of the specified types.  Additionally, if the encompassing type is signed,
505   // its width must be strictly greater than the width of any unsigned types
506   // given.
507   unsigned Width = 0;
508   for (const auto &Type : Types) {
509     unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
510     if (Width < MinWidth) {
511       Width = MinWidth;
512     }
513   }
514 
515   return {Width, Signed};
516 }
517 
518 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
519   llvm::Type *DestType = Int8PtrTy;
520   if (ArgValue->getType() != DestType)
521     ArgValue =
522         Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
523 
524   Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
525   return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
526 }
527 
/// Checks if using the result of __builtin_object_size(p, @p From) in place of
/// __builtin_object_size(p, @p To) is correct.
///
/// That is the case when the two types are equal, when From is 0 and To is 1,
/// or when From is 3 and To is 2.
static bool areBOSTypesCompatible(int From, int To) {
  // Note: Our __builtin_object_size implementation currently treats Type=0 and
  // Type=2 identically. Encoding this implementation detail here may make
  // improving __builtin_object_size difficult in the future, so it's omitted.
  if (From == To)
    return true;
  if (From == 0)
    return To == 1;
  if (From == 3)
    return To == 2;
  return false;
}
536 
537 static llvm::Value *
538 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
539   return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
540 }
541 
542 llvm::Value *
543 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
544                                                  llvm::IntegerType *ResType,
545                                                  llvm::Value *EmittedE,
546                                                  bool IsDynamic) {
547   uint64_t ObjectSize;
548   if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
549     return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic);
550   return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
551 }
552 
553 /// Returns a Value corresponding to the size of the given expression.
554 /// This Value may be either of the following:
555 ///   - A llvm::Argument (if E is a param with the pass_object_size attribute on
556 ///     it)
557 ///   - A call to the @llvm.objectsize intrinsic
558 ///
559 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
560 /// and we wouldn't otherwise try to reference a pass_object_size parameter,
561 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
562 llvm::Value *
563 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
564                                        llvm::IntegerType *ResType,
565                                        llvm::Value *EmittedE, bool IsDynamic) {
566   // We need to reference an argument if the pointer is a parameter with the
567   // pass_object_size attribute.
568   if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
569     auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
570     auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
571     if (Param != nullptr && PS != nullptr &&
572         areBOSTypesCompatible(PS->getType(), Type)) {
573       auto Iter = SizeArguments.find(Param);
574       assert(Iter != SizeArguments.end());
575 
576       const ImplicitParamDecl *D = Iter->second;
577       auto DIter = LocalDeclMap.find(D);
578       assert(DIter != LocalDeclMap.end());
579 
580       return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
581                               getContext().getSizeType(), E->getBeginLoc());
582     }
583   }
584 
585   // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
586   // evaluate E for side-effects. In either case, we shouldn't lower to
587   // @llvm.objectsize.
588   if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
589     return getDefaultBuiltinObjectSizeResult(Type, ResType);
590 
591   Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
592   assert(Ptr->getType()->isPointerTy() &&
593          "Non-pointer passed to __builtin_object_size?");
594 
595   Function *F =
596       CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
597 
598   // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
599   Value *Min = Builder.getInt1((Type & 2) != 0);
600   // For GCC compatibility, __builtin_object_size treat NULL as unknown size.
601   Value *NullIsUnknown = Builder.getTrue();
602   Value *Dynamic = Builder.getInt1(IsDynamic);
603   return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
604 }
605 
namespace {
/// A struct to generically describe a bit test intrinsic.
struct BitTest {
  /// What is done to the tested bit.
  enum ActionKind : uint8_t {
    TestOnly,
    Complement,
    Reset,
    Set
  };

  /// Whether the operation is interlocked and, if so, with which flavor.
  enum InterlockingKind : uint8_t {
    Unlocked,
    Sequential,
    Acquire,
    Release,
    NoFence
  };

  ActionKind Action;
  InterlockingKind Interlocking;
  /// True for the 64-bit variants of the intrinsics.
  bool Is64Bit;

  /// Build the description for the bit test builtin \p BuiltinID.
  static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
};
} // namespace
625 
626 BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
627   switch (BuiltinID) {
628     // Main portable variants.
629   case Builtin::BI_bittest:
630     return {TestOnly, Unlocked, false};
631   case Builtin::BI_bittestandcomplement:
632     return {Complement, Unlocked, false};
633   case Builtin::BI_bittestandreset:
634     return {Reset, Unlocked, false};
635   case Builtin::BI_bittestandset:
636     return {Set, Unlocked, false};
637   case Builtin::BI_interlockedbittestandreset:
638     return {Reset, Sequential, false};
639   case Builtin::BI_interlockedbittestandset:
640     return {Set, Sequential, false};
641 
642     // X86-specific 64-bit variants.
643   case Builtin::BI_bittest64:
644     return {TestOnly, Unlocked, true};
645   case Builtin::BI_bittestandcomplement64:
646     return {Complement, Unlocked, true};
647   case Builtin::BI_bittestandreset64:
648     return {Reset, Unlocked, true};
649   case Builtin::BI_bittestandset64:
650     return {Set, Unlocked, true};
651   case Builtin::BI_interlockedbittestandreset64:
652     return {Reset, Sequential, true};
653   case Builtin::BI_interlockedbittestandset64:
654     return {Set, Sequential, true};
655 
656     // ARM/AArch64-specific ordering variants.
657   case Builtin::BI_interlockedbittestandset_acq:
658     return {Set, Acquire, false};
659   case Builtin::BI_interlockedbittestandset_rel:
660     return {Set, Release, false};
661   case Builtin::BI_interlockedbittestandset_nf:
662     return {Set, NoFence, false};
663   case Builtin::BI_interlockedbittestandreset_acq:
664     return {Reset, Acquire, false};
665   case Builtin::BI_interlockedbittestandreset_rel:
666     return {Reset, Release, false};
667   case Builtin::BI_interlockedbittestandreset_nf:
668     return {Reset, NoFence, false};
669   }
670   llvm_unreachable("expected only bittest intrinsics");
671 }
672 
673 static char bitActionToX86BTCode(BitTest::ActionKind A) {
674   switch (A) {
675   case BitTest::TestOnly:   return '\0';
676   case BitTest::Complement: return 'c';
677   case BitTest::Reset:      return 'r';
678   case BitTest::Set:        return 's';
679   }
680   llvm_unreachable("invalid action");
681 }
682 
683 static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
684                                             BitTest BT,
685                                             const CallExpr *E, Value *BitBase,
686                                             Value *BitPos) {
687   char Action = bitActionToX86BTCode(BT.Action);
688   char SizeSuffix = BT.Is64Bit ? 'q' : 'l';
689 
690   // Build the assembly.
691   SmallString<64> Asm;
692   raw_svector_ostream AsmOS(Asm);
693   if (BT.Interlocking != BitTest::Unlocked)
694     AsmOS << "lock ";
695   AsmOS << "bt";
696   if (Action)
697     AsmOS << Action;
698   AsmOS << SizeSuffix << " $2, ($1)\n\tsetc ${0:b}";
699 
700   // Build the constraints. FIXME: We should support immediates when possible.
701   std::string Constraints = "=r,r,r,~{cc},~{flags},~{fpsr}";
702   llvm::IntegerType *IntType = llvm::IntegerType::get(
703       CGF.getLLVMContext(),
704       CGF.getContext().getTypeSize(E->getArg(1)->getType()));
705   llvm::Type *IntPtrType = IntType->getPointerTo();
706   llvm::FunctionType *FTy =
707       llvm::FunctionType::get(CGF.Int8Ty, {IntPtrType, IntType}, false);
708 
709   llvm::InlineAsm *IA =
710       llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
711   return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
712 }
713 
714 static llvm::AtomicOrdering
715 getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
716   switch (I) {
717   case BitTest::Unlocked:   return llvm::AtomicOrdering::NotAtomic;
718   case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
719   case BitTest::Acquire:    return llvm::AtomicOrdering::Acquire;
720   case BitTest::Release:    return llvm::AtomicOrdering::Release;
721   case BitTest::NoFence:    return llvm::AtomicOrdering::Monotonic;
722   }
723   llvm_unreachable("invalid interlocking");
724 }
725 
726 /// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
727 /// bits and a bit position and read and optionally modify the bit at that
728 /// position. The position index can be arbitrarily large, i.e. it can be larger
729 /// than 31 or 63, so we need an indexed load in the general case.
730 static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
731                                          unsigned BuiltinID,
732                                          const CallExpr *E) {
733   Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
734   Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
735 
736   BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);
737 
738   // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
739   // indexing operation internally. Use them if possible.
740   llvm::Triple::ArchType Arch = CGF.getTarget().getTriple().getArch();
741   if (Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64)
742     return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);
743 
744   // Otherwise, use generic code to load one byte and test the bit. Use all but
745   // the bottom three bits as the array index, and the bottom three bits to form
746   // a mask.
747   // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
748   Value *ByteIndex = CGF.Builder.CreateAShr(
749       BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
750   Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy);
751   Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8,
752                                                  ByteIndex, "bittest.byteaddr"),
753                    CharUnits::One());
754   Value *PosLow =
755       CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
756                             llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
757 
758   // The updating instructions will need a mask.
759   Value *Mask = nullptr;
760   if (BT.Action != BitTest::TestOnly) {
761     Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
762                                  "bittest.mask");
763   }
764 
765   // Check the action and ordering of the interlocked intrinsics.
766   llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);
767 
768   Value *OldByte = nullptr;
769   if (Ordering != llvm::AtomicOrdering::NotAtomic) {
770     // Emit a combined atomicrmw load/store operation for the interlocked
771     // intrinsics.
772     llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
773     if (BT.Action == BitTest::Reset) {
774       Mask = CGF.Builder.CreateNot(Mask);
775       RMWOp = llvm::AtomicRMWInst::And;
776     }
777     OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr.getPointer(), Mask,
778                                           Ordering);
779   } else {
780     // Emit a plain load for the non-interlocked intrinsics.
781     OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
782     Value *NewByte = nullptr;
783     switch (BT.Action) {
784     case BitTest::TestOnly:
785       // Don't store anything.
786       break;
787     case BitTest::Complement:
788       NewByte = CGF.Builder.CreateXor(OldByte, Mask);
789       break;
790     case BitTest::Reset:
791       NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
792       break;
793     case BitTest::Set:
794       NewByte = CGF.Builder.CreateOr(OldByte, Mask);
795       break;
796     }
797     if (NewByte)
798       CGF.Builder.CreateStore(NewByte, ByteAddr);
799   }
800 
801   // However we loaded the old byte, either by plain load or atomicrmw, shift
802   // the bit into the low position and mask it to 0 or 1.
803   Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
804   return CGF.Builder.CreateAnd(
805       ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
806 }
807 
namespace {
/// Selects which MSVC runtime setjmp entry point EmitMSVCRTSetJmp calls.
enum class MSVCSetJmpKind {
  // Calls "_setjmpex" with the buffer and a pointer-typed second argument.
  _setjmpex,
  // Calls the variadic "_setjmp3" with the buffer and a constant 0.
  _setjmp3,
  // Calls "_setjmp" with the buffer and a pointer-typed second argument.
  _setjmp
};
} // end anonymous namespace
815 
816 /// MSVC handles setjmp a bit differently on different platforms. On every
817 /// architecture except 32-bit x86, the frame address is passed. On x86, extra
818 /// parameters can be passed as variadic arguments, but we always pass none.
819 static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
820                                const CallExpr *E) {
821   llvm::Value *Arg1 = nullptr;
822   llvm::Type *Arg1Ty = nullptr;
823   StringRef Name;
824   bool IsVarArg = false;
825   if (SJKind == MSVCSetJmpKind::_setjmp3) {
826     Name = "_setjmp3";
827     Arg1Ty = CGF.Int32Ty;
828     Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
829     IsVarArg = true;
830   } else {
831     Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
832     Arg1Ty = CGF.Int8PtrTy;
833     if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
834       Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::sponentry));
835     } else
836       Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::frameaddress),
837                                     llvm::ConstantInt::get(CGF.Int32Ty, 0));
838   }
839 
840   // Mark the call site and declaration with ReturnsTwice.
841   llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
842   llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
843       CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
844       llvm::Attribute::ReturnsTwice);
845   llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction(
846       llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
847       ReturnsTwiceAttr, /*Local=*/true);
848 
849   llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
850       CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
851   llvm::Value *Args[] = {Buf, Arg1};
852   llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
853   CB->setAttributes(ReturnsTwiceAttr);
854   return RValue::get(CB);
855 }
856 
// Many of the MSVC builtins are available on x64, ARM and AArch64; to avoid
// repeating code, we handle them here.
//
// Each enumerator names one intrinsic emitted by
// CodeGenFunction::EmitMSVCBuiltinExpr. Enumerators with an _acq, _rel or _nf
// suffix are emitted there with Acquire, Release or Monotonic atomic ordering
// respectively; the unsuffixed _Interlocked* enumerators are emitted without
// an explicit ordering argument.
enum class CodeGenFunction::MSVCIntrin {
  // Bit scans.
  _BitScanForward,
  _BitScanReverse,
  // Interlocked operations without an explicit ordering.
  _InterlockedAnd,
  _InterlockedDecrement,
  _InterlockedExchange,
  _InterlockedExchangeAdd,
  _InterlockedExchangeSub,
  _InterlockedIncrement,
  _InterlockedOr,
  _InterlockedXor,
  // Interlocked operations with an explicit ordering suffix.
  _InterlockedExchangeAdd_acq,
  _InterlockedExchangeAdd_rel,
  _InterlockedExchangeAdd_nf,
  _InterlockedExchange_acq,
  _InterlockedExchange_rel,
  _InterlockedExchange_nf,
  _InterlockedCompareExchange_acq,
  _InterlockedCompareExchange_rel,
  _InterlockedCompareExchange_nf,
  _InterlockedOr_acq,
  _InterlockedOr_rel,
  _InterlockedOr_nf,
  _InterlockedXor_acq,
  _InterlockedXor_rel,
  _InterlockedXor_nf,
  _InterlockedAnd_acq,
  _InterlockedAnd_rel,
  _InterlockedAnd_nf,
  _InterlockedIncrement_acq,
  _InterlockedIncrement_rel,
  _InterlockedIncrement_nf,
  _InterlockedDecrement_acq,
  _InterlockedDecrement_rel,
  _InterlockedDecrement_nf,
  // Emitted as target-specific inline assembly marked noreturn.
  __fastfail,
};
896 
897 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
898                                             const CallExpr *E) {
899   switch (BuiltinID) {
900   case MSVCIntrin::_BitScanForward:
901   case MSVCIntrin::_BitScanReverse: {
902     Value *ArgValue = EmitScalarExpr(E->getArg(1));
903 
904     llvm::Type *ArgType = ArgValue->getType();
905     llvm::Type *IndexType =
906       EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType();
907     llvm::Type *ResultType = ConvertType(E->getType());
908 
909     Value *ArgZero = llvm::Constant::getNullValue(ArgType);
910     Value *ResZero = llvm::Constant::getNullValue(ResultType);
911     Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
912 
913     BasicBlock *Begin = Builder.GetInsertBlock();
914     BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
915     Builder.SetInsertPoint(End);
916     PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
917 
918     Builder.SetInsertPoint(Begin);
919     Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
920     BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
921     Builder.CreateCondBr(IsZero, End, NotZero);
922     Result->addIncoming(ResZero, Begin);
923 
924     Builder.SetInsertPoint(NotZero);
925     Address IndexAddress = EmitPointerWithAlignment(E->getArg(0));
926 
927     if (BuiltinID == MSVCIntrin::_BitScanForward) {
928       Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
929       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
930       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
931       Builder.CreateStore(ZeroCount, IndexAddress, false);
932     } else {
933       unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
934       Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
935 
936       Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
937       Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
938       ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
939       Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
940       Builder.CreateStore(Index, IndexAddress, false);
941     }
942     Builder.CreateBr(End);
943     Result->addIncoming(ResOne, NotZero);
944 
945     Builder.SetInsertPoint(End);
946     return Result;
947   }
948   case MSVCIntrin::_InterlockedAnd:
949     return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
950   case MSVCIntrin::_InterlockedExchange:
951     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
952   case MSVCIntrin::_InterlockedExchangeAdd:
953     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
954   case MSVCIntrin::_InterlockedExchangeSub:
955     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
956   case MSVCIntrin::_InterlockedOr:
957     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
958   case MSVCIntrin::_InterlockedXor:
959     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
960   case MSVCIntrin::_InterlockedExchangeAdd_acq:
961     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
962                                  AtomicOrdering::Acquire);
963   case MSVCIntrin::_InterlockedExchangeAdd_rel:
964     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
965                                  AtomicOrdering::Release);
966   case MSVCIntrin::_InterlockedExchangeAdd_nf:
967     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
968                                  AtomicOrdering::Monotonic);
969   case MSVCIntrin::_InterlockedExchange_acq:
970     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
971                                  AtomicOrdering::Acquire);
972   case MSVCIntrin::_InterlockedExchange_rel:
973     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
974                                  AtomicOrdering::Release);
975   case MSVCIntrin::_InterlockedExchange_nf:
976     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
977                                  AtomicOrdering::Monotonic);
978   case MSVCIntrin::_InterlockedCompareExchange_acq:
979     return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
980   case MSVCIntrin::_InterlockedCompareExchange_rel:
981     return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
982   case MSVCIntrin::_InterlockedCompareExchange_nf:
983     return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
984   case MSVCIntrin::_InterlockedOr_acq:
985     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
986                                  AtomicOrdering::Acquire);
987   case MSVCIntrin::_InterlockedOr_rel:
988     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
989                                  AtomicOrdering::Release);
990   case MSVCIntrin::_InterlockedOr_nf:
991     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
992                                  AtomicOrdering::Monotonic);
993   case MSVCIntrin::_InterlockedXor_acq:
994     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
995                                  AtomicOrdering::Acquire);
996   case MSVCIntrin::_InterlockedXor_rel:
997     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
998                                  AtomicOrdering::Release);
999   case MSVCIntrin::_InterlockedXor_nf:
1000     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1001                                  AtomicOrdering::Monotonic);
1002   case MSVCIntrin::_InterlockedAnd_acq:
1003     return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1004                                  AtomicOrdering::Acquire);
1005   case MSVCIntrin::_InterlockedAnd_rel:
1006     return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1007                                  AtomicOrdering::Release);
1008   case MSVCIntrin::_InterlockedAnd_nf:
1009     return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1010                                  AtomicOrdering::Monotonic);
1011   case MSVCIntrin::_InterlockedIncrement_acq:
1012     return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
1013   case MSVCIntrin::_InterlockedIncrement_rel:
1014     return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
1015   case MSVCIntrin::_InterlockedIncrement_nf:
1016     return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
1017   case MSVCIntrin::_InterlockedDecrement_acq:
1018     return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
1019   case MSVCIntrin::_InterlockedDecrement_rel:
1020     return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
1021   case MSVCIntrin::_InterlockedDecrement_nf:
1022     return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
1023 
1024   case MSVCIntrin::_InterlockedDecrement:
1025     return EmitAtomicDecrementValue(*this, E);
1026   case MSVCIntrin::_InterlockedIncrement:
1027     return EmitAtomicIncrementValue(*this, E);
1028 
1029   case MSVCIntrin::__fastfail: {
1030     // Request immediate process termination from the kernel. The instruction
1031     // sequences to do this are documented on MSDN:
1032     // https://msdn.microsoft.com/en-us/library/dn774154.aspx
1033     llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
1034     StringRef Asm, Constraints;
1035     switch (ISA) {
1036     default:
1037       ErrorUnsupported(E, "__fastfail call for this architecture");
1038       break;
1039     case llvm::Triple::x86:
1040     case llvm::Triple::x86_64:
1041       Asm = "int $$0x29";
1042       Constraints = "{cx}";
1043       break;
1044     case llvm::Triple::thumb:
1045       Asm = "udf #251";
1046       Constraints = "{r0}";
1047       break;
1048     case llvm::Triple::aarch64:
1049       Asm = "brk #0xF003";
1050       Constraints = "{w0}";
1051     }
1052     llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
1053     llvm::InlineAsm *IA =
1054         llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
1055     llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
1056         getLLVMContext(), llvm::AttributeList::FunctionIndex,
1057         llvm::Attribute::NoReturn);
1058     llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
1059     CI->setAttributes(NoReturnAttr);
1060     return CI;
1061   }
1062   }
1063   llvm_unreachable("Incorrect MSVC intrinsic!");
1064 }
1065 
1066 namespace {
1067 // ARC cleanup for __builtin_os_log_format
1068 struct CallObjCArcUse final : EHScopeStack::Cleanup {
1069   CallObjCArcUse(llvm::Value *object) : object(object) {}
1070   llvm::Value *object;
1071 
1072   void Emit(CodeGenFunction &CGF, Flags flags) override {
1073     CGF.EmitARCIntrinsicUse(object);
1074   }
1075 };
1076 }
1077 
1078 Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
1079                                                  BuiltinCheckKind Kind) {
1080   assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
1081           && "Unsupported builtin check kind");
1082 
1083   Value *ArgValue = EmitScalarExpr(E);
1084   if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef())
1085     return ArgValue;
1086 
1087   SanitizerScope SanScope(this);
1088   Value *Cond = Builder.CreateICmpNE(
1089       ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
1090   EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
1091             SanitizerHandler::InvalidBuiltin,
1092             {EmitCheckSourceLocation(E->getExprLoc()),
1093              llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
1094             None);
1095   return ArgValue;
1096 }
1097 
1098 /// Get the argument type for arguments to os_log_helper.
1099 static CanQualType getOSLogArgType(ASTContext &C, int Size) {
1100   QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
1101   return C.getCanonicalType(UnsignedTy);
1102 }
1103 
/// Return the helper function that fills an os_log buffer for the given
/// layout, creating and emitting it if the module does not have it yet.
///
/// The helper takes the buffer pointer followed by one unsigned integer
/// parameter for every layout item with a non-zero size byte. Its body stores
/// the summary byte, the argument count byte and then, for each item, the
/// descriptor byte, the size byte and (for items with data) the argument
/// value.
///
/// \param Layout the buffer layout that determines the helper's name,
/// parameters and body.
/// \param BufferAlignment the alignment assumed for the buffer pointer; it is
/// also part of the helper's name.
/// \returns the existing or newly created helper function.
llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
    const analyze_os_log::OSLogBufferLayout &Layout,
    CharUnits BufferAlignment) {
  ASTContext &Ctx = getContext();

  // Build a name that encodes the alignment, the summary and argument count
  // bytes, and the size and descriptor bytes of every item.
  llvm::SmallString<64> Name;
  {
    raw_svector_ostream OS(Name);
    OS << "__os_log_helper";
    OS << "_" << BufferAlignment.getQuantity();
    OS << "_" << int(Layout.getSummaryByte());
    OS << "_" << int(Layout.getNumArgsByte());
    for (const auto &Item : Layout.Items)
      OS << "_" << int(Item.getSizeByte()) << "_"
         << int(Item.getDescriptorByte());
  }

  // Reuse a helper of that name if the module already contains one.
  if (llvm::Function *F = CGM.getModule().getFunction(Name))
    return F;

  // Parameter 0 is the buffer; the rest are named "arg<I>" after the index of
  // the layout item they carry.
  llvm::SmallVector<QualType, 4> ArgTys;
  llvm::SmallVector<ImplicitParamDecl, 4> Params;
  Params.emplace_back(Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"),
                      Ctx.VoidPtrTy, ImplicitParamDecl::Other);
  ArgTys.emplace_back(Ctx.VoidPtrTy);

  for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
    char Size = Layout.Items[I].getSizeByte();
    // Items with a zero size byte get no parameter.
    if (!Size)
      continue;

    QualType ArgTy = getOSLogArgType(Ctx, Size);
    Params.emplace_back(
        Ctx, nullptr, SourceLocation(),
        &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
        ImplicitParamDecl::Other);
    ArgTys.emplace_back(ArgTy);
  }

  // Addresses of the elements are taken only now that Params is fully
  // populated, so no later emplace_back can invalidate them.
  FunctionArgList Args;
  for (auto &P : Params)
    Args.push_back(&P);

  QualType ReturnTy = Ctx.VoidTy;
  QualType FuncionTy = Ctx.getFunctionType(ReturnTy, ArgTys, {});

  // The helper function has linkonce_odr linkage to enable the linker to merge
  // identical functions. To ensure the merging always happens, 'noinline' is
  // attached to the function when compiling with -Oz.
  const CGFunctionInfo &FI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args);
  llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
  llvm::Function *Fn = llvm::Function::Create(
      FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
  Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
  CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn);
  CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
  Fn->setDoesNotThrow();

  // Attach 'noinline' at -Oz.
  if (CGM.getCodeGenOpts().OptimizeSize == 2)
    Fn->addFnAttr(llvm::Attribute::NoInline);

  // Start the function under an empty debug location, using a synthesized
  // declaration in the translation unit.
  auto NL = ApplyDebugLocation::CreateEmpty(*this);
  IdentifierInfo *II = &Ctx.Idents.get(Name);
  FunctionDecl *FD = FunctionDecl::Create(
      Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
      FuncionTy, nullptr, SC_PrivateExtern, false, false);

  StartFunction(FD, ReturnTy, Fn, FI, Args);

  // Create a scope with an artificial location for the body of this function.
  auto AL = ApplyDebugLocation::CreateArtificial(*this);

  // Offset tracks the next byte to write in the buffer. The first two bytes
  // are the summary and the argument count.
  CharUnits Offset;
  Address BufAddr(Builder.CreateLoad(GetAddrOfLocalVar(&Params[0]), "buf"),
                  BufferAlignment);
  Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
                      Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
  Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
                      Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));

  // Params[0] is the buffer, so the argument parameters start at index 1.
  unsigned I = 1;
  for (const auto &Item : Layout.Items) {
    // Every item contributes a descriptor byte and a size byte.
    Builder.CreateStore(
        Builder.getInt8(Item.getDescriptorByte()),
        Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
    Builder.CreateStore(
        Builder.getInt8(Item.getSizeByte()),
        Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));

    // Items without data have no parameter and write nothing further.
    CharUnits Size = Item.size();
    if (!Size.getQuantity())
      continue;

    // Copy the parameter's value into the buffer at the current offset,
    // through a pointer cast to the type of the parameter's own address.
    Address Arg = GetAddrOfLocalVar(&Params[I]);
    Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
    Addr = Builder.CreateBitCast(Addr, Arg.getPointer()->getType(),
                                 "argDataCast");
    Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
    Offset += Size;
    ++I;
  }

  FinishFunction();

  return Fn;
}
1212 
1213 RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
1214   assert(E.getNumArgs() >= 2 &&
1215          "__builtin_os_log_format takes at least 2 arguments");
1216   ASTContext &Ctx = getContext();
1217   analyze_os_log::OSLogBufferLayout Layout;
1218   analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
1219   Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
1220   llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
1221 
1222   // Ignore argument 1, the format string. It is not currently used.
1223   CallArgList Args;
1224   Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy);
1225 
1226   for (const auto &Item : Layout.Items) {
1227     int Size = Item.getSizeByte();
1228     if (!Size)
1229       continue;
1230 
1231     llvm::Value *ArgVal;
1232 
1233     if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
1234       uint64_t Val = 0;
1235       for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
1236         Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
1237       ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
1238     } else if (const Expr *TheExpr = Item.getExpr()) {
1239       ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
1240 
1241       // Check if this is a retainable type.
1242       if (TheExpr->getType()->isObjCRetainableType()) {
1243         assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
1244                "Only scalar can be a ObjC retainable type");
1245         // Check if the object is constant, if not, save it in
1246         // RetainableOperands.
1247         if (!isa<Constant>(ArgVal))
1248           RetainableOperands.push_back(ArgVal);
1249       }
1250     } else {
1251       ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
1252     }
1253 
1254     unsigned ArgValSize =
1255         CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
1256     llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
1257                                                      ArgValSize);
1258     ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
1259     CanQualType ArgTy = getOSLogArgType(Ctx, Size);
1260     // If ArgVal has type x86_fp80, zero-extend ArgVal.
1261     ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
1262     Args.add(RValue::get(ArgVal), ArgTy);
1263   }
1264 
1265   const CGFunctionInfo &FI =
1266       CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
1267   llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
1268       Layout, BufAddr.getAlignment());
1269   EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
1270 
1271   // Push a clang.arc.use cleanup for each object in RetainableOperands. The
1272   // cleanup will cause the use to appear after the final log call, keeping
1273   // the object valid while it’s held in the log buffer.  Note that if there’s
1274   // a release cleanup on the object, it will already be active; since
1275   // cleanups are emitted in reverse order, the use will occur before the
1276   // object is released.
1277   if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
1278       CGM.getCodeGenOpts().OptimizationLevel != 0)
1279     for (llvm::Value *Object : RetainableOperands)
1280       pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), Object);
1281 
1282   return RValue::get(BufAddr.getPointer());
1283 }
1284 
1285 /// Determine if a binop is a checked mixed-sign multiply we can specialize.
1286 static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
1287                                        WidthAndSignedness Op1Info,
1288                                        WidthAndSignedness Op2Info,
1289                                        WidthAndSignedness ResultInfo) {
1290   return BuiltinID == Builtin::BI__builtin_mul_overflow &&
1291          std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
1292          Op1Info.Signed != Op2Info.Signed;
1293 }
1294 
1295 /// Emit a checked mixed-sign multiply. This is a cheaper specialization of
1296 /// the generic checked-binop irgen.
1297 static RValue
1298 EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
1299                              WidthAndSignedness Op1Info, const clang::Expr *Op2,
1300                              WidthAndSignedness Op2Info,
1301                              const clang::Expr *ResultArg, QualType ResultQTy,
1302                              WidthAndSignedness ResultInfo) {
1303   assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
1304                                     Op2Info, ResultInfo) &&
1305          "Not a mixed-sign multipliction we can specialize");
1306 
1307   // Emit the signed and unsigned operands.
1308   const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
1309   const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
1310   llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
1311   llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
1312   unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
1313   unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
1314 
1315   // One of the operands may be smaller than the other. If so, [s|z]ext it.
1316   if (SignedOpWidth < UnsignedOpWidth)
1317     Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
1318   if (UnsignedOpWidth < SignedOpWidth)
1319     Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");
1320 
1321   llvm::Type *OpTy = Signed->getType();
1322   llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
1323   Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
1324   llvm::Type *ResTy = ResultPtr.getElementType();
1325   unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
1326 
1327   // Take the absolute value of the signed operand.
1328   llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
1329   llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
1330   llvm::Value *AbsSigned =
1331       CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
1332 
1333   // Perform a checked unsigned multiplication.
1334   llvm::Value *UnsignedOverflow;
1335   llvm::Value *UnsignedResult =
1336       EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
1337                             Unsigned, UnsignedOverflow);
1338 
1339   llvm::Value *Overflow, *Result;
1340   if (ResultInfo.Signed) {
1341     // Signed overflow occurs if the result is greater than INT_MAX or lesser
1342     // than INT_MIN, i.e when |Result| > (INT_MAX + IsNegative).
1343     auto IntMax =
1344         llvm::APInt::getSignedMaxValue(ResultInfo.Width).zextOrSelf(OpWidth);
1345     llvm::Value *MaxResult =
1346         CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
1347                               CGF.Builder.CreateZExt(IsNegative, OpTy));
1348     llvm::Value *SignedOverflow =
1349         CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
1350     Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
1351 
1352     // Prepare the signed result (possibly by negating it).
1353     llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
1354     llvm::Value *SignedResult =
1355         CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
1356     Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
1357   } else {
1358     // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
1359     llvm::Value *Underflow = CGF.Builder.CreateAnd(
1360         IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
1361     Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
1362     if (ResultInfo.Width < OpWidth) {
1363       auto IntMax =
1364           llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
1365       llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
1366           UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
1367       Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
1368     }
1369 
1370     // Negate the product if it would be negative in infinite precision.
1371     Result = CGF.Builder.CreateSelect(
1372         IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
1373 
1374     Result = CGF.Builder.CreateTrunc(Result, ResTy);
1375   }
1376   assert(Overflow && Result && "Missing overflow or result");
1377 
1378   bool isVolatile =
1379       ResultArg->getType()->getPointeeType().isVolatileQualified();
1380   CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
1381                           isVolatile);
1382   return RValue::get(Overflow);
1383 }
1384 
1385 static llvm::Value *dumpRecord(CodeGenFunction &CGF, QualType RType,
1386                                Value *&RecordPtr, CharUnits Align,
1387                                llvm::FunctionCallee Func, int Lvl) {
1388   const auto *RT = RType->getAs<RecordType>();
1389   ASTContext &Context = CGF.getContext();
1390   RecordDecl *RD = RT->getDecl()->getDefinition();
1391   ASTContext &Ctx = RD->getASTContext();
1392   const ASTRecordLayout &RL = Ctx.getASTRecordLayout(RD);
1393   std::string Pad = std::string(Lvl * 4, ' ');
1394 
1395   Value *GString =
1396       CGF.Builder.CreateGlobalStringPtr(RType.getAsString() + " {\n");
1397   Value *Res = CGF.Builder.CreateCall(Func, {GString});
1398 
1399   static llvm::DenseMap<QualType, const char *> Types;
1400   if (Types.empty()) {
1401     Types[Context.CharTy] = "%c";
1402     Types[Context.BoolTy] = "%d";
1403     Types[Context.SignedCharTy] = "%hhd";
1404     Types[Context.UnsignedCharTy] = "%hhu";
1405     Types[Context.IntTy] = "%d";
1406     Types[Context.UnsignedIntTy] = "%u";
1407     Types[Context.LongTy] = "%ld";
1408     Types[Context.UnsignedLongTy] = "%lu";
1409     Types[Context.LongLongTy] = "%lld";
1410     Types[Context.UnsignedLongLongTy] = "%llu";
1411     Types[Context.ShortTy] = "%hd";
1412     Types[Context.UnsignedShortTy] = "%hu";
1413     Types[Context.VoidPtrTy] = "%p";
1414     Types[Context.FloatTy] = "%f";
1415     Types[Context.DoubleTy] = "%f";
1416     Types[Context.LongDoubleTy] = "%Lf";
1417     Types[Context.getPointerType(Context.CharTy)] = "%s";
1418     Types[Context.getPointerType(Context.getConstType(Context.CharTy))] = "%s";
1419   }
1420 
1421   for (const auto *FD : RD->fields()) {
1422     uint64_t Off = RL.getFieldOffset(FD->getFieldIndex());
1423     Off = Ctx.toCharUnitsFromBits(Off).getQuantity();
1424 
1425     Value *FieldPtr = RecordPtr;
1426     if (RD->isUnion())
1427       FieldPtr = CGF.Builder.CreatePointerCast(
1428           FieldPtr, CGF.ConvertType(Context.getPointerType(FD->getType())));
1429     else
1430       FieldPtr = CGF.Builder.CreateStructGEP(CGF.ConvertType(RType), FieldPtr,
1431                                              FD->getFieldIndex());
1432 
1433     GString = CGF.Builder.CreateGlobalStringPtr(
1434         llvm::Twine(Pad)
1435             .concat(FD->getType().getAsString())
1436             .concat(llvm::Twine(' '))
1437             .concat(FD->getNameAsString())
1438             .concat(" : ")
1439             .str());
1440     Value *TmpRes = CGF.Builder.CreateCall(Func, {GString});
1441     Res = CGF.Builder.CreateAdd(Res, TmpRes);
1442 
1443     QualType CanonicalType =
1444         FD->getType().getUnqualifiedType().getCanonicalType();
1445 
1446     // We check whether we are in a recursive type
1447     if (CanonicalType->isRecordType()) {
1448       Value *TmpRes =
1449           dumpRecord(CGF, CanonicalType, FieldPtr, Align, Func, Lvl + 1);
1450       Res = CGF.Builder.CreateAdd(TmpRes, Res);
1451       continue;
1452     }
1453 
1454     // We try to determine the best format to print the current field
1455     llvm::Twine Format = Types.find(CanonicalType) == Types.end()
1456                              ? Types[Context.VoidPtrTy]
1457                              : Types[CanonicalType];
1458 
1459     Address FieldAddress = Address(FieldPtr, Align);
1460     FieldPtr = CGF.Builder.CreateLoad(FieldAddress);
1461 
1462     // FIXME Need to handle bitfield here
1463     GString = CGF.Builder.CreateGlobalStringPtr(
1464         Format.concat(llvm::Twine('\n')).str());
1465     TmpRes = CGF.Builder.CreateCall(Func, {GString, FieldPtr});
1466     Res = CGF.Builder.CreateAdd(Res, TmpRes);
1467   }
1468 
1469   GString = CGF.Builder.CreateGlobalStringPtr(Pad + "}\n");
1470   Value *TmpRes = CGF.Builder.CreateCall(Func, {GString});
1471   Res = CGF.Builder.CreateAdd(Res, TmpRes);
1472   return Res;
1473 }
1474 
1475 static bool
1476 TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty,
1477                               llvm::SmallPtrSetImpl<const Decl *> &Seen) {
1478   if (const auto *Arr = Ctx.getAsArrayType(Ty))
1479     Ty = Ctx.getBaseElementType(Arr);
1480 
1481   const auto *Record = Ty->getAsCXXRecordDecl();
1482   if (!Record)
1483     return false;
1484 
1485   // We've already checked this type, or are in the process of checking it.
1486   if (!Seen.insert(Record).second)
1487     return false;
1488 
1489   assert(Record->hasDefinition() &&
1490          "Incomplete types should already be diagnosed");
1491 
1492   if (Record->isDynamicClass())
1493     return true;
1494 
1495   for (FieldDecl *F : Record->fields()) {
1496     if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
1497       return true;
1498   }
1499   return false;
1500 }
1501 
1502 /// Determine if the specified type requires laundering by checking if it is a
1503 /// dynamic class type or contains a subobject which is a dynamic class type.
1504 static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) {
1505   if (!CGM.getCodeGenOpts().StrictVTablePointers)
1506     return false;
1507   llvm::SmallPtrSet<const Decl *, 16> Seen;
1508   return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
1509 }
1510 
1511 RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
1512   llvm::Value *Src = EmitScalarExpr(E->getArg(0));
1513   llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
1514 
1515   // The builtin's shift arg may have a different type than the source arg and
1516   // result, but the LLVM intrinsic uses the same type for all values.
1517   llvm::Type *Ty = Src->getType();
1518   ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);
1519 
1520   // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same.
1521   unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1522   Function *F = CGM.getIntrinsic(IID, Ty);
1523   return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
1524 }
1525 
1526 RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
1527                                         const CallExpr *E,
1528                                         ReturnValueSlot ReturnValue) {
1529   const FunctionDecl *FD = GD.getDecl()->getAsFunction();
1530   // See if we can constant fold this builtin.  If so, don't emit it at all.
1531   Expr::EvalResult Result;
1532   if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
1533       !Result.hasSideEffects()) {
1534     if (Result.Val.isInt())
1535       return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
1536                                                 Result.Val.getInt()));
1537     if (Result.Val.isFloat())
1538       return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
1539                                                Result.Val.getFloat()));
1540   }
1541 
1542   // There are LLVM math intrinsics/instructions corresponding to math library
1543   // functions except the LLVM op will never set errno while the math library
1544   // might. Also, math builtins have the same semantics as their math library
1545   // twins. Thus, we can transform math library and builtin calls to their
1546   // LLVM counterparts if the call is marked 'const' (known to never set errno).
1547   if (FD->hasAttr<ConstAttr>()) {
1548     switch (BuiltinID) {
1549     case Builtin::BIceil:
1550     case Builtin::BIceilf:
1551     case Builtin::BIceill:
1552     case Builtin::BI__builtin_ceil:
1553     case Builtin::BI__builtin_ceilf:
1554     case Builtin::BI__builtin_ceill:
1555       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
1556 
1557     case Builtin::BIcopysign:
1558     case Builtin::BIcopysignf:
1559     case Builtin::BIcopysignl:
1560     case Builtin::BI__builtin_copysign:
1561     case Builtin::BI__builtin_copysignf:
1562     case Builtin::BI__builtin_copysignl:
1563     case Builtin::BI__builtin_copysignf128:
1564       return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
1565 
1566     case Builtin::BIcos:
1567     case Builtin::BIcosf:
1568     case Builtin::BIcosl:
1569     case Builtin::BI__builtin_cos:
1570     case Builtin::BI__builtin_cosf:
1571     case Builtin::BI__builtin_cosl:
1572       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::cos));
1573 
1574     case Builtin::BIexp:
1575     case Builtin::BIexpf:
1576     case Builtin::BIexpl:
1577     case Builtin::BI__builtin_exp:
1578     case Builtin::BI__builtin_expf:
1579     case Builtin::BI__builtin_expl:
1580       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp));
1581 
1582     case Builtin::BIexp2:
1583     case Builtin::BIexp2f:
1584     case Builtin::BIexp2l:
1585     case Builtin::BI__builtin_exp2:
1586     case Builtin::BI__builtin_exp2f:
1587     case Builtin::BI__builtin_exp2l:
1588       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp2));
1589 
1590     case Builtin::BIfabs:
1591     case Builtin::BIfabsf:
1592     case Builtin::BIfabsl:
1593     case Builtin::BI__builtin_fabs:
1594     case Builtin::BI__builtin_fabsf:
1595     case Builtin::BI__builtin_fabsl:
1596     case Builtin::BI__builtin_fabsf128:
1597       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
1598 
1599     case Builtin::BIfloor:
1600     case Builtin::BIfloorf:
1601     case Builtin::BIfloorl:
1602     case Builtin::BI__builtin_floor:
1603     case Builtin::BI__builtin_floorf:
1604     case Builtin::BI__builtin_floorl:
1605       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
1606 
1607     case Builtin::BIfma:
1608     case Builtin::BIfmaf:
1609     case Builtin::BIfmal:
1610     case Builtin::BI__builtin_fma:
1611     case Builtin::BI__builtin_fmaf:
1612     case Builtin::BI__builtin_fmal:
1613       return RValue::get(emitTernaryBuiltin(*this, E, Intrinsic::fma));
1614 
1615     case Builtin::BIfmax:
1616     case Builtin::BIfmaxf:
1617     case Builtin::BIfmaxl:
1618     case Builtin::BI__builtin_fmax:
1619     case Builtin::BI__builtin_fmaxf:
1620     case Builtin::BI__builtin_fmaxl:
1621       return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
1622 
1623     case Builtin::BIfmin:
1624     case Builtin::BIfminf:
1625     case Builtin::BIfminl:
1626     case Builtin::BI__builtin_fmin:
1627     case Builtin::BI__builtin_fminf:
1628     case Builtin::BI__builtin_fminl:
1629       return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
1630 
1631     // fmod() is a special-case. It maps to the frem instruction rather than an
1632     // LLVM intrinsic.
1633     case Builtin::BIfmod:
1634     case Builtin::BIfmodf:
1635     case Builtin::BIfmodl:
1636     case Builtin::BI__builtin_fmod:
1637     case Builtin::BI__builtin_fmodf:
1638     case Builtin::BI__builtin_fmodl: {
1639       Value *Arg1 = EmitScalarExpr(E->getArg(0));
1640       Value *Arg2 = EmitScalarExpr(E->getArg(1));
1641       return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
1642     }
1643 
1644     case Builtin::BIlog:
1645     case Builtin::BIlogf:
1646     case Builtin::BIlogl:
1647     case Builtin::BI__builtin_log:
1648     case Builtin::BI__builtin_logf:
1649     case Builtin::BI__builtin_logl:
1650       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log));
1651 
1652     case Builtin::BIlog10:
1653     case Builtin::BIlog10f:
1654     case Builtin::BIlog10l:
1655     case Builtin::BI__builtin_log10:
1656     case Builtin::BI__builtin_log10f:
1657     case Builtin::BI__builtin_log10l:
1658       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log10));
1659 
1660     case Builtin::BIlog2:
1661     case Builtin::BIlog2f:
1662     case Builtin::BIlog2l:
1663     case Builtin::BI__builtin_log2:
1664     case Builtin::BI__builtin_log2f:
1665     case Builtin::BI__builtin_log2l:
1666       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log2));
1667 
1668     case Builtin::BInearbyint:
1669     case Builtin::BInearbyintf:
1670     case Builtin::BInearbyintl:
1671     case Builtin::BI__builtin_nearbyint:
1672     case Builtin::BI__builtin_nearbyintf:
1673     case Builtin::BI__builtin_nearbyintl:
1674       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
1675 
1676     case Builtin::BIpow:
1677     case Builtin::BIpowf:
1678     case Builtin::BIpowl:
1679     case Builtin::BI__builtin_pow:
1680     case Builtin::BI__builtin_powf:
1681     case Builtin::BI__builtin_powl:
1682       return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::pow));
1683 
1684     case Builtin::BIrint:
1685     case Builtin::BIrintf:
1686     case Builtin::BIrintl:
1687     case Builtin::BI__builtin_rint:
1688     case Builtin::BI__builtin_rintf:
1689     case Builtin::BI__builtin_rintl:
1690       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
1691 
1692     case Builtin::BIround:
1693     case Builtin::BIroundf:
1694     case Builtin::BIroundl:
1695     case Builtin::BI__builtin_round:
1696     case Builtin::BI__builtin_roundf:
1697     case Builtin::BI__builtin_roundl:
1698       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
1699 
1700     case Builtin::BIsin:
1701     case Builtin::BIsinf:
1702     case Builtin::BIsinl:
1703     case Builtin::BI__builtin_sin:
1704     case Builtin::BI__builtin_sinf:
1705     case Builtin::BI__builtin_sinl:
1706       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sin));
1707 
1708     case Builtin::BIsqrt:
1709     case Builtin::BIsqrtf:
1710     case Builtin::BIsqrtl:
1711     case Builtin::BI__builtin_sqrt:
1712     case Builtin::BI__builtin_sqrtf:
1713     case Builtin::BI__builtin_sqrtl:
1714       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sqrt));
1715 
1716     case Builtin::BItrunc:
1717     case Builtin::BItruncf:
1718     case Builtin::BItruncl:
1719     case Builtin::BI__builtin_trunc:
1720     case Builtin::BI__builtin_truncf:
1721     case Builtin::BI__builtin_truncl:
1722       return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
1723 
1724     default:
1725       break;
1726     }
1727   }
1728 
1729   switch (BuiltinID) {
1730   default: break;
1731   case Builtin::BI__builtin___CFStringMakeConstantString:
1732   case Builtin::BI__builtin___NSStringMakeConstantString:
1733     return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
1734   case Builtin::BI__builtin_stdarg_start:
1735   case Builtin::BI__builtin_va_start:
1736   case Builtin::BI__va_start:
1737   case Builtin::BI__builtin_va_end:
1738     return RValue::get(
1739         EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
1740                            ? EmitScalarExpr(E->getArg(0))
1741                            : EmitVAListRef(E->getArg(0)).getPointer(),
1742                        BuiltinID != Builtin::BI__builtin_va_end));
1743   case Builtin::BI__builtin_va_copy: {
1744     Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
1745     Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
1746 
1747     llvm::Type *Type = Int8PtrTy;
1748 
1749     DstPtr = Builder.CreateBitCast(DstPtr, Type);
1750     SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
1751     return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
1752                                           {DstPtr, SrcPtr}));
1753   }
1754   case Builtin::BI__builtin_abs:
1755   case Builtin::BI__builtin_labs:
1756   case Builtin::BI__builtin_llabs: {
1757     // X < 0 ? -X : X
1758     // The negation has 'nsw' because abs of INT_MIN is undefined.
1759     Value *ArgValue = EmitScalarExpr(E->getArg(0));
1760     Value *NegOp = Builder.CreateNSWNeg(ArgValue, "neg");
1761     Constant *Zero = llvm::Constant::getNullValue(ArgValue->getType());
1762     Value *CmpResult = Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
1763     Value *Result = Builder.CreateSelect(CmpResult, NegOp, ArgValue, "abs");
1764     return RValue::get(Result);
1765   }
1766   case Builtin::BI__builtin_conj:
1767   case Builtin::BI__builtin_conjf:
1768   case Builtin::BI__builtin_conjl: {
1769     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
1770     Value *Real = ComplexVal.first;
1771     Value *Imag = ComplexVal.second;
1772     Value *Zero =
1773       Imag->getType()->isFPOrFPVectorTy()
1774         ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
1775         : llvm::Constant::getNullValue(Imag->getType());
1776 
1777     Imag = Builder.CreateFSub(Zero, Imag, "sub");
1778     return RValue::getComplex(std::make_pair(Real, Imag));
1779   }
1780   case Builtin::BI__builtin_creal:
1781   case Builtin::BI__builtin_crealf:
1782   case Builtin::BI__builtin_creall:
1783   case Builtin::BIcreal:
1784   case Builtin::BIcrealf:
1785   case Builtin::BIcreall: {
1786     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
1787     return RValue::get(ComplexVal.first);
1788   }
1789 
1790   case Builtin::BI__builtin_dump_struct: {
1791     llvm::Type *LLVMIntTy = getTypes().ConvertType(getContext().IntTy);
1792     llvm::FunctionType *LLVMFuncType = llvm::FunctionType::get(
1793         LLVMIntTy, {llvm::Type::getInt8PtrTy(getLLVMContext())}, true);
1794 
1795     Value *Func = EmitScalarExpr(E->getArg(1)->IgnoreImpCasts());
1796     CharUnits Arg0Align = EmitPointerWithAlignment(E->getArg(0)).getAlignment();
1797 
1798     const Expr *Arg0 = E->getArg(0)->IgnoreImpCasts();
1799     QualType Arg0Type = Arg0->getType()->getPointeeType();
1800 
1801     Value *RecordPtr = EmitScalarExpr(Arg0);
1802     Value *Res = dumpRecord(*this, Arg0Type, RecordPtr, Arg0Align,
1803                             {LLVMFuncType, Func}, 0);
1804     return RValue::get(Res);
1805   }
1806 
1807   case Builtin::BI__builtin_cimag:
1808   case Builtin::BI__builtin_cimagf:
1809   case Builtin::BI__builtin_cimagl:
1810   case Builtin::BIcimag:
1811   case Builtin::BIcimagf:
1812   case Builtin::BIcimagl: {
1813     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
1814     return RValue::get(ComplexVal.second);
1815   }
1816 
1817   case Builtin::BI__builtin_clrsb:
1818   case Builtin::BI__builtin_clrsbl:
1819   case Builtin::BI__builtin_clrsbll: {
1820     // clrsb(x) -> clz(x < 0 ? ~x : x) - 1
1821     Value *ArgValue = EmitScalarExpr(E->getArg(0));
1822 
1823     llvm::Type *ArgType = ArgValue->getType();
1824     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1825 
1826     llvm::Type *ResultType = ConvertType(E->getType());
1827     Value *Zero = llvm::Constant::getNullValue(ArgType);
1828     Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
1829     Value *Inverse = Builder.CreateNot(ArgValue, "not");
1830     Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
1831     Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
1832     Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
1833     Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1834                                    "cast");
1835     return RValue::get(Result);
1836   }
1837   case Builtin::BI__builtin_ctzs:
1838   case Builtin::BI__builtin_ctz:
1839   case Builtin::BI__builtin_ctzl:
1840   case Builtin::BI__builtin_ctzll: {
1841     Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
1842 
1843     llvm::Type *ArgType = ArgValue->getType();
1844     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1845 
1846     llvm::Type *ResultType = ConvertType(E->getType());
1847     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
1848     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
1849     if (Result->getType() != ResultType)
1850       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1851                                      "cast");
1852     return RValue::get(Result);
1853   }
1854   case Builtin::BI__builtin_clzs:
1855   case Builtin::BI__builtin_clz:
1856   case Builtin::BI__builtin_clzl:
1857   case Builtin::BI__builtin_clzll: {
1858     Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
1859 
1860     llvm::Type *ArgType = ArgValue->getType();
1861     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1862 
1863     llvm::Type *ResultType = ConvertType(E->getType());
1864     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
1865     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
1866     if (Result->getType() != ResultType)
1867       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1868                                      "cast");
1869     return RValue::get(Result);
1870   }
1871   case Builtin::BI__builtin_ffs:
1872   case Builtin::BI__builtin_ffsl:
1873   case Builtin::BI__builtin_ffsll: {
1874     // ffs(x) -> x ? cttz(x) + 1 : 0
1875     Value *ArgValue = EmitScalarExpr(E->getArg(0));
1876 
1877     llvm::Type *ArgType = ArgValue->getType();
1878     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1879 
1880     llvm::Type *ResultType = ConvertType(E->getType());
1881     Value *Tmp =
1882         Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
1883                           llvm::ConstantInt::get(ArgType, 1));
1884     Value *Zero = llvm::Constant::getNullValue(ArgType);
1885     Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
1886     Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
1887     if (Result->getType() != ResultType)
1888       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1889                                      "cast");
1890     return RValue::get(Result);
1891   }
1892   case Builtin::BI__builtin_parity:
1893   case Builtin::BI__builtin_parityl:
1894   case Builtin::BI__builtin_parityll: {
1895     // parity(x) -> ctpop(x) & 1
1896     Value *ArgValue = EmitScalarExpr(E->getArg(0));
1897 
1898     llvm::Type *ArgType = ArgValue->getType();
1899     Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
1900 
1901     llvm::Type *ResultType = ConvertType(E->getType());
1902     Value *Tmp = Builder.CreateCall(F, ArgValue);
1903     Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
1904     if (Result->getType() != ResultType)
1905       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1906                                      "cast");
1907     return RValue::get(Result);
1908   }
1909   case Builtin::BI__lzcnt16:
1910   case Builtin::BI__lzcnt:
1911   case Builtin::BI__lzcnt64: {
1912     Value *ArgValue = EmitScalarExpr(E->getArg(0));
1913 
1914     llvm::Type *ArgType = ArgValue->getType();
1915     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1916 
1917     llvm::Type *ResultType = ConvertType(E->getType());
1918     Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
1919     if (Result->getType() != ResultType)
1920       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1921                                      "cast");
1922     return RValue::get(Result);
1923   }
1924   case Builtin::BI__popcnt16:
1925   case Builtin::BI__popcnt:
1926   case Builtin::BI__popcnt64:
1927   case Builtin::BI__builtin_popcount:
1928   case Builtin::BI__builtin_popcountl:
1929   case Builtin::BI__builtin_popcountll: {
1930     Value *ArgValue = EmitScalarExpr(E->getArg(0));
1931 
1932     llvm::Type *ArgType = ArgValue->getType();
1933     Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
1934 
1935     llvm::Type *ResultType = ConvertType(E->getType());
1936     Value *Result = Builder.CreateCall(F, ArgValue);
1937     if (Result->getType() != ResultType)
1938       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1939                                      "cast");
1940     return RValue::get(Result);
1941   }
1942   case Builtin::BI__builtin_unpredictable: {
1943     // Always return the argument of __builtin_unpredictable. LLVM does not
1944     // handle this builtin. Metadata for this builtin should be added directly
1945     // to instructions such as branches or switches that use it.
1946     return RValue::get(EmitScalarExpr(E->getArg(0)));
1947   }
1948   case Builtin::BI__builtin_expect: {
1949     Value *ArgValue = EmitScalarExpr(E->getArg(0));
1950     llvm::Type *ArgType = ArgValue->getType();
1951 
1952     Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
1953     // Don't generate llvm.expect on -O0 as the backend won't use it for
1954     // anything.
1955     // Note, we still IRGen ExpectedValue because it could have side-effects.
1956     if (CGM.getCodeGenOpts().OptimizationLevel == 0)
1957       return RValue::get(ArgValue);
1958 
1959     Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
1960     Value *Result =
1961         Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
1962     return RValue::get(Result);
1963   }
1964   case Builtin::BI__builtin_assume_aligned: {
1965     const Expr *Ptr = E->getArg(0);
1966     Value *PtrValue = EmitScalarExpr(Ptr);
1967     Value *OffsetValue =
1968       (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
1969 
1970     Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
1971     ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
1972     unsigned Alignment = (unsigned)AlignmentCI->getZExtValue();
1973 
1974     EmitAlignmentAssumption(PtrValue, Ptr,
1975                             /*The expr loc is sufficient.*/ SourceLocation(),
1976                             Alignment, OffsetValue);
1977     return RValue::get(PtrValue);
1978   }
1979   case Builtin::BI__assume:
1980   case Builtin::BI__builtin_assume: {
1981     if (E->getArg(0)->HasSideEffects(getContext()))
1982       return RValue::get(nullptr);
1983 
1984     Value *ArgValue = EmitScalarExpr(E->getArg(0));
1985     Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
1986     return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
1987   }
1988   case Builtin::BI__builtin_bswap16:
1989   case Builtin::BI__builtin_bswap32:
1990   case Builtin::BI__builtin_bswap64: {
1991     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
1992   }
1993   case Builtin::BI__builtin_bitreverse8:
1994   case Builtin::BI__builtin_bitreverse16:
1995   case Builtin::BI__builtin_bitreverse32:
1996   case Builtin::BI__builtin_bitreverse64: {
1997     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
1998   }
1999   case Builtin::BI__builtin_rotateleft8:
2000   case Builtin::BI__builtin_rotateleft16:
2001   case Builtin::BI__builtin_rotateleft32:
2002   case Builtin::BI__builtin_rotateleft64:
2003   case Builtin::BI_rotl8: // Microsoft variants of rotate left
2004   case Builtin::BI_rotl16:
2005   case Builtin::BI_rotl:
2006   case Builtin::BI_lrotl:
2007   case Builtin::BI_rotl64:
2008     return emitRotate(E, false);
2009 
2010   case Builtin::BI__builtin_rotateright8:
2011   case Builtin::BI__builtin_rotateright16:
2012   case Builtin::BI__builtin_rotateright32:
2013   case Builtin::BI__builtin_rotateright64:
2014   case Builtin::BI_rotr8: // Microsoft variants of rotate right
2015   case Builtin::BI_rotr16:
2016   case Builtin::BI_rotr:
2017   case Builtin::BI_lrotr:
2018   case Builtin::BI_rotr64:
2019     return emitRotate(E, true);
2020 
2021   case Builtin::BI__builtin_constant_p: {
2022     llvm::Type *ResultType = ConvertType(E->getType());
2023     if (CGM.getCodeGenOpts().OptimizationLevel == 0)
2024       // At -O0, we don't perform inlining, so we don't need to delay the
2025       // processing.
2026       return RValue::get(ConstantInt::get(ResultType, 0));
2027 
2028     const Expr *Arg = E->getArg(0);
2029     QualType ArgType = Arg->getType();
2030     if (!hasScalarEvaluationKind(ArgType) || ArgType->isFunctionType())
2031       // We can only reason about scalar types.
2032       return RValue::get(ConstantInt::get(ResultType, 0));
2033 
2034     Value *ArgValue = EmitScalarExpr(Arg);
2035     if (ArgType->isObjCObjectPointerType()) {
2036       // Convert Objective-C objects to id because we cannot distinguish between
2037       // LLVM types for Obj-C classes as they are opaque.
2038       ArgType = CGM.getContext().getObjCIdType();
2039       ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType));
2040     }
2041     Function *F =
2042         CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
2043     Value *Result = Builder.CreateCall(F, ArgValue);
2044     if (Result->getType() != ResultType)
2045       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
2046     return RValue::get(Result);
2047   }
2048   case Builtin::BI__builtin_dynamic_object_size:
2049   case Builtin::BI__builtin_object_size: {
2050     unsigned Type =
2051         E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
2052     auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
2053 
2054     // We pass this builtin onto the optimizer so that it can figure out the
2055     // object size in more complex cases.
2056     bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
2057     return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
2058                                              /*EmittedE=*/nullptr, IsDynamic));
2059   }
2060   case Builtin::BI__builtin_prefetch: {
2061     Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
2062     // FIXME: Technically these constants should be of type 'int', yes?
2063     RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
2064       llvm::ConstantInt::get(Int32Ty, 0);
2065     Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
2066       llvm::ConstantInt::get(Int32Ty, 3);
2067     Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
2068     Function *F = CGM.getIntrinsic(Intrinsic::prefetch);
2069     return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
2070   }
2071   case Builtin::BI__builtin_readcyclecounter: {
2072     Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
2073     return RValue::get(Builder.CreateCall(F));
2074   }
2075   case Builtin::BI__builtin___clear_cache: {
2076     Value *Begin = EmitScalarExpr(E->getArg(0));
2077     Value *End = EmitScalarExpr(E->getArg(1));
2078     Function *F = CGM.getIntrinsic(Intrinsic::clear_cache);
2079     return RValue::get(Builder.CreateCall(F, {Begin, End}));
2080   }
2081   case Builtin::BI__builtin_trap:
2082     return RValue::get(EmitTrapCall(Intrinsic::trap));
2083   case Builtin::BI__debugbreak:
2084     return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
2085   case Builtin::BI__builtin_unreachable: {
2086     EmitUnreachable(E->getExprLoc());
2087 
2088     // We do need to preserve an insertion point.
2089     EmitBlock(createBasicBlock("unreachable.cont"));
2090 
2091     return RValue::get(nullptr);
2092   }
2093 
2094   case Builtin::BI__builtin_powi:
2095   case Builtin::BI__builtin_powif:
2096   case Builtin::BI__builtin_powil: {
2097     Value *Base = EmitScalarExpr(E->getArg(0));
2098     Value *Exponent = EmitScalarExpr(E->getArg(1));
2099     llvm::Type *ArgType = Base->getType();
2100     Function *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
2101     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
2102   }
2103 
2104   case Builtin::BI__builtin_isgreater:
2105   case Builtin::BI__builtin_isgreaterequal:
2106   case Builtin::BI__builtin_isless:
2107   case Builtin::BI__builtin_islessequal:
2108   case Builtin::BI__builtin_islessgreater:
2109   case Builtin::BI__builtin_isunordered: {
2110     // Ordered comparisons: we know the arguments to these are matching scalar
2111     // floating point values.
2112     Value *LHS = EmitScalarExpr(E->getArg(0));
2113     Value *RHS = EmitScalarExpr(E->getArg(1));
2114 
2115     switch (BuiltinID) {
2116     default: llvm_unreachable("Unknown ordered comparison");
2117     case Builtin::BI__builtin_isgreater:
2118       LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
2119       break;
2120     case Builtin::BI__builtin_isgreaterequal:
2121       LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
2122       break;
2123     case Builtin::BI__builtin_isless:
2124       LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
2125       break;
2126     case Builtin::BI__builtin_islessequal:
2127       LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
2128       break;
2129     case Builtin::BI__builtin_islessgreater:
2130       LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
2131       break;
2132     case Builtin::BI__builtin_isunordered:
2133       LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
2134       break;
2135     }
2136     // ZExt bool to int type.
2137     return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
2138   }
2139   case Builtin::BI__builtin_isnan: {
2140     Value *V = EmitScalarExpr(E->getArg(0));
2141     V = Builder.CreateFCmpUNO(V, V, "cmp");
2142     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
2143   }
2144 
2145   case Builtin::BIfinite:
2146   case Builtin::BI__finite:
2147   case Builtin::BIfinitef:
2148   case Builtin::BI__finitef:
2149   case Builtin::BIfinitel:
2150   case Builtin::BI__finitel:
2151   case Builtin::BI__builtin_isinf:
2152   case Builtin::BI__builtin_isfinite: {
2153     // isinf(x)    --> fabs(x) == infinity
2154     // isfinite(x) --> fabs(x) != infinity
2155     // x != NaN via the ordered compare in either case.
2156     Value *V = EmitScalarExpr(E->getArg(0));
2157     Value *Fabs = EmitFAbs(*this, V);
2158     Constant *Infinity = ConstantFP::getInfinity(V->getType());
2159     CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
2160                                   ? CmpInst::FCMP_OEQ
2161                                   : CmpInst::FCMP_ONE;
2162     Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
2163     return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
2164   }
2165 
2166   case Builtin::BI__builtin_isinf_sign: {
2167     // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
2168     Value *Arg = EmitScalarExpr(E->getArg(0));
2169     Value *AbsArg = EmitFAbs(*this, Arg);
2170     Value *IsInf = Builder.CreateFCmpOEQ(
2171         AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
2172     Value *IsNeg = EmitSignBit(*this, Arg);
2173 
2174     llvm::Type *IntTy = ConvertType(E->getType());
2175     Value *Zero = Constant::getNullValue(IntTy);
2176     Value *One = ConstantInt::get(IntTy, 1);
2177     Value *NegativeOne = ConstantInt::get(IntTy, -1);
2178     Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
2179     Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
2180     return RValue::get(Result);
2181   }
2182 
2183   case Builtin::BI__builtin_isnormal: {
2184     // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
2185     Value *V = EmitScalarExpr(E->getArg(0));
2186     Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
2187 
2188     Value *Abs = EmitFAbs(*this, V);
2189     Value *IsLessThanInf =
2190       Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
2191     APFloat Smallest = APFloat::getSmallestNormalized(
2192                    getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
2193     Value *IsNormal =
2194       Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
2195                             "isnormal");
2196     V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
2197     V = Builder.CreateAnd(V, IsNormal, "and");
2198     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
2199   }
2200 
2201   case Builtin::BI__builtin_flt_rounds: {
2202     Function *F = CGM.getIntrinsic(Intrinsic::flt_rounds);
2203 
2204     llvm::Type *ResultType = ConvertType(E->getType());
2205     Value *Result = Builder.CreateCall(F);
2206     if (Result->getType() != ResultType)
2207       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
2208                                      "cast");
2209     return RValue::get(Result);
2210   }
2211 
2212   case Builtin::BI__builtin_fpclassify: {
2213     Value *V = EmitScalarExpr(E->getArg(5));
2214     llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
2215 
2216     // Create Result
2217     BasicBlock *Begin = Builder.GetInsertBlock();
2218     BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
2219     Builder.SetInsertPoint(End);
2220     PHINode *Result =
2221       Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
2222                         "fpclassify_result");
2223 
2224     // if (V==0) return FP_ZERO
2225     Builder.SetInsertPoint(Begin);
2226     Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
2227                                           "iszero");
2228     Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
2229     BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
2230     Builder.CreateCondBr(IsZero, End, NotZero);
2231     Result->addIncoming(ZeroLiteral, Begin);
2232 
2233     // if (V != V) return FP_NAN
2234     Builder.SetInsertPoint(NotZero);
2235     Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
2236     Value *NanLiteral = EmitScalarExpr(E->getArg(0));
2237     BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
2238     Builder.CreateCondBr(IsNan, End, NotNan);
2239     Result->addIncoming(NanLiteral, NotZero);
2240 
2241     // if (fabs(V) == infinity) return FP_INFINITY
2242     Builder.SetInsertPoint(NotNan);
2243     Value *VAbs = EmitFAbs(*this, V);
2244     Value *IsInf =
2245       Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
2246                             "isinf");
2247     Value *InfLiteral = EmitScalarExpr(E->getArg(1));
2248     BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
2249     Builder.CreateCondBr(IsInf, End, NotInf);
2250     Result->addIncoming(InfLiteral, NotNan);
2251 
2252     // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
2253     Builder.SetInsertPoint(NotInf);
2254     APFloat Smallest = APFloat::getSmallestNormalized(
2255         getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
2256     Value *IsNormal =
2257       Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
2258                             "isnormal");
2259     Value *NormalResult =
2260       Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
2261                            EmitScalarExpr(E->getArg(3)));
2262     Builder.CreateBr(End);
2263     Result->addIncoming(NormalResult, NotInf);
2264 
2265     // return Result
2266     Builder.SetInsertPoint(End);
2267     return RValue::get(Result);
2268   }
2269 
2270   case Builtin::BIalloca:
2271   case Builtin::BI_alloca:
2272   case Builtin::BI__builtin_alloca: {
2273     Value *Size = EmitScalarExpr(E->getArg(0));
2274     const TargetInfo &TI = getContext().getTargetInfo();
2275     // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
2276     unsigned SuitableAlignmentInBytes =
2277         CGM.getContext()
2278             .toCharUnitsFromBits(TI.getSuitableAlign())
2279             .getQuantity();
2280     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
2281     AI->setAlignment(SuitableAlignmentInBytes);
2282     initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
2283     return RValue::get(AI);
2284   }
2285 
2286   case Builtin::BI__builtin_alloca_with_align: {
2287     Value *Size = EmitScalarExpr(E->getArg(0));
2288     Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
2289     auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
2290     unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
2291     unsigned AlignmentInBytes =
2292         CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity();
2293     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
2294     AI->setAlignment(AlignmentInBytes);
2295     initializeAlloca(*this, AI, Size, AlignmentInBytes);
2296     return RValue::get(AI);
2297   }
2298 
2299   case Builtin::BIbzero:
2300   case Builtin::BI__builtin_bzero: {
2301     Address Dest = EmitPointerWithAlignment(E->getArg(0));
2302     Value *SizeVal = EmitScalarExpr(E->getArg(1));
2303     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
2304                         E->getArg(0)->getExprLoc(), FD, 0);
2305     Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
2306     return RValue::get(nullptr);
2307   }
2308   case Builtin::BImemcpy:
2309   case Builtin::BI__builtin_memcpy: {
2310     Address Dest = EmitPointerWithAlignment(E->getArg(0));
2311     Address Src = EmitPointerWithAlignment(E->getArg(1));
2312     Value *SizeVal = EmitScalarExpr(E->getArg(2));
2313     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
2314                         E->getArg(0)->getExprLoc(), FD, 0);
2315     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
2316                         E->getArg(1)->getExprLoc(), FD, 1);
2317     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
2318     return RValue::get(Dest.getPointer());
2319   }
2320 
  case Builtin::BI__builtin_char_memchr:
    // Retarget the call to __builtin_memchr and leave the switch, so that
    // whatever follows the switch sees the memchr builtin ID.
    // NOTE(review): the code after the switch is not visible from here;
    // presumably it is the generic library-call fallback -- confirm.
    BuiltinID = Builtin::BI__builtin_memchr;
    break;
2324 
2325   case Builtin::BI__builtin___memcpy_chk: {
2326     // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
2327     Expr::EvalResult SizeResult, DstSizeResult;
2328     if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
2329         !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
2330       break;
2331     llvm::APSInt Size = SizeResult.Val.getInt();
2332     llvm::APSInt DstSize = DstSizeResult.Val.getInt();
2333     if (Size.ugt(DstSize))
2334       break;
2335     Address Dest = EmitPointerWithAlignment(E->getArg(0));
2336     Address Src = EmitPointerWithAlignment(E->getArg(1));
2337     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
2338     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
2339     return RValue::get(Dest.getPointer());
2340   }
2341 
2342   case Builtin::BI__builtin_objc_memmove_collectable: {
2343     Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
2344     Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
2345     Value *SizeVal = EmitScalarExpr(E->getArg(2));
2346     CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
2347                                                   DestAddr, SrcAddr, SizeVal);
2348     return RValue::get(DestAddr.getPointer());
2349   }
2350 
2351   case Builtin::BI__builtin___memmove_chk: {
2352     // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
2353     Expr::EvalResult SizeResult, DstSizeResult;
2354     if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
2355         !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
2356       break;
2357     llvm::APSInt Size = SizeResult.Val.getInt();
2358     llvm::APSInt DstSize = DstSizeResult.Val.getInt();
2359     if (Size.ugt(DstSize))
2360       break;
2361     Address Dest = EmitPointerWithAlignment(E->getArg(0));
2362     Address Src = EmitPointerWithAlignment(E->getArg(1));
2363     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
2364     Builder.CreateMemMove(Dest, Src, SizeVal, false);
2365     return RValue::get(Dest.getPointer());
2366   }
2367 
2368   case Builtin::BImemmove:
2369   case Builtin::BI__builtin_memmove: {
2370     Address Dest = EmitPointerWithAlignment(E->getArg(0));
2371     Address Src = EmitPointerWithAlignment(E->getArg(1));
2372     Value *SizeVal = EmitScalarExpr(E->getArg(2));
2373     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
2374                         E->getArg(0)->getExprLoc(), FD, 0);
2375     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
2376                         E->getArg(1)->getExprLoc(), FD, 1);
2377     Builder.CreateMemMove(Dest, Src, SizeVal, false);
2378     return RValue::get(Dest.getPointer());
2379   }
2380   case Builtin::BImemset:
2381   case Builtin::BI__builtin_memset: {
2382     Address Dest = EmitPointerWithAlignment(E->getArg(0));
2383     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
2384                                          Builder.getInt8Ty());
2385     Value *SizeVal = EmitScalarExpr(E->getArg(2));
2386     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
2387                         E->getArg(0)->getExprLoc(), FD, 0);
2388     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
2389     return RValue::get(Dest.getPointer());
2390   }
2391   case Builtin::BI__builtin___memset_chk: {
2392     // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
2393     Expr::EvalResult SizeResult, DstSizeResult;
2394     if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
2395         !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
2396       break;
2397     llvm::APSInt Size = SizeResult.Val.getInt();
2398     llvm::APSInt DstSize = DstSizeResult.Val.getInt();
2399     if (Size.ugt(DstSize))
2400       break;
2401     Address Dest = EmitPointerWithAlignment(E->getArg(0));
2402     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
2403                                          Builder.getInt8Ty());
2404     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
2405     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
2406     return RValue::get(Dest.getPointer());
2407   }
  case Builtin::BI__builtin_wmemcmp: {
    // The MSVC runtime library does not provide a definition of wmemcmp, so we
    // need an inline implementation.
    //
    // On every other target leave the switch without emitting anything here.
    // The inline version is a hand-built loop over the two buffers, one wide
    // character at a time:
    //
    //   entry:        if (size == 0) goto exit (result 0)
    //   wmemcmp.gt:   load both characters; if (dst > src) goto exit (result 1)
    //   wmemcmp.lt:   if (dst < src) goto exit (result -1)
    //   wmemcmp.next: advance both pointers, decrement the remaining count;
    //                 if it reached 0 goto exit (result 0), else loop to
    //                 wmemcmp.gt
    //   wmemcmp.exit: PHI over the four incoming results
    //
    // Characters are compared as unsigned integers.
    if (!getTarget().getTriple().isOSMSVCRT())
      break;

    llvm::Type *WCharTy = ConvertType(getContext().WCharTy);

    Value *Dst = EmitScalarExpr(E->getArg(0));
    Value *Src = EmitScalarExpr(E->getArg(1));
    Value *Size = EmitScalarExpr(E->getArg(2));

    BasicBlock *Entry = Builder.GetInsertBlock();
    BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
    BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
    BasicBlock *Next = createBasicBlock("wmemcmp.next");
    BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
    // A zero-length comparison skips the loop entirely.
    Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
    Builder.CreateCondBr(SizeEq0, Exit, CmpGT);

    // Loop header: the PHIs carry the current pointers and the remaining
    // count. Their second incoming values are added once the "next" block has
    // produced them.
    EmitBlock(CmpGT);
    PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
    DstPhi->addIncoming(Dst, Entry);
    PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
    SrcPhi->addIncoming(Src, Entry);
    PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
    SizePhi->addIncoming(Size, Entry);
    CharUnits WCharAlign =
        getContext().getTypeAlignInChars(getContext().WCharTy);
    Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
    Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
    Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
    Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);

    // Not greater: check for less-than using the characters loaded above.
    EmitBlock(CmpLT);
    Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
    Builder.CreateCondBr(DstLtSrc, Exit, Next);

    // Characters are equal: step to the next pair, or finish when the count
    // is exhausted.
    EmitBlock(Next);
    Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
    Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
    Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
    Value *NextSizeEq0 =
        Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
    Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
    DstPhi->addIncoming(NextDst, Next);
    SrcPhi->addIncoming(NextSrc, Next);
    SizePhi->addIncoming(NextSize, Next);

    // One incoming result per predecessor of the exit block.
    EmitBlock(Exit);
    PHINode *Ret = Builder.CreatePHI(IntTy, 4);
    Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
    Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
    Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
    Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
    return RValue::get(Ret);
  }
2465   case Builtin::BI__builtin_dwarf_cfa: {
2466     // The offset in bytes from the first argument to the CFA.
2467     //
2468     // Why on earth is this in the frontend?  Is there any reason at
2469     // all that the backend can't reasonably determine this while
2470     // lowering llvm.eh.dwarf.cfa()?
2471     //
2472     // TODO: If there's a satisfactory reason, add a target hook for
2473     // this instead of hard-coding 0, which is correct for most targets.
2474     int32_t Offset = 0;
2475 
2476     Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
2477     return RValue::get(Builder.CreateCall(F,
2478                                       llvm::ConstantInt::get(Int32Ty, Offset)));
2479   }
2480   case Builtin::BI__builtin_return_address: {
2481     Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
2482                                                    getContext().UnsignedIntTy);
2483     Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
2484     return RValue::get(Builder.CreateCall(F, Depth));
2485   }
2486   case Builtin::BI_ReturnAddress: {
2487     Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
2488     return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
2489   }
2490   case Builtin::BI__builtin_frame_address: {
2491     Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
2492                                                    getContext().UnsignedIntTy);
2493     Function *F = CGM.getIntrinsic(Intrinsic::frameaddress);
2494     return RValue::get(Builder.CreateCall(F, Depth));
2495   }
2496   case Builtin::BI__builtin_extract_return_addr: {
2497     Value *Address = EmitScalarExpr(E->getArg(0));
2498     Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
2499     return RValue::get(Result);
2500   }
2501   case Builtin::BI__builtin_frob_return_addr: {
2502     Value *Address = EmitScalarExpr(E->getArg(0));
2503     Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
2504     return RValue::get(Result);
2505   }
2506   case Builtin::BI__builtin_dwarf_sp_column: {
2507     llvm::IntegerType *Ty
2508       = cast<llvm::IntegerType>(ConvertType(E->getType()));
2509     int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
2510     if (Column == -1) {
2511       CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
2512       return RValue::get(llvm::UndefValue::get(Ty));
2513     }
2514     return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
2515   }
2516   case Builtin::BI__builtin_init_dwarf_reg_size_table: {
2517     Value *Address = EmitScalarExpr(E->getArg(0));
2518     if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
2519       CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
2520     return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
2521   }
2522   case Builtin::BI__builtin_eh_return: {
2523     Value *Int = EmitScalarExpr(E->getArg(0));
2524     Value *Ptr = EmitScalarExpr(E->getArg(1));
2525 
2526     llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
2527     assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
2528            "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
2529     Function *F =
2530         CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32
2531                                                     : Intrinsic::eh_return_i64);
2532     Builder.CreateCall(F, {Int, Ptr});
2533     Builder.CreateUnreachable();
2534 
2535     // We do need to preserve an insertion point.
2536     EmitBlock(createBasicBlock("builtin_eh_return.cont"));
2537 
2538     return RValue::get(nullptr);
2539   }
2540   case Builtin::BI__builtin_unwind_init: {
2541     Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
2542     return RValue::get(Builder.CreateCall(F));
2543   }
2544   case Builtin::BI__builtin_extend_pointer: {
2545     // Extends a pointer to the size of an _Unwind_Word, which is
2546     // uint64_t on all platforms.  Generally this gets poked into a
2547     // register and eventually used as an address, so if the
2548     // addressing registers are wider than pointers and the platform
2549     // doesn't implicitly ignore high-order bits when doing
2550     // addressing, we need to make sure we zext / sext based on
2551     // the platform's expectations.
2552     //
2553     // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
2554 
2555     // Cast the pointer to intptr_t.
2556     Value *Ptr = EmitScalarExpr(E->getArg(0));
2557     Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
2558 
2559     // If that's 64 bits, we're done.
2560     if (IntPtrTy->getBitWidth() == 64)
2561       return RValue::get(Result);
2562 
2563     // Otherwise, ask the codegen data what to do.
2564     if (getTargetHooks().extendPointerWithSExt())
2565       return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
2566     else
2567       return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
2568   }
2569   case Builtin::BI__builtin_setjmp: {
2570     // Buffer is a void**.
2571     Address Buf = EmitPointerWithAlignment(E->getArg(0));
2572 
2573     // Store the frame pointer to the setjmp buffer.
2574     Value *FrameAddr =
2575       Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2576                          ConstantInt::get(Int32Ty, 0));
2577     Builder.CreateStore(FrameAddr, Buf);
2578 
2579     // Store the stack pointer to the setjmp buffer.
2580     Value *StackAddr =
2581         Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
2582     Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);
2583     Builder.CreateStore(StackAddr, StackSaveSlot);
2584 
2585     // Call LLVM's EH setjmp, which is lightweight.
2586     Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
2587     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
2588     return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
2589   }
2590   case Builtin::BI__builtin_longjmp: {
2591     Value *Buf = EmitScalarExpr(E->getArg(0));
2592     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
2593 
2594     // Call LLVM's EH longjmp, which is lightweight.
2595     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
2596 
2597     // longjmp doesn't return; mark this as unreachable.
2598     Builder.CreateUnreachable();
2599 
2600     // We do need to preserve an insertion point.
2601     EmitBlock(createBasicBlock("longjmp.cont"));
2602 
2603     return RValue::get(nullptr);
2604   }
2605   case Builtin::BI__builtin_launder: {
2606     const Expr *Arg = E->getArg(0);
2607     QualType ArgTy = Arg->getType()->getPointeeType();
2608     Value *Ptr = EmitScalarExpr(Arg);
2609     if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
2610       Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
2611 
2612     return RValue::get(Ptr);
2613   }
  // The size-generic (unsuffixed) __sync builtins. Only the size-suffixed
  // forms (_1, _2, _4, _8, _16) are handled by the cases that follow; the
  // unsuffixed names are expected never to reach this switch.
  case Builtin::BI__sync_fetch_and_add:
  case Builtin::BI__sync_fetch_and_sub:
  case Builtin::BI__sync_fetch_and_or:
  case Builtin::BI__sync_fetch_and_and:
  case Builtin::BI__sync_fetch_and_xor:
  case Builtin::BI__sync_fetch_and_nand:
  case Builtin::BI__sync_add_and_fetch:
  case Builtin::BI__sync_sub_and_fetch:
  case Builtin::BI__sync_and_and_fetch:
  case Builtin::BI__sync_or_and_fetch:
  case Builtin::BI__sync_xor_and_fetch:
  case Builtin::BI__sync_nand_and_fetch:
  case Builtin::BI__sync_val_compare_and_swap:
  case Builtin::BI__sync_bool_compare_and_swap:
  case Builtin::BI__sync_lock_test_and_set:
  case Builtin::BI__sync_lock_release:
  case Builtin::BI__sync_swap:
    llvm_unreachable("Shouldn't make it through sema");
  // __sync_fetch_and_<op>_N: every operand size of a given operation is
  // forwarded to EmitBinaryAtomic with the matching atomicrmw operation.
  // NOTE(review): EmitBinaryAtomic is defined outside this view; presumably
  // it yields the value held before the operation, as the builtin names
  // suggest -- confirm against its definition.
  case Builtin::BI__sync_fetch_and_add_1:
  case Builtin::BI__sync_fetch_and_add_2:
  case Builtin::BI__sync_fetch_and_add_4:
  case Builtin::BI__sync_fetch_and_add_8:
  case Builtin::BI__sync_fetch_and_add_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
  case Builtin::BI__sync_fetch_and_sub_1:
  case Builtin::BI__sync_fetch_and_sub_2:
  case Builtin::BI__sync_fetch_and_sub_4:
  case Builtin::BI__sync_fetch_and_sub_8:
  case Builtin::BI__sync_fetch_and_sub_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
  case Builtin::BI__sync_fetch_and_or_1:
  case Builtin::BI__sync_fetch_and_or_2:
  case Builtin::BI__sync_fetch_and_or_4:
  case Builtin::BI__sync_fetch_and_or_8:
  case Builtin::BI__sync_fetch_and_or_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
  case Builtin::BI__sync_fetch_and_and_1:
  case Builtin::BI__sync_fetch_and_and_2:
  case Builtin::BI__sync_fetch_and_and_4:
  case Builtin::BI__sync_fetch_and_and_8:
  case Builtin::BI__sync_fetch_and_and_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
  case Builtin::BI__sync_fetch_and_xor_1:
  case Builtin::BI__sync_fetch_and_xor_2:
  case Builtin::BI__sync_fetch_and_xor_4:
  case Builtin::BI__sync_fetch_and_xor_8:
  case Builtin::BI__sync_fetch_and_xor_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
  case Builtin::BI__sync_fetch_and_nand_1:
  case Builtin::BI__sync_fetch_and_nand_2:
  case Builtin::BI__sync_fetch_and_nand_4:
  case Builtin::BI__sync_fetch_and_nand_8:
  case Builtin::BI__sync_fetch_and_nand_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);

  // Clang extensions: not overloaded yet.
  // These have no size-suffixed variants; each maps onto the signed or
  // unsigned min/max atomicrmw operation.
  case Builtin::BI__sync_fetch_and_min:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
  case Builtin::BI__sync_fetch_and_max:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
  case Builtin::BI__sync_fetch_and_umin:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
  case Builtin::BI__sync_fetch_and_umax:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
2678 
  // __sync_<op>_and_fetch_N: every operand size of a given operation is
  // forwarded to EmitBinaryAtomicPost with the atomicrmw operation and the
  // corresponding ordinary binary instruction.
  // NOTE(review): EmitBinaryAtomicPost is defined outside this view;
  // presumably it re-applies the binary instruction to the atomicrmw result
  // to obtain the updated value -- confirm against its definition.
  case Builtin::BI__sync_add_and_fetch_1:
  case Builtin::BI__sync_add_and_fetch_2:
  case Builtin::BI__sync_add_and_fetch_4:
  case Builtin::BI__sync_add_and_fetch_8:
  case Builtin::BI__sync_add_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
                                llvm::Instruction::Add);
  case Builtin::BI__sync_sub_and_fetch_1:
  case Builtin::BI__sync_sub_and_fetch_2:
  case Builtin::BI__sync_sub_and_fetch_4:
  case Builtin::BI__sync_sub_and_fetch_8:
  case Builtin::BI__sync_sub_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
                                llvm::Instruction::Sub);
  case Builtin::BI__sync_and_and_fetch_1:
  case Builtin::BI__sync_and_and_fetch_2:
  case Builtin::BI__sync_and_and_fetch_4:
  case Builtin::BI__sync_and_and_fetch_8:
  case Builtin::BI__sync_and_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
                                llvm::Instruction::And);
  case Builtin::BI__sync_or_and_fetch_1:
  case Builtin::BI__sync_or_and_fetch_2:
  case Builtin::BI__sync_or_and_fetch_4:
  case Builtin::BI__sync_or_and_fetch_8:
  case Builtin::BI__sync_or_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
                                llvm::Instruction::Or);
  case Builtin::BI__sync_xor_and_fetch_1:
  case Builtin::BI__sync_xor_and_fetch_2:
  case Builtin::BI__sync_xor_and_fetch_4:
  case Builtin::BI__sync_xor_and_fetch_8:
  case Builtin::BI__sync_xor_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
                                llvm::Instruction::Xor);
  // Nand is the only case that passes the extra trailing 'true' alongside
  // Instruction::And.
  // NOTE(review): presumably that flag requests inverting the And result,
  // since there is no nand binary instruction -- confirm.
  case Builtin::BI__sync_nand_and_fetch_1:
  case Builtin::BI__sync_nand_and_fetch_2:
  case Builtin::BI__sync_nand_and_fetch_4:
  case Builtin::BI__sync_nand_and_fetch_8:
  case Builtin::BI__sync_nand_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
                                llvm::Instruction::And, true);
2721 
  // Compare-and-swap: the "val" and "bool" forms share MakeAtomicCmpXchgValue
  // and differ only in its last argument (false for val, true for bool).
  // NOTE(review): MakeAtomicCmpXchgValue is defined outside this view;
  // presumably the flag selects returning the success bit instead of the
  // previous value -- confirm.
  case Builtin::BI__sync_val_compare_and_swap_1:
  case Builtin::BI__sync_val_compare_and_swap_2:
  case Builtin::BI__sync_val_compare_and_swap_4:
  case Builtin::BI__sync_val_compare_and_swap_8:
  case Builtin::BI__sync_val_compare_and_swap_16:
    return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));

  case Builtin::BI__sync_bool_compare_and_swap_1:
  case Builtin::BI__sync_bool_compare_and_swap_2:
  case Builtin::BI__sync_bool_compare_and_swap_4:
  case Builtin::BI__sync_bool_compare_and_swap_8:
  case Builtin::BI__sync_bool_compare_and_swap_16:
    return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));

  // __sync_swap and __sync_lock_test_and_set are both emitted as an atomic
  // exchange.
  case Builtin::BI__sync_swap_1:
  case Builtin::BI__sync_swap_2:
  case Builtin::BI__sync_swap_4:
  case Builtin::BI__sync_swap_8:
  case Builtin::BI__sync_swap_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);

  case Builtin::BI__sync_lock_test_and_set_1:
  case Builtin::BI__sync_lock_test_and_set_2:
  case Builtin::BI__sync_lock_test_and_set_4:
  case Builtin::BI__sync_lock_test_and_set_8:
  case Builtin::BI__sync_lock_test_and_set_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
2749 
2750   case Builtin::BI__sync_lock_release_1:
2751   case Builtin::BI__sync_lock_release_2:
2752   case Builtin::BI__sync_lock_release_4:
2753   case Builtin::BI__sync_lock_release_8:
2754   case Builtin::BI__sync_lock_release_16: {
2755     Value *Ptr = EmitScalarExpr(E->getArg(0));
2756     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
2757     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
2758     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
2759                                              StoreSize.getQuantity() * 8);
2760     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
2761     llvm::StoreInst *Store =
2762       Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
2763                                  StoreSize);
2764     Store->setAtomic(llvm::AtomicOrdering::Release);
2765     return RValue::get(nullptr);
2766   }
2767 
2768   case Builtin::BI__sync_synchronize: {
2769     // We assume this is supposed to correspond to a C++0x-style
2770     // sequentially-consistent fence (i.e. this is only usable for
2771     // synchronization, not device I/O or anything like that). This intrinsic
2772     // is really badly designed in the sense that in theory, there isn't
2773     // any way to safely use it... but in practice, it mostly works
2774     // to use it with non-atomic loads and stores to get acquire/release
2775     // semantics.
2776     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
2777     return RValue::get(nullptr);
2778   }
2779 
  // Nontemporal load/store: each is emitted by its dedicated helper, which
  // receives the whole call expression; the helper's result becomes the
  // call's value.
  case Builtin::BI__builtin_nontemporal_load:
    return RValue::get(EmitNontemporalLoad(*this, E));
  case Builtin::BI__builtin_nontemporal_store:
    return RValue::get(EmitNontemporalStore(*this, E));
2784   case Builtin::BI__c11_atomic_is_lock_free:
2785   case Builtin::BI__atomic_is_lock_free: {
2786     // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
2787     // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
2788     // _Atomic(T) is always properly-aligned.
2789     const char *LibCallName = "__atomic_is_lock_free";
2790     CallArgList Args;
2791     Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
2792              getContext().getSizeType());
2793     if (BuiltinID == Builtin::BI__atomic_is_lock_free)
2794       Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
2795                getContext().VoidPtrTy);
2796     else
2797       Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
2798                getContext().VoidPtrTy);
2799     const CGFunctionInfo &FuncInfo =
2800         CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
2801     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
2802     llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
2803     return EmitCall(FuncInfo, CGCallee::forDirect(Func),
2804                     ReturnValueSlot(), Args);
2805   }
2806 
2807   case Builtin::BI__atomic_test_and_set: {
2808     // Look at the argument type to determine whether this is a volatile
2809     // operation. The parameter type is always volatile.
2810     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
2811     bool Volatile =
2812         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
2813 
2814     Value *Ptr = EmitScalarExpr(E->getArg(0));
2815     unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
2816     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
2817     Value *NewVal = Builder.getInt8(1);
2818     Value *Order = EmitScalarExpr(E->getArg(1));
2819     if (isa<llvm::ConstantInt>(Order)) {
2820       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
2821       AtomicRMWInst *Result = nullptr;
2822       switch (ord) {
2823       case 0:  // memory_order_relaxed
2824       default: // invalid order
2825         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2826                                          llvm::AtomicOrdering::Monotonic);
2827         break;
2828       case 1: // memory_order_consume
2829       case 2: // memory_order_acquire
2830         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2831                                          llvm::AtomicOrdering::Acquire);
2832         break;
2833       case 3: // memory_order_release
2834         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2835                                          llvm::AtomicOrdering::Release);
2836         break;
2837       case 4: // memory_order_acq_rel
2838 
2839         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2840                                          llvm::AtomicOrdering::AcquireRelease);
2841         break;
2842       case 5: // memory_order_seq_cst
2843         Result = Builder.CreateAtomicRMW(
2844             llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2845             llvm::AtomicOrdering::SequentiallyConsistent);
2846         break;
2847       }
2848       Result->setVolatile(Volatile);
2849       return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
2850     }
2851 
2852     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
2853 
2854     llvm::BasicBlock *BBs[5] = {
2855       createBasicBlock("monotonic", CurFn),
2856       createBasicBlock("acquire", CurFn),
2857       createBasicBlock("release", CurFn),
2858       createBasicBlock("acqrel", CurFn),
2859       createBasicBlock("seqcst", CurFn)
2860     };
2861     llvm::AtomicOrdering Orders[5] = {
2862         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
2863         llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
2864         llvm::AtomicOrdering::SequentiallyConsistent};
2865 
2866     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
2867     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
2868 
2869     Builder.SetInsertPoint(ContBB);
2870     PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
2871 
2872     for (unsigned i = 0; i < 5; ++i) {
2873       Builder.SetInsertPoint(BBs[i]);
2874       AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
2875                                                    Ptr, NewVal, Orders[i]);
2876       RMW->setVolatile(Volatile);
2877       Result->addIncoming(RMW, BBs[i]);
2878       Builder.CreateBr(ContBB);
2879     }
2880 
2881     SI->addCase(Builder.getInt32(0), BBs[0]);
2882     SI->addCase(Builder.getInt32(1), BBs[1]);
2883     SI->addCase(Builder.getInt32(2), BBs[1]);
2884     SI->addCase(Builder.getInt32(3), BBs[2]);
2885     SI->addCase(Builder.getInt32(4), BBs[3]);
2886     SI->addCase(Builder.getInt32(5), BBs[4]);
2887 
2888     Builder.SetInsertPoint(ContBB);
2889     return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
2890   }
2891 
  case Builtin::BI__atomic_clear: {
    // Lowered as an atomic store of the i8 value 0 to the byte addressed by
    // the first argument. The builtin produces no value.
    //
    // Volatility is read from the argument expression with implicit casts
    // stripped.
    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
    bool Volatile =
        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();

    Address Ptr = EmitPointerWithAlignment(E->getArg(0));
    unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
    // Reinterpret the destination as i8* in its original address space.
    Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
    Value *NewVal = Builder.getInt8(0);
    Value *Order = EmitScalarExpr(E->getArg(1));
    // Constant memory order: emit a single store and set its ordering. Only
    // relaxed, release and seq_cst are handled explicitly; every other value
    // takes the default (monotonic) path.
    if (isa<llvm::ConstantInt>(Order)) {
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
      StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
      switch (ord) {
      case 0:  // memory_order_relaxed
      default: // invalid order
        Store->setOrdering(llvm::AtomicOrdering::Monotonic);
        break;
      case 3:  // memory_order_release
        Store->setOrdering(llvm::AtomicOrdering::Release);
        break;
      case 5:  // memory_order_seq_cst
        Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
        break;
      }
      return RValue::get(nullptr);
    }

    // Runtime memory order: switch over the value, with one block per
    // supported ordering, each emitting its own store and branching to ContBB.
    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);

    llvm::BasicBlock *BBs[3] = {
      createBasicBlock("monotonic", CurFn),
      createBasicBlock("release", CurFn),
      createBasicBlock("seqcst", CurFn)
    };
    // Orders[i] is the ordering applied to the store emitted in BBs[i].
    llvm::AtomicOrdering Orders[3] = {
        llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
        llvm::AtomicOrdering::SequentiallyConsistent};

    // The switch default is the monotonic block, so values without an explicit
    // case below behave like memory_order_relaxed.
    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);

    for (unsigned i = 0; i < 3; ++i) {
      Builder.SetInsertPoint(BBs[i]);
      StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
      Store->setOrdering(Orders[i]);
      Builder.CreateBr(ContBB);
    }

    // 0 = relaxed, 3 = release, 5 = seq_cst (same numbering as the constant
    // path above).
    SI->addCase(Builder.getInt32(0), BBs[0]);
    SI->addCase(Builder.getInt32(3), BBs[1]);
    SI->addCase(Builder.getInt32(5), BBs[2]);

    // Subsequent code is emitted into the continuation block.
    Builder.SetInsertPoint(ContBB);
    return RValue::get(nullptr);
  }
2948 
  case Builtin::BI__atomic_thread_fence:
  case Builtin::BI__atomic_signal_fence:
  case Builtin::BI__c11_atomic_thread_fence:
  case Builtin::BI__c11_atomic_signal_fence: {
    // Lowered to an LLVM fence instruction. The two signal-fence builtins use
    // the single-thread synchronization scope; the thread-fence builtins use
    // the system scope.
    llvm::SyncScope::ID SSID;
    if (BuiltinID == Builtin::BI__atomic_signal_fence ||
        BuiltinID == Builtin::BI__c11_atomic_signal_fence)
      SSID = llvm::SyncScope::SingleThread;
    else
      SSID = llvm::SyncScope::System;
    Value *Order = EmitScalarExpr(E->getArg(0));
    // Constant memory order: emit at most one fence. Relaxed and invalid
    // orders emit nothing.
    if (isa<llvm::ConstantInt>(Order)) {
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
      switch (ord) {
      case 0:  // memory_order_relaxed
      default: // invalid order
        break;
      case 1:  // memory_order_consume
      case 2:  // memory_order_acquire
        Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
        break;
      case 3:  // memory_order_release
        Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
        break;
      case 4:  // memory_order_acq_rel
        Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
        break;
      case 5:  // memory_order_seq_cst
        Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
        break;
      }
      return RValue::get(nullptr);
    }

    // Runtime memory order: switch over the value, with one block per fence
    // ordering. There is no block for relaxed.
    llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
    AcquireBB = createBasicBlock("acquire", CurFn);
    ReleaseBB = createBasicBlock("release", CurFn);
    AcqRelBB = createBasicBlock("acqrel", CurFn);
    SeqCstBB = createBasicBlock("seqcst", CurFn);
    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);

    // The switch default goes straight to ContBB, so values without a case
    // below (including 0, relaxed) emit no fence.
    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);

    // Consume (1) and acquire (2) share the acquire block.
    Builder.SetInsertPoint(AcquireBB);
    Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(1), AcquireBB);
    SI->addCase(Builder.getInt32(2), AcquireBB);

    Builder.SetInsertPoint(ReleaseBB);
    Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(3), ReleaseBB);

    Builder.SetInsertPoint(AcqRelBB);
    Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(4), AcqRelBB);

    Builder.SetInsertPoint(SeqCstBB);
    Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(5), SeqCstBB);

    // Subsequent code is emitted into the continuation block.
    Builder.SetInsertPoint(ContBB);
    return RValue::get(nullptr);
  }
3017 
3018   case Builtin::BI__builtin_signbit:
3019   case Builtin::BI__builtin_signbitf:
3020   case Builtin::BI__builtin_signbitl: {
3021     return RValue::get(
3022         Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
3023                            ConvertType(E->getType())));
3024   }
3025   case Builtin::BI__annotation: {
3026     // Re-encode each wide string to UTF8 and make an MDString.
3027     SmallVector<Metadata *, 1> Strings;
3028     for (const Expr *Arg : E->arguments()) {
3029       const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
3030       assert(Str->getCharByteWidth() == 2);
3031       StringRef WideBytes = Str->getBytes();
3032       std::string StrUtf8;
3033       if (!convertUTF16ToUTF8String(
3034               makeArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
3035         CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
3036         continue;
3037       }
3038       Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
3039     }
3040 
    // Build an MDTuple of MDStrings and emit the intrinsic call.
3042     llvm::Function *F =
3043         CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
3044     MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
3045     Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
3046     return RValue::getIgnored();
3047   }
3048   case Builtin::BI__builtin_annotation: {
3049     llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
3050     llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
3051                                       AnnVal->getType());
3052 
3053     // Get the annotation string, go through casts. Sema requires this to be a
3054     // non-wide string literal, potentially casted, so the cast<> is safe.
3055     const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
3056     StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
3057     return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
3058   }
3059   case Builtin::BI__builtin_addcb:
3060   case Builtin::BI__builtin_addcs:
3061   case Builtin::BI__builtin_addc:
3062   case Builtin::BI__builtin_addcl:
3063   case Builtin::BI__builtin_addcll:
3064   case Builtin::BI__builtin_subcb:
3065   case Builtin::BI__builtin_subcs:
3066   case Builtin::BI__builtin_subc:
3067   case Builtin::BI__builtin_subcl:
3068   case Builtin::BI__builtin_subcll: {
3069 
3070     // We translate all of these builtins from expressions of the form:
3071     //   int x = ..., y = ..., carryin = ..., carryout, result;
3072     //   result = __builtin_addc(x, y, carryin, &carryout);
3073     //
3074     // to LLVM IR of the form:
3075     //
3076     //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
3077     //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
3078     //   %carry1 = extractvalue {i32, i1} %tmp1, 1
3079     //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
3080     //                                                       i32 %carryin)
3081     //   %result = extractvalue {i32, i1} %tmp2, 0
3082     //   %carry2 = extractvalue {i32, i1} %tmp2, 1
3083     //   %tmp3 = or i1 %carry1, %carry2
3084     //   %tmp4 = zext i1 %tmp3 to i32
3085     //   store i32 %tmp4, i32* %carryout
3086 
3087     // Scalarize our inputs.
3088     llvm::Value *X = EmitScalarExpr(E->getArg(0));
3089     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
3090     llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
3091     Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
3092 
3093     // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
3094     llvm::Intrinsic::ID IntrinsicId;
3095     switch (BuiltinID) {
3096     default: llvm_unreachable("Unknown multiprecision builtin id.");
3097     case Builtin::BI__builtin_addcb:
3098     case Builtin::BI__builtin_addcs:
3099     case Builtin::BI__builtin_addc:
3100     case Builtin::BI__builtin_addcl:
3101     case Builtin::BI__builtin_addcll:
3102       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
3103       break;
3104     case Builtin::BI__builtin_subcb:
3105     case Builtin::BI__builtin_subcs:
3106     case Builtin::BI__builtin_subc:
3107     case Builtin::BI__builtin_subcl:
3108     case Builtin::BI__builtin_subcll:
3109       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
3110       break;
3111     }
3112 
3113     // Construct our resulting LLVM IR expression.
3114     llvm::Value *Carry1;
3115     llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
3116                                               X, Y, Carry1);
3117     llvm::Value *Carry2;
3118     llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
3119                                               Sum1, Carryin, Carry2);
3120     llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
3121                                                X->getType());
3122     Builder.CreateStore(CarryOut, CarryOutPtr);
3123     return RValue::get(Sum2);
3124   }
3125 
3126   case Builtin::BI__builtin_add_overflow:
3127   case Builtin::BI__builtin_sub_overflow:
3128   case Builtin::BI__builtin_mul_overflow: {
3129     const clang::Expr *LeftArg = E->getArg(0);
3130     const clang::Expr *RightArg = E->getArg(1);
3131     const clang::Expr *ResultArg = E->getArg(2);
3132 
3133     clang::QualType ResultQTy =
3134         ResultArg->getType()->castAs<PointerType>()->getPointeeType();
3135 
3136     WidthAndSignedness LeftInfo =
3137         getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
3138     WidthAndSignedness RightInfo =
3139         getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
3140     WidthAndSignedness ResultInfo =
3141         getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
3142 
3143     // Handle mixed-sign multiplication as a special case, because adding
3144     // runtime or backend support for our generic irgen would be too expensive.
3145     if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
3146       return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
3147                                           RightInfo, ResultArg, ResultQTy,
3148                                           ResultInfo);
3149 
3150     WidthAndSignedness EncompassingInfo =
3151         EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
3152 
3153     llvm::Type *EncompassingLLVMTy =
3154         llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
3155 
3156     llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
3157 
3158     llvm::Intrinsic::ID IntrinsicId;
3159     switch (BuiltinID) {
3160     default:
3161       llvm_unreachable("Unknown overflow builtin id.");
3162     case Builtin::BI__builtin_add_overflow:
3163       IntrinsicId = EncompassingInfo.Signed
3164                         ? llvm::Intrinsic::sadd_with_overflow
3165                         : llvm::Intrinsic::uadd_with_overflow;
3166       break;
3167     case Builtin::BI__builtin_sub_overflow:
3168       IntrinsicId = EncompassingInfo.Signed
3169                         ? llvm::Intrinsic::ssub_with_overflow
3170                         : llvm::Intrinsic::usub_with_overflow;
3171       break;
3172     case Builtin::BI__builtin_mul_overflow:
3173       IntrinsicId = EncompassingInfo.Signed
3174                         ? llvm::Intrinsic::smul_with_overflow
3175                         : llvm::Intrinsic::umul_with_overflow;
3176       break;
3177     }
3178 
3179     llvm::Value *Left = EmitScalarExpr(LeftArg);
3180     llvm::Value *Right = EmitScalarExpr(RightArg);
3181     Address ResultPtr = EmitPointerWithAlignment(ResultArg);
3182 
3183     // Extend each operand to the encompassing type.
3184     Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
3185     Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
3186 
3187     // Perform the operation on the extended values.
3188     llvm::Value *Overflow, *Result;
3189     Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
3190 
3191     if (EncompassingInfo.Width > ResultInfo.Width) {
3192       // The encompassing type is wider than the result type, so we need to
3193       // truncate it.
3194       llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
3195 
3196       // To see if the truncation caused an overflow, we will extend
3197       // the result and then compare it to the original result.
3198       llvm::Value *ResultTruncExt = Builder.CreateIntCast(
3199           ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
3200       llvm::Value *TruncationOverflow =
3201           Builder.CreateICmpNE(Result, ResultTruncExt);
3202 
3203       Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
3204       Result = ResultTrunc;
3205     }
3206 
3207     // Finally, store the result using the pointer.
3208     bool isVolatile =
3209       ResultArg->getType()->getPointeeType().isVolatileQualified();
3210     Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
3211 
3212     return RValue::get(Overflow);
3213   }
3214 
3215   case Builtin::BI__builtin_uadd_overflow:
3216   case Builtin::BI__builtin_uaddl_overflow:
3217   case Builtin::BI__builtin_uaddll_overflow:
3218   case Builtin::BI__builtin_usub_overflow:
3219   case Builtin::BI__builtin_usubl_overflow:
3220   case Builtin::BI__builtin_usubll_overflow:
3221   case Builtin::BI__builtin_umul_overflow:
3222   case Builtin::BI__builtin_umull_overflow:
3223   case Builtin::BI__builtin_umulll_overflow:
3224   case Builtin::BI__builtin_sadd_overflow:
3225   case Builtin::BI__builtin_saddl_overflow:
3226   case Builtin::BI__builtin_saddll_overflow:
3227   case Builtin::BI__builtin_ssub_overflow:
3228   case Builtin::BI__builtin_ssubl_overflow:
3229   case Builtin::BI__builtin_ssubll_overflow:
3230   case Builtin::BI__builtin_smul_overflow:
3231   case Builtin::BI__builtin_smull_overflow:
3232   case Builtin::BI__builtin_smulll_overflow: {
3233 
3234     // We translate all of these builtins directly to the relevant llvm IR node.
3235 
3236     // Scalarize our inputs.
3237     llvm::Value *X = EmitScalarExpr(E->getArg(0));
3238     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
3239     Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
3240 
3241     // Decide which of the overflow intrinsics we are lowering to:
3242     llvm::Intrinsic::ID IntrinsicId;
3243     switch (BuiltinID) {
3244     default: llvm_unreachable("Unknown overflow builtin id.");
3245     case Builtin::BI__builtin_uadd_overflow:
3246     case Builtin::BI__builtin_uaddl_overflow:
3247     case Builtin::BI__builtin_uaddll_overflow:
3248       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
3249       break;
3250     case Builtin::BI__builtin_usub_overflow:
3251     case Builtin::BI__builtin_usubl_overflow:
3252     case Builtin::BI__builtin_usubll_overflow:
3253       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
3254       break;
3255     case Builtin::BI__builtin_umul_overflow:
3256     case Builtin::BI__builtin_umull_overflow:
3257     case Builtin::BI__builtin_umulll_overflow:
3258       IntrinsicId = llvm::Intrinsic::umul_with_overflow;
3259       break;
3260     case Builtin::BI__builtin_sadd_overflow:
3261     case Builtin::BI__builtin_saddl_overflow:
3262     case Builtin::BI__builtin_saddll_overflow:
3263       IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
3264       break;
3265     case Builtin::BI__builtin_ssub_overflow:
3266     case Builtin::BI__builtin_ssubl_overflow:
3267     case Builtin::BI__builtin_ssubll_overflow:
3268       IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
3269       break;
3270     case Builtin::BI__builtin_smul_overflow:
3271     case Builtin::BI__builtin_smull_overflow:
3272     case Builtin::BI__builtin_smulll_overflow:
3273       IntrinsicId = llvm::Intrinsic::smul_with_overflow;
3274       break;
3275     }
3276 
3277 
3278     llvm::Value *Carry;
3279     llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
3280     Builder.CreateStore(Sum, SumOutPtr);
3281 
3282     return RValue::get(Carry);
3283   }
3284   case Builtin::BI__builtin_addressof:
3285     return RValue::get(EmitLValue(E->getArg(0)).getPointer());
3286   case Builtin::BI__builtin_operator_new:
3287     return EmitBuiltinNewDeleteCall(
3288         E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
3289   case Builtin::BI__builtin_operator_delete:
3290     return EmitBuiltinNewDeleteCall(
3291         E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
3292 
3293   case Builtin::BI__noop:
3294     // __noop always evaluates to an integer literal zero.
3295     return RValue::get(ConstantInt::get(IntTy, 0));
3296   case Builtin::BI__builtin_call_with_static_chain: {
3297     const CallExpr *Call = cast<CallExpr>(E->getArg(0));
3298     const Expr *Chain = E->getArg(1);
3299     return EmitCall(Call->getCallee()->getType(),
3300                     EmitCallee(Call->getCallee()), Call, ReturnValue,
3301                     EmitScalarExpr(Chain));
3302   }
3303   case Builtin::BI_InterlockedExchange8:
3304   case Builtin::BI_InterlockedExchange16:
3305   case Builtin::BI_InterlockedExchange:
3306   case Builtin::BI_InterlockedExchangePointer:
3307     return RValue::get(
3308         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
  case Builtin::BI_InterlockedCompareExchangePointer:
  case Builtin::BI_InterlockedCompareExchangePointer_nf: {
    // Lowered to a volatile cmpxchg performed on an integer as wide as the
    // builtin's result type: the destination is bitcast to a pointer to that
    // integer type and the pointer operands are converted with ptrtoint.
    llvm::Type *RTy;
    llvm::IntegerType *IntType =
      IntegerType::get(getLLVMContext(),
                       getContext().getTypeSize(E->getType()));
    llvm::Type *IntPtrType = IntType->getPointerTo();

    llvm::Value *Destination =
      Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);

    // Remember the LLVM type of the Exchange operand; the result is converted
    // back to it below.
    llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
    RTy = Exchange->getType();
    Exchange = Builder.CreatePtrToInt(Exchange, IntType);

    llvm::Value *Comparand =
      Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);

    // The _nf variant uses monotonic ordering; the plain variant uses
    // sequentially consistent. The same ordering is passed for both the
    // success and the failure case.
    auto Ordering =
      BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ?
      AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent;

    auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
                                              Ordering, Ordering);
    Result->setVolatile(true);

    // Element 0 of the cmpxchg result (the value loaded from Destination) is
    // converted back to a pointer of the Exchange operand's type.
    return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
                                                                         0),
                                              RTy));
  }
3339   case Builtin::BI_InterlockedCompareExchange8:
3340   case Builtin::BI_InterlockedCompareExchange16:
3341   case Builtin::BI_InterlockedCompareExchange:
3342   case Builtin::BI_InterlockedCompareExchange64:
3343     return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E));
3344   case Builtin::BI_InterlockedIncrement16:
3345   case Builtin::BI_InterlockedIncrement:
3346     return RValue::get(
3347         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
3348   case Builtin::BI_InterlockedDecrement16:
3349   case Builtin::BI_InterlockedDecrement:
3350     return RValue::get(
3351         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
3352   case Builtin::BI_InterlockedAnd8:
3353   case Builtin::BI_InterlockedAnd16:
3354   case Builtin::BI_InterlockedAnd:
3355     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
3356   case Builtin::BI_InterlockedExchangeAdd8:
3357   case Builtin::BI_InterlockedExchangeAdd16:
3358   case Builtin::BI_InterlockedExchangeAdd:
3359     return RValue::get(
3360         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
3361   case Builtin::BI_InterlockedExchangeSub8:
3362   case Builtin::BI_InterlockedExchangeSub16:
3363   case Builtin::BI_InterlockedExchangeSub:
3364     return RValue::get(
3365         EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
3366   case Builtin::BI_InterlockedOr8:
3367   case Builtin::BI_InterlockedOr16:
3368   case Builtin::BI_InterlockedOr:
3369     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
3370   case Builtin::BI_InterlockedXor8:
3371   case Builtin::BI_InterlockedXor16:
3372   case Builtin::BI_InterlockedXor:
3373     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
3374 
3375   case Builtin::BI_bittest64:
3376   case Builtin::BI_bittest:
3377   case Builtin::BI_bittestandcomplement64:
3378   case Builtin::BI_bittestandcomplement:
3379   case Builtin::BI_bittestandreset64:
3380   case Builtin::BI_bittestandreset:
3381   case Builtin::BI_bittestandset64:
3382   case Builtin::BI_bittestandset:
3383   case Builtin::BI_interlockedbittestandreset:
3384   case Builtin::BI_interlockedbittestandreset64:
3385   case Builtin::BI_interlockedbittestandset64:
3386   case Builtin::BI_interlockedbittestandset:
3387   case Builtin::BI_interlockedbittestandset_acq:
3388   case Builtin::BI_interlockedbittestandset_rel:
3389   case Builtin::BI_interlockedbittestandset_nf:
3390   case Builtin::BI_interlockedbittestandreset_acq:
3391   case Builtin::BI_interlockedbittestandreset_rel:
3392   case Builtin::BI_interlockedbittestandreset_nf:
3393     return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
3394 
3395     // These builtins exist to emit regular volatile loads and stores not
3396     // affected by the -fms-volatile setting.
3397   case Builtin::BI__iso_volatile_load8:
3398   case Builtin::BI__iso_volatile_load16:
3399   case Builtin::BI__iso_volatile_load32:
3400   case Builtin::BI__iso_volatile_load64:
3401     return RValue::get(EmitISOVolatileLoad(*this, E));
3402   case Builtin::BI__iso_volatile_store8:
3403   case Builtin::BI__iso_volatile_store16:
3404   case Builtin::BI__iso_volatile_store32:
3405   case Builtin::BI__iso_volatile_store64:
3406     return RValue::get(EmitISOVolatileStore(*this, E));
3407 
3408   case Builtin::BI__exception_code:
3409   case Builtin::BI_exception_code:
3410     return RValue::get(EmitSEHExceptionCode());
3411   case Builtin::BI__exception_info:
3412   case Builtin::BI_exception_info:
3413     return RValue::get(EmitSEHExceptionInfo());
3414   case Builtin::BI__abnormal_termination:
3415   case Builtin::BI_abnormal_termination:
3416     return RValue::get(EmitSEHAbnormalTermination());
3417   case Builtin::BI_setjmpex:
3418     if (getTarget().getTriple().isOSMSVCRT())
3419       return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
3420     break;
  case Builtin::BI_setjmp:
    // Only given special lowering when the target OS uses the MSVC runtime;
    // otherwise control leaves the switch via the break below.
    if (getTarget().getTriple().isOSMSVCRT()) {
      // The flavour of the MSVCRT setjmp call depends on the architecture:
      // x86 uses _setjmp3, aarch64 uses _setjmpex, everything else _setjmp.
      if (getTarget().getTriple().getArch() == llvm::Triple::x86)
        return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
      else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
        return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
      return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
    }
    break;
3430 
3431   case Builtin::BI__GetExceptionInfo: {
3432     if (llvm::GlobalVariable *GV =
3433             CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
3434       return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
3435     break;
3436   }
3437 
3438   case Builtin::BI__fastfail:
3439     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
3440 
3441   case Builtin::BI__builtin_coro_size: {
3442     auto & Context = getContext();
3443     auto SizeTy = Context.getSizeType();
3444     auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
3445     Function *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
3446     return RValue::get(Builder.CreateCall(F));
3447   }
3448 
3449   case Builtin::BI__builtin_coro_id:
3450     return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
3451   case Builtin::BI__builtin_coro_promise:
3452     return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
3453   case Builtin::BI__builtin_coro_resume:
3454     return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
3455   case Builtin::BI__builtin_coro_frame:
3456     return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
3457   case Builtin::BI__builtin_coro_noop:
3458     return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
3459   case Builtin::BI__builtin_coro_free:
3460     return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
3461   case Builtin::BI__builtin_coro_destroy:
3462     return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
3463   case Builtin::BI__builtin_coro_done:
3464     return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
3465   case Builtin::BI__builtin_coro_alloc:
3466     return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
3467   case Builtin::BI__builtin_coro_begin:
3468     return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
3469   case Builtin::BI__builtin_coro_end:
3470     return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
3471   case Builtin::BI__builtin_coro_suspend:
3472     return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
3473   case Builtin::BI__builtin_coro_param:
3474     return EmitCoroutineIntrinsic(E, Intrinsic::coro_param);
3475 
3476   // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
3477   case Builtin::BIread_pipe:
3478   case Builtin::BIwrite_pipe: {
3479     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
3480           *Arg1 = EmitScalarExpr(E->getArg(1));
3481     CGOpenCLRuntime OpenCLRT(CGM);
3482     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
3483     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
3484 
3485     // Type of the generic packet parameter.
3486     unsigned GenericAS =
3487         getContext().getTargetAddressSpace(LangAS::opencl_generic);
3488     llvm::Type *I8PTy = llvm::PointerType::get(
3489         llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
3490 
3491     // Testing which overloaded version we should generate the call for.
3492     if (2U == E->getNumArgs()) {
3493       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
3494                                                              : "__write_pipe_2";
3495       // Creating a generic function type to be able to call with any builtin or
3496       // user defined type.
3497       llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
3498       llvm::FunctionType *FTy = llvm::FunctionType::get(
3499           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3500       Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
3501       return RValue::get(
3502           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3503                              {Arg0, BCast, PacketSize, PacketAlign}));
3504     } else {
3505       assert(4 == E->getNumArgs() &&
3506              "Illegal number of parameters to pipe function");
3507       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
3508                                                              : "__write_pipe_4";
3509 
3510       llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
3511                               Int32Ty, Int32Ty};
3512       Value *Arg2 = EmitScalarExpr(E->getArg(2)),
3513             *Arg3 = EmitScalarExpr(E->getArg(3));
3514       llvm::FunctionType *FTy = llvm::FunctionType::get(
3515           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3516       Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
3517       // We know the third argument is an integer type, but we may need to cast
3518       // it to i32.
3519       if (Arg2->getType() != Int32Ty)
3520         Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
3521       return RValue::get(Builder.CreateCall(
3522           CGM.CreateRuntimeFunction(FTy, Name),
3523           {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
3524     }
3525   }
3526   // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write
3527   // functions
3528   case Builtin::BIreserve_read_pipe:
3529   case Builtin::BIreserve_write_pipe:
3530   case Builtin::BIwork_group_reserve_read_pipe:
3531   case Builtin::BIwork_group_reserve_write_pipe:
3532   case Builtin::BIsub_group_reserve_read_pipe:
3533   case Builtin::BIsub_group_reserve_write_pipe: {
3534     // Composing the mangled name for the function.
3535     const char *Name;
3536     if (BuiltinID == Builtin::BIreserve_read_pipe)
3537       Name = "__reserve_read_pipe";
3538     else if (BuiltinID == Builtin::BIreserve_write_pipe)
3539       Name = "__reserve_write_pipe";
3540     else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
3541       Name = "__work_group_reserve_read_pipe";
3542     else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
3543       Name = "__work_group_reserve_write_pipe";
3544     else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
3545       Name = "__sub_group_reserve_read_pipe";
3546     else
3547       Name = "__sub_group_reserve_write_pipe";
3548 
3549     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
3550           *Arg1 = EmitScalarExpr(E->getArg(1));
3551     llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
3552     CGOpenCLRuntime OpenCLRT(CGM);
3553     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
3554     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
3555 
3556     // Building the generic function prototype.
3557     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
3558     llvm::FunctionType *FTy = llvm::FunctionType::get(
3559         ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3560     // We know the second argument is an integer type, but we may need to cast
3561     // it to i32.
3562     if (Arg1->getType() != Int32Ty)
3563       Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
3564     return RValue::get(
3565         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3566                            {Arg0, Arg1, PacketSize, PacketAlign}));
3567   }
3568   // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
3569   // functions
3570   case Builtin::BIcommit_read_pipe:
3571   case Builtin::BIcommit_write_pipe:
3572   case Builtin::BIwork_group_commit_read_pipe:
3573   case Builtin::BIwork_group_commit_write_pipe:
3574   case Builtin::BIsub_group_commit_read_pipe:
3575   case Builtin::BIsub_group_commit_write_pipe: {
3576     const char *Name;
3577     if (BuiltinID == Builtin::BIcommit_read_pipe)
3578       Name = "__commit_read_pipe";
3579     else if (BuiltinID == Builtin::BIcommit_write_pipe)
3580       Name = "__commit_write_pipe";
3581     else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
3582       Name = "__work_group_commit_read_pipe";
3583     else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
3584       Name = "__work_group_commit_write_pipe";
3585     else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
3586       Name = "__sub_group_commit_read_pipe";
3587     else
3588       Name = "__sub_group_commit_write_pipe";
3589 
3590     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
3591           *Arg1 = EmitScalarExpr(E->getArg(1));
3592     CGOpenCLRuntime OpenCLRT(CGM);
3593     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
3594     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
3595 
3596     // Building the generic function prototype.
3597     llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
3598     llvm::FunctionType *FTy =
3599         llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
3600                                 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3601 
3602     return RValue::get(
3603         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3604                            {Arg0, Arg1, PacketSize, PacketAlign}));
3605   }
3606   // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
3607   case Builtin::BIget_pipe_num_packets:
3608   case Builtin::BIget_pipe_max_packets: {
3609     const char *BaseName;
3610     const PipeType *PipeTy = E->getArg(0)->getType()->getAs<PipeType>();
3611     if (BuiltinID == Builtin::BIget_pipe_num_packets)
3612       BaseName = "__get_pipe_num_packets";
3613     else
3614       BaseName = "__get_pipe_max_packets";
3615     auto Name = std::string(BaseName) +
3616                 std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
3617 
3618     // Building the generic function prototype.
3619     Value *Arg0 = EmitScalarExpr(E->getArg(0));
3620     CGOpenCLRuntime OpenCLRT(CGM);
3621     Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
3622     Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
3623     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
3624     llvm::FunctionType *FTy = llvm::FunctionType::get(
3625         Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3626 
3627     return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3628                                           {Arg0, PacketSize, PacketAlign}));
3629   }
3630 
3631   // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
3632   case Builtin::BIto_global:
3633   case Builtin::BIto_local:
3634   case Builtin::BIto_private: {
3635     auto Arg0 = EmitScalarExpr(E->getArg(0));
3636     auto NewArgT = llvm::PointerType::get(Int8Ty,
3637       CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
3638     auto NewRetT = llvm::PointerType::get(Int8Ty,
3639       CGM.getContext().getTargetAddressSpace(
3640         E->getType()->getPointeeType().getAddressSpace()));
3641     auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
3642     llvm::Value *NewArg;
3643     if (Arg0->getType()->getPointerAddressSpace() !=
3644         NewArgT->getPointerAddressSpace())
3645       NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
3646     else
3647       NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
3648     auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
3649     auto NewCall =
3650         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
3651     return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
3652       ConvertType(E->getType())));
3653   }
3654 
3655   // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
3656   // It contains four different overload formats specified in Table 6.13.17.1.
3657   case Builtin::BIenqueue_kernel: {
3658     StringRef Name; // Generated function call name
3659     unsigned NumArgs = E->getNumArgs();
3660 
3661     llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
3662     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3663         getContext().getTargetAddressSpace(LangAS::opencl_generic));
3664 
3665     llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
3666     llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
3667     LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
3668     llvm::Value *Range = NDRangeL.getAddress().getPointer();
3669     llvm::Type *RangeTy = NDRangeL.getAddress().getType();
3670 
3671     if (NumArgs == 4) {
3672       // The most basic form of the call with parameters:
3673       // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
3674       Name = "__enqueue_kernel_basic";
3675       llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
3676                               GenericVoidPtrTy};
3677       llvm::FunctionType *FTy = llvm::FunctionType::get(
3678           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3679 
3680       auto Info =
3681           CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
3682       llvm::Value *Kernel =
3683           Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3684       llvm::Value *Block =
3685           Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3686 
3687       AttrBuilder B;
3688       B.addAttribute(Attribute::ByVal);
3689       llvm::AttributeList ByValAttrSet =
3690           llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
3691 
3692       auto RTCall =
3693           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
3694                              {Queue, Flags, Range, Kernel, Block});
3695       RTCall->setAttributes(ByValAttrSet);
3696       return RValue::get(RTCall);
3697     }
3698     assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
3699 
3700     // Create a temporary array to hold the sizes of local pointer arguments
3701     // for the block. \p First is the position of the first size argument.
3702     auto CreateArrayForSizeVar = [=](unsigned First)
3703         -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
3704       llvm::APInt ArraySize(32, NumArgs - First);
3705       QualType SizeArrayTy = getContext().getConstantArrayType(
3706           getContext().getSizeType(), ArraySize, ArrayType::Normal,
3707           /*IndexTypeQuals=*/0);
3708       auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
3709       llvm::Value *TmpPtr = Tmp.getPointer();
3710       llvm::Value *TmpSize = EmitLifetimeStart(
3711           CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
3712       llvm::Value *ElemPtr;
3713       // Each of the following arguments specifies the size of the corresponding
3714       // argument passed to the enqueued block.
3715       auto *Zero = llvm::ConstantInt::get(IntTy, 0);
3716       for (unsigned I = First; I < NumArgs; ++I) {
3717         auto *Index = llvm::ConstantInt::get(IntTy, I - First);
3718         auto *GEP = Builder.CreateGEP(TmpPtr, {Zero, Index});
3719         if (I == First)
3720           ElemPtr = GEP;
3721         auto *V =
3722             Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
3723         Builder.CreateAlignedStore(
3724             V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy));
3725       }
3726       return std::tie(ElemPtr, TmpSize, TmpPtr);
3727     };
3728 
3729     // Could have events and/or varargs.
3730     if (E->getArg(3)->getType()->isBlockPointerType()) {
3731       // No events passed, but has variadic arguments.
3732       Name = "__enqueue_kernel_varargs";
3733       auto Info =
3734           CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
3735       llvm::Value *Kernel =
3736           Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3737       auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3738       llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
3739       std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
3740 
3741       // Create a vector of the arguments, as well as a constant value to
3742       // express to the runtime the number of variadic arguments.
3743       std::vector<llvm::Value *> Args = {
3744           Queue,  Flags, Range,
3745           Kernel, Block, ConstantInt::get(IntTy, NumArgs - 4),
3746           ElemPtr};
3747       std::vector<llvm::Type *> ArgTys = {
3748           QueueTy,          IntTy, RangeTy,           GenericVoidPtrTy,
3749           GenericVoidPtrTy, IntTy, ElemPtr->getType()};
3750 
3751       llvm::FunctionType *FTy = llvm::FunctionType::get(
3752           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3753       auto Call =
3754           RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3755                                          llvm::ArrayRef<llvm::Value *>(Args)));
3756       if (TmpSize)
3757         EmitLifetimeEnd(TmpSize, TmpPtr);
3758       return Call;
3759     }
3760     // Any calls now have event arguments passed.
3761     if (NumArgs >= 7) {
3762       llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
3763       llvm::PointerType *EventPtrTy = EventTy->getPointerTo(
3764           CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
3765 
3766       llvm::Value *NumEvents =
3767           Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
3768 
3769       // Since SemaOpenCLBuiltinEnqueueKernel allows fifth and sixth arguments
3770       // to be a null pointer constant (including `0` literal), we can take it
3771       // into account and emit null pointer directly.
3772       llvm::Value *EventWaitList = nullptr;
3773       if (E->getArg(4)->isNullPointerConstant(
3774               getContext(), Expr::NPC_ValueDependentIsNotNull)) {
3775         EventWaitList = llvm::ConstantPointerNull::get(EventPtrTy);
3776       } else {
3777         EventWaitList = E->getArg(4)->getType()->isArrayType()
3778                         ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
3779                         : EmitScalarExpr(E->getArg(4));
3780         // Convert to generic address space.
3781         EventWaitList = Builder.CreatePointerCast(EventWaitList, EventPtrTy);
3782       }
3783       llvm::Value *EventRet = nullptr;
3784       if (E->getArg(5)->isNullPointerConstant(
3785               getContext(), Expr::NPC_ValueDependentIsNotNull)) {
3786         EventRet = llvm::ConstantPointerNull::get(EventPtrTy);
3787       } else {
3788         EventRet =
3789             Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), EventPtrTy);
3790       }
3791 
3792       auto Info =
3793           CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
3794       llvm::Value *Kernel =
3795           Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3796       llvm::Value *Block =
3797           Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3798 
3799       std::vector<llvm::Type *> ArgTys = {
3800           QueueTy,    Int32Ty,    RangeTy,          Int32Ty,
3801           EventPtrTy, EventPtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
3802 
3803       std::vector<llvm::Value *> Args = {Queue,     Flags,         Range,
3804                                          NumEvents, EventWaitList, EventRet,
3805                                          Kernel,    Block};
3806 
3807       if (NumArgs == 7) {
3808         // Has events but no variadics.
3809         Name = "__enqueue_kernel_basic_events";
3810         llvm::FunctionType *FTy = llvm::FunctionType::get(
3811             Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3812         return RValue::get(
3813             Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3814                                llvm::ArrayRef<llvm::Value *>(Args)));
3815       }
3816       // Has event info and variadics
3817       // Pass the number of variadics to the runtime function too.
3818       Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
3819       ArgTys.push_back(Int32Ty);
3820       Name = "__enqueue_kernel_events_varargs";
3821 
3822       llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
3823       std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
3824       Args.push_back(ElemPtr);
3825       ArgTys.push_back(ElemPtr->getType());
3826 
3827       llvm::FunctionType *FTy = llvm::FunctionType::get(
3828           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3829       auto Call =
3830           RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3831                                          llvm::ArrayRef<llvm::Value *>(Args)));
3832       if (TmpSize)
3833         EmitLifetimeEnd(TmpSize, TmpPtr);
3834       return Call;
3835     }
3836     LLVM_FALLTHROUGH;
3837   }
3838   // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
3839   // parameter.
3840   case Builtin::BIget_kernel_work_group_size: {
3841     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3842         getContext().getTargetAddressSpace(LangAS::opencl_generic));
3843     auto Info =
3844         CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
3845     Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3846     Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3847     return RValue::get(Builder.CreateCall(
3848         CGM.CreateRuntimeFunction(
3849             llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
3850                                     false),
3851             "__get_kernel_work_group_size_impl"),
3852         {Kernel, Arg}));
3853   }
3854   case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
3855     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3856         getContext().getTargetAddressSpace(LangAS::opencl_generic));
3857     auto Info =
3858         CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
3859     Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3860     Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3861     return RValue::get(Builder.CreateCall(
3862         CGM.CreateRuntimeFunction(
3863             llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
3864                                     false),
3865             "__get_kernel_preferred_work_group_size_multiple_impl"),
3866         {Kernel, Arg}));
3867   }
3868   case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
3869   case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
3870     llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3871         getContext().getTargetAddressSpace(LangAS::opencl_generic));
3872     LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
3873     llvm::Value *NDRange = NDRangeL.getAddress().getPointer();
3874     auto Info =
3875         CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
3876     Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3877     Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3878     const char *Name =
3879         BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
3880             ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
3881             : "__get_kernel_sub_group_count_for_ndrange_impl";
3882     return RValue::get(Builder.CreateCall(
3883         CGM.CreateRuntimeFunction(
3884             llvm::FunctionType::get(
3885                 IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
3886                 false),
3887             Name),
3888         {NDRange, Kernel, Block}));
3889   }
3890 
3891   case Builtin::BI__builtin_store_half:
3892   case Builtin::BI__builtin_store_halff: {
3893     Value *Val = EmitScalarExpr(E->getArg(0));
3894     Address Address = EmitPointerWithAlignment(E->getArg(1));
3895     Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
3896     return RValue::get(Builder.CreateStore(HalfVal, Address));
3897   }
3898   case Builtin::BI__builtin_load_half: {
3899     Address Address = EmitPointerWithAlignment(E->getArg(0));
3900     Value *HalfVal = Builder.CreateLoad(Address);
3901     return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
3902   }
3903   case Builtin::BI__builtin_load_halff: {
3904     Address Address = EmitPointerWithAlignment(E->getArg(0));
3905     Value *HalfVal = Builder.CreateLoad(Address);
3906     return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
3907   }
3908   case Builtin::BIprintf:
3909     if (getTarget().getTriple().isNVPTX())
3910       return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
3911     break;
3912   case Builtin::BI__builtin_canonicalize:
3913   case Builtin::BI__builtin_canonicalizef:
3914   case Builtin::BI__builtin_canonicalizel:
3915     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
3916 
3917   case Builtin::BI__builtin_thread_pointer: {
3918     if (!getContext().getTargetInfo().isTLSSupported())
3919       CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
3920     // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
3921     break;
3922   }
3923   case Builtin::BI__builtin_os_log_format:
3924     return emitBuiltinOSLogFormat(*E);
3925 
3926   case Builtin::BI__xray_customevent: {
3927     if (!ShouldXRayInstrumentFunction())
3928       return RValue::getIgnored();
3929 
3930     if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
3931             XRayInstrKind::Custom))
3932       return RValue::getIgnored();
3933 
3934     if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
3935       if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
3936         return RValue::getIgnored();
3937 
3938     Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
3939     auto FTy = F->getFunctionType();
3940     auto Arg0 = E->getArg(0);
3941     auto Arg0Val = EmitScalarExpr(Arg0);
3942     auto Arg0Ty = Arg0->getType();
3943     auto PTy0 = FTy->getParamType(0);
3944     if (PTy0 != Arg0Val->getType()) {
3945       if (Arg0Ty->isArrayType())
3946         Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
3947       else
3948         Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
3949     }
3950     auto Arg1 = EmitScalarExpr(E->getArg(1));
3951     auto PTy1 = FTy->getParamType(1);
3952     if (PTy1 != Arg1->getType())
3953       Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
3954     return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
3955   }
3956 
3957   case Builtin::BI__xray_typedevent: {
3958     // TODO: There should be a way to always emit events even if the current
3959     // function is not instrumented. Losing events in a stream can cripple
3960     // a trace.
3961     if (!ShouldXRayInstrumentFunction())
3962       return RValue::getIgnored();
3963 
3964     if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
3965             XRayInstrKind::Typed))
3966       return RValue::getIgnored();
3967 
3968     if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
3969       if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
3970         return RValue::getIgnored();
3971 
3972     Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
3973     auto FTy = F->getFunctionType();
3974     auto Arg0 = EmitScalarExpr(E->getArg(0));
3975     auto PTy0 = FTy->getParamType(0);
3976     if (PTy0 != Arg0->getType())
3977       Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
3978     auto Arg1 = E->getArg(1);
3979     auto Arg1Val = EmitScalarExpr(Arg1);
3980     auto Arg1Ty = Arg1->getType();
3981     auto PTy1 = FTy->getParamType(1);
3982     if (PTy1 != Arg1Val->getType()) {
3983       if (Arg1Ty->isArrayType())
3984         Arg1Val = EmitArrayToPointerDecay(Arg1).getPointer();
3985       else
3986         Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
3987     }
3988     auto Arg2 = EmitScalarExpr(E->getArg(2));
3989     auto PTy2 = FTy->getParamType(2);
3990     if (PTy2 != Arg2->getType())
3991       Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
3992     return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
3993   }
3994 
3995   case Builtin::BI__builtin_ms_va_start:
3996   case Builtin::BI__builtin_ms_va_end:
3997     return RValue::get(
3998         EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
3999                        BuiltinID == Builtin::BI__builtin_ms_va_start));
4000 
4001   case Builtin::BI__builtin_ms_va_copy: {
4002     // Lower this manually. We can't reliably determine whether or not any
4003     // given va_copy() is for a Win64 va_list from the calling convention
4004     // alone, because it's legal to do this from a System V ABI function.
4005     // With opaque pointer types, we won't have enough information in LLVM
4006     // IR to determine this from the argument types, either. Best to do it
4007     // now, while we have enough information.
4008     Address DestAddr = EmitMSVAListRef(E->getArg(0));
4009     Address SrcAddr = EmitMSVAListRef(E->getArg(1));
4010 
4011     llvm::Type *BPP = Int8PtrPtrTy;
4012 
4013     DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
4014                        DestAddr.getAlignment());
4015     SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
4016                       SrcAddr.getAlignment());
4017 
4018     Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
4019     return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
4020   }
4021   }
4022 
4023   // If this is an alias for a lib function (e.g. __builtin_sin), emit
4024   // the call using the normal call path, but using the unmangled
4025   // version of the function name.
4026   if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
4027     return emitLibraryCall(*this, FD, E,
4028                            CGM.getBuiltinLibFunction(FD, BuiltinID));
4029 
4030   // If this is a predefined lib function (e.g. malloc), emit the call
4031   // using exactly the normal call path.
4032   if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
4033     return emitLibraryCall(*this, FD, E,
4034                       cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
4035 
4036   // Check that a call to a target specific builtin has the correct target
4037   // features.
4038   // This is down here to avoid non-target specific builtins, however, if
4039   // generic builtins start to require generic target features then we
4040   // can move this up to the beginning of the function.
4041   checkTargetFeatures(E, FD);
4042 
4043   if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
4044     LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
4045 
4046   // See if we have a target specific intrinsic.
4047   const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
4048   Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
4049   StringRef Prefix =
4050       llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
4051   if (!Prefix.empty()) {
4052     IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name);
4053     // NOTE we don't need to perform a compatibility flag check here since the
4054     // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the
4055     // MS builtins via ALL_MS_LANGUAGES and are filtered earlier.
4056     if (IntrinsicID == Intrinsic::not_intrinsic)
4057       IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
4058   }
4059 
4060   if (IntrinsicID != Intrinsic::not_intrinsic) {
4061     SmallVector<Value*, 16> Args;
4062 
4063     // Find out if any arguments are required to be integer constant
4064     // expressions.
4065     unsigned ICEArguments = 0;
4066     ASTContext::GetBuiltinTypeError Error;
4067     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4068     assert(Error == ASTContext::GE_None && "Should not codegen an error");
4069 
4070     Function *F = CGM.getIntrinsic(IntrinsicID);
4071     llvm::FunctionType *FTy = F->getFunctionType();
4072 
4073     for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
4074       Value *ArgValue;
4075       // If this is a normal argument, just emit it as a scalar.
4076       if ((ICEArguments & (1 << i)) == 0) {
4077         ArgValue = EmitScalarExpr(E->getArg(i));
4078       } else {
4079         // If this is required to be a constant, constant fold it so that we
4080         // know that the generated intrinsic gets a ConstantInt.
4081         llvm::APSInt Result;
4082         bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
4083         assert(IsConst && "Constant arg isn't actually constant?");
4084         (void)IsConst;
4085         ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
4086       }
4087 
4088       // If the intrinsic arg type is different from the builtin arg type
4089       // we need to do a bit cast.
4090       llvm::Type *PTy = FTy->getParamType(i);
4091       if (PTy != ArgValue->getType()) {
4092         // XXX - vector of pointers?
4093         if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
4094           if (PtrTy->getAddressSpace() !=
4095               ArgValue->getType()->getPointerAddressSpace()) {
4096             ArgValue = Builder.CreateAddrSpaceCast(
4097               ArgValue,
4098               ArgValue->getType()->getPointerTo(PtrTy->getAddressSpace()));
4099           }
4100         }
4101 
4102         assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
4103                "Must be able to losslessly bit cast to param");
4104         ArgValue = Builder.CreateBitCast(ArgValue, PTy);
4105       }
4106 
4107       Args.push_back(ArgValue);
4108     }
4109 
4110     Value *V = Builder.CreateCall(F, Args);
4111     QualType BuiltinRetType = E->getType();
4112 
4113     llvm::Type *RetTy = VoidTy;
4114     if (!BuiltinRetType->isVoidType())
4115       RetTy = ConvertType(BuiltinRetType);
4116 
4117     if (RetTy != V->getType()) {
4118       // XXX - vector of pointers?
4119       if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
4120         if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
4121           V = Builder.CreateAddrSpaceCast(
4122             V, V->getType()->getPointerTo(PtrTy->getAddressSpace()));
4123         }
4124       }
4125 
4126       assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
4127              "Must be able to losslessly bit cast result type");
4128       V = Builder.CreateBitCast(V, RetTy);
4129     }
4130 
4131     return RValue::get(V);
4132   }
4133 
4134   // See if we have a target specific builtin that needs to be lowered.
4135   if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
4136     return RValue::get(V);
4137 
4138   ErrorUnsupported(E, "builtin function");
4139 
4140   // Unknown builtin, for now just dump it out and return undef.
4141   return GetUndefRValue(E->getType());
4142 }
4143 
4144 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
4145                                         unsigned BuiltinID, const CallExpr *E,
4146                                         llvm::Triple::ArchType Arch) {
4147   switch (Arch) {
4148   case llvm::Triple::arm:
4149   case llvm::Triple::armeb:
4150   case llvm::Triple::thumb:
4151   case llvm::Triple::thumbeb:
4152     return CGF->EmitARMBuiltinExpr(BuiltinID, E, Arch);
4153   case llvm::Triple::aarch64:
4154   case llvm::Triple::aarch64_be:
4155     return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
4156   case llvm::Triple::x86:
4157   case llvm::Triple::x86_64:
4158     return CGF->EmitX86BuiltinExpr(BuiltinID, E);
4159   case llvm::Triple::ppc:
4160   case llvm::Triple::ppc64:
4161   case llvm::Triple::ppc64le:
4162     return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
4163   case llvm::Triple::r600:
4164   case llvm::Triple::amdgcn:
4165     return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
4166   case llvm::Triple::systemz:
4167     return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
4168   case llvm::Triple::nvptx:
4169   case llvm::Triple::nvptx64:
4170     return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
4171   case llvm::Triple::wasm32:
4172   case llvm::Triple::wasm64:
4173     return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
4174   case llvm::Triple::hexagon:
4175     return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
4176   default:
4177     return nullptr;
4178   }
4179 }
4180 
4181 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
4182                                               const CallExpr *E) {
4183   if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
4184     assert(getContext().getAuxTargetInfo() && "Missing aux target info");
4185     return EmitTargetArchBuiltinExpr(
4186         this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
4187         getContext().getAuxTargetInfo()->getTriple().getArch());
4188   }
4189 
4190   return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
4191                                    getTarget().getTriple().getArch());
4192 }
4193 
4194 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
4195                                      NeonTypeFlags TypeFlags,
4196                                      bool HasLegalHalfType=true,
4197                                      bool V1Ty=false) {
4198   int IsQuad = TypeFlags.isQuad();
4199   switch (TypeFlags.getEltType()) {
4200   case NeonTypeFlags::Int8:
4201   case NeonTypeFlags::Poly8:
4202     return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
4203   case NeonTypeFlags::Int16:
4204   case NeonTypeFlags::Poly16:
4205     return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
4206   case NeonTypeFlags::Float16:
4207     if (HasLegalHalfType)
4208       return llvm::VectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
4209     else
4210       return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
4211   case NeonTypeFlags::Int32:
4212     return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
4213   case NeonTypeFlags::Int64:
4214   case NeonTypeFlags::Poly64:
4215     return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
4216   case NeonTypeFlags::Poly128:
4217     // FIXME: i128 and f128 doesn't get fully support in Clang and llvm.
4218     // There is a lot of i128 and f128 API missing.
4219     // so we use v16i8 to represent poly128 and get pattern matched.
4220     return llvm::VectorType::get(CGF->Int8Ty, 16);
4221   case NeonTypeFlags::Float32:
4222     return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
4223   case NeonTypeFlags::Float64:
4224     return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
4225   }
4226   llvm_unreachable("Unknown vector element type!");
4227 }
4228 
4229 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
4230                                           NeonTypeFlags IntTypeFlags) {
4231   int IsQuad = IntTypeFlags.isQuad();
4232   switch (IntTypeFlags.getEltType()) {
4233   case NeonTypeFlags::Int16:
4234     return llvm::VectorType::get(CGF->HalfTy, (4 << IsQuad));
4235   case NeonTypeFlags::Int32:
4236     return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
4237   case NeonTypeFlags::Int64:
4238     return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
4239   default:
4240     llvm_unreachable("Type can't be converted to floating-point!");
4241   }
4242 }
4243 
4244 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
4245   unsigned nElts = V->getType()->getVectorNumElements();
4246   Value* SV = llvm::ConstantVector::getSplat(nElts, C);
4247   return Builder.CreateShuffleVector(V, V, SV, "lane");
4248 }
4249 
4250 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
4251                                      const char *name,
4252                                      unsigned shift, bool rightshift) {
4253   unsigned j = 0;
4254   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
4255        ai != ae; ++ai, ++j)
4256     if (shift > 0 && shift == j)
4257       Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
4258     else
4259       Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
4260 
4261   return Builder.CreateCall(F, Ops, name);
4262 }
4263 
4264 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
4265                                             bool neg) {
4266   int SV = cast<ConstantInt>(V)->getSExtValue();
4267   return ConstantInt::get(Ty, neg ? -SV : SV);
4268 }
4269 
4270 // Right-shift a vector by a constant.
4271 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
4272                                           llvm::Type *Ty, bool usgn,
4273                                           const char *name) {
4274   llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
4275 
4276   int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
4277   int EltSize = VTy->getScalarSizeInBits();
4278 
4279   Vec = Builder.CreateBitCast(Vec, Ty);
4280 
4281   // lshr/ashr are undefined when the shift amount is equal to the vector
4282   // element size.
4283   if (ShiftAmt == EltSize) {
4284     if (usgn) {
4285       // Right-shifting an unsigned value by its size yields 0.
4286       return llvm::ConstantAggregateZero::get(VTy);
4287     } else {
4288       // Right-shifting a signed value by its size is equivalent
4289       // to a shift of size-1.
4290       --ShiftAmt;
4291       Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
4292     }
4293   }
4294 
4295   Shift = EmitNeonShiftVector(Shift, Ty, false);
4296   if (usgn)
4297     return Builder.CreateLShr(Vec, Shift, name);
4298   else
4299     return Builder.CreateAShr(Vec, Shift, name);
4300 }
4301 
// Bit flags that the intrinsic tables in this file pass as the TypeModifier
// of a NeonIntrinsicInfo entry. Each basic flag occupies its own bit so they
// can be OR-ed together; the named combinations at the end are shorthands.
// NOTE(review): the code that interprets these flags is outside this part of
// the file -- consult it for the exact meaning of each bit.
enum {
  // Basic single-bit flags.
  AddRetType = 1 << 0,
  Add1ArgType = 1 << 1,
  Add2ArgTypes = 1 << 2,

  VectorizeRetType = 1 << 3,
  VectorizeArgTypes = 1 << 4,

  InventFloatType = 1 << 5,
  UnsignedAlts = 1 << 6,

  Use64BitVectors = 1 << 7,
  Use128BitVectors = 1 << 8,

  // Named combinations of the flags above.
  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
  VectorRet = AddRetType | VectorizeRetType,
  VectorRetGetArgs01 = VectorRet | Add2ArgTypes | VectorizeArgTypes,
  FpCmpzModifiers = VectorRet | Add1ArgType | InventFloatType
};
4323 
namespace {
/// One row of a NEON intrinsic table: a builtin ID together with the data
/// recorded for it. Rows compare by BuiltinID only, both against another row
/// and against a bare builtin ID.
struct NeonIntrinsicInfo {
  const char *NameHint;      // Stringified builtin base name.
  unsigned BuiltinID;        // Key used by both comparison operators.
  unsigned LLVMIntrinsic;    // Primary intrinsic ID (0 when none is given).
  unsigned AltLLVMIntrinsic; // Alternate intrinsic ID (0 when none is given).
  unsigned TypeModifier;     // OR-ed modifier flags (0 when none are given).

  /// True when this row's builtin ID is smaller than \p OtherID.
  bool operator<(unsigned OtherID) const {
    return BuiltinID < OtherID;
  }

  /// True when this row's builtin ID is smaller than that of \p Other.
  bool operator<(const NeonIntrinsicInfo &Other) const {
    return *this < Other.BuiltinID;
  }
};
} // end anonymous namespace
4340 
// Brace-initializer builders for NeonIntrinsicInfo table rows. Every variant
// records the stringified base name and the NEON::BI__builtin_neon_<Name>
// builtin ID.

// Row with no intrinsic and no type modifier (all three remaining fields 0).
#define NEONMAP0(Name)                                                         \
  { #Name, NEON::BI__builtin_neon_##Name, 0, 0, 0 }

// Row with a single intrinsic; the alternate intrinsic field is 0.
#define NEONMAP1(Name, Intr, Modifier)                                         \
  { #Name, NEON::BI__builtin_neon_##Name, Intrinsic::Intr, 0, Modifier }

// Row with a primary and an alternate intrinsic.
#define NEONMAP2(Name, Intr, AltIntr, Modifier)                                \
  { #Name, NEON::BI__builtin_neon_##Name, Intrinsic::Intr,                     \
    Intrinsic::AltIntr, Modifier }
4352 
// Table of NEON builtins for 32-bit ARM, one NeonIntrinsicInfo row per
// builtin:
//   NEONMAP0 - builtin only; no intrinsic or type modifier is recorded.
//   NEONMAP1 - builtin, one arm_neon_* (or generic) intrinsic, modifier flags.
//   NEONMAP2 - builtin, a primary and an alternate intrinsic, modifier flags.
// Rows are written in ascending builtin-name order.
// NOTE(review): lookups presumably depend on the rows being ordered by
// BuiltinID (NeonIntrinsicInfo only defines operator<) -- confirm against the
// lookup code before inserting or reordering entries.
static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vabs_v, arm_neon_vabs, 0),
  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
  NEONMAP0(vaddhn_v),
  NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
  NEONMAP1(vaeseq_v, arm_neon_aese, 0),
  NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
  NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vcage_v, arm_neon_vacge, 0),
  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcale_v, arm_neon_vacge, 0),
  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
  NEONMAP0(vceqz_v),
  NEONMAP0(vceqzq_v),
  NEONMAP0(vcgez_v),
  NEONMAP0(vcgezq_v),
  NEONMAP0(vcgtz_v),
  NEONMAP0(vcgtzq_v),
  NEONMAP0(vclez_v),
  NEONMAP0(vclezq_v),
  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
  NEONMAP0(vcltz_v),
  NEONMAP0(vcltzq_v),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
  NEONMAP0(vcvt_f16_v),
  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP2(vcvt_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s16_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u16_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvt_s16_v),
  NEONMAP0(vcvt_s32_v),
  NEONMAP0(vcvt_s64_v),
  NEONMAP0(vcvt_u16_v),
  NEONMAP0(vcvt_u32_v),
  NEONMAP0(vcvt_u64_v),
  NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_u16_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_s16_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_u16_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtm_s16_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_u16_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_s16_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_u16_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtn_s16_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_u16_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_s16_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_u16_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtp_s16_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_u16_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_s16_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_u16_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP0(vcvtq_f16_v),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP2(vcvtq_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s16_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u16_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_s16_v),
  NEONMAP0(vcvtq_s32_v),
  NEONMAP0(vcvtq_s64_v),
  NEONMAP0(vcvtq_u16_v),
  NEONMAP0(vcvtq_u32_v),
  NEONMAP0(vcvtq_u64_v),
  NEONMAP2(vdot_v, arm_neon_udot, arm_neon_sdot, 0),
  NEONMAP2(vdotq_v, arm_neon_udot, arm_neon_sdot, 0),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP0(vld1_dup_v),
  NEONMAP1(vld1_v, arm_neon_vld1, 0),
  NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
  NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
  NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
  NEONMAP0(vld1q_dup_v),
  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
  NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
  NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
  NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
  NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2_v, arm_neon_vld2, 0),
  NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
  NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3_v, arm_neon_vld3, 0),
  NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
  NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4_v, arm_neon_vld4, 0),
  NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP0(vmull_v),
  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
  NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
  NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
  NEONMAP0(vrndi_v),
  NEONMAP0(vrndiq_v),
  NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
  NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
  NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
  NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
  NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
  NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
  NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
  NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
  NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
  NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
  NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
  NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
  NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP1(vst1_v, arm_neon_vst1, 0),
  NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
  NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
  NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
  NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
  NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
  NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2_v, arm_neon_vst2, 0),
  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3_v, arm_neon_vst3, 0),
  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4_v, arm_neon_vst4, 0),
  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtrn_v),
  NEONMAP0(vtrnq_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
  NEONMAP0(vuzp_v),
  NEONMAP0(vuzpq_v),
  NEONMAP0(vzip_v),
  NEONMAP0(vzipq_v)
};
4629 
// Table of NEON vector builtins for AArch64, one NeonIntrinsicInfo row per
// builtin:
//   NEONMAP0 - builtin only; no intrinsic or type modifier is recorded.
//   NEONMAP1 - builtin, one aarch64_* (or generic) intrinsic, modifier flags.
//   NEONMAP2 - builtin, a primary and an alternate intrinsic, modifier flags.
// Rows are written in ascending builtin-name order.
// NOTE(review): lookups presumably depend on the rows being ordered by
// BuiltinID (NeonIntrinsicInfo only defines operator<) -- confirm against the
// lookup code before inserting or reordering entries.
static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
  NEONMAP0(vaddhn_v),
  NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
  NEONMAP0(vceqz_v),
  NEONMAP0(vceqzq_v),
  NEONMAP0(vcgez_v),
  NEONMAP0(vcgezq_v),
  NEONMAP0(vcgtz_v),
  NEONMAP0(vcgtzq_v),
  NEONMAP0(vclez_v),
  NEONMAP0(vclezq_v),
  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP0(vcltz_v),
  NEONMAP0(vcltzq_v),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
  NEONMAP0(vcvt_f16_v),
  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP2(vcvt_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_f16_v),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP2(vcvtq_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
  NEONMAP2(vdot_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
  NEONMAP2(vdotq_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP1(vfmlal_high_v, aarch64_neon_fmlal2, 0),
  NEONMAP1(vfmlal_low_v, aarch64_neon_fmlal, 0),
  NEONMAP1(vfmlalq_high_v, aarch64_neon_fmlal2, 0),
  NEONMAP1(vfmlalq_low_v, aarch64_neon_fmlal, 0),
  NEONMAP1(vfmlsl_high_v, aarch64_neon_fmlsl2, 0),
  NEONMAP1(vfmlsl_low_v, aarch64_neon_fmlsl, 0),
  NEONMAP1(vfmlslq_high_v, aarch64_neon_fmlsl2, 0),
  NEONMAP1(vfmlslq_low_v, aarch64_neon_fmlsl, 0),
  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
  NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
  NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
  NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
  NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
  NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP0(vrndi_v),
  NEONMAP0(vrndiq_v),
  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
  NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
  NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
  NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
  NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
  NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
  NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
  NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
};
4786 
// Table describing the AArch64 scalar NEON builtins handled by
// EmitCommonNeonSISDBuiltinExpr. Each NEONMAP1 entry (the macro is defined
// outside this view) appears to pair a builtin name with the LLVM intrinsic it
// lowers to and a bitmask of type-modifier flags; the flags are what
// LookupNeonLLVMIntrinsic uses to pick the intrinsic's overload types.
//
// NOTE(review): this table is presumably searched with findNeonIntrinsicInMap,
// which binary-searches by BuiltinID and asserts (in +Asserts builds) that the
// map is sorted -- confirm at the call site, and insert new entries at their
// sorted position rather than appending.
static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
  NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
  // FP16 scalar intrinsics go here.
  NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
};
5012 
5013 #undef NEONMAP0
5014 #undef NEONMAP1
5015 #undef NEONMAP2
5016 
// One "already verified" flag per intrinsic map, all starting out false.
// NOTE(review): presumably each is passed as the MapProvenSorted argument of
// findNeonIntrinsicInMap, which (in builds with assertions enabled) checks the
// map's ordering once and then sets the flag so the check is not repeated --
// confirm against the call sites.
static bool NEONSIMDIntrinsicsProvenSorted = false;

static bool AArch64SIMDIntrinsicsProvenSorted = false;
static bool AArch64SISDIntrinsicsProvenSorted = false;
5021 
5022 
5023 static const NeonIntrinsicInfo *
5024 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
5025                        unsigned BuiltinID, bool &MapProvenSorted) {
5026 
5027 #ifndef NDEBUG
5028   if (!MapProvenSorted) {
5029     assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
5030     MapProvenSorted = true;
5031   }
5032 #endif
5033 
5034   const NeonIntrinsicInfo *Builtin =
5035       std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
5036 
5037   if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
5038     return Builtin;
5039 
5040   return nullptr;
5041 }
5042 
5043 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
5044                                                    unsigned Modifier,
5045                                                    llvm::Type *ArgType,
5046                                                    const CallExpr *E) {
5047   int VectorSize = 0;
5048   if (Modifier & Use64BitVectors)
5049     VectorSize = 64;
5050   else if (Modifier & Use128BitVectors)
5051     VectorSize = 128;
5052 
5053   // Return type.
5054   SmallVector<llvm::Type *, 3> Tys;
5055   if (Modifier & AddRetType) {
5056     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
5057     if (Modifier & VectorizeRetType)
5058       Ty = llvm::VectorType::get(
5059           Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
5060 
5061     Tys.push_back(Ty);
5062   }
5063 
5064   // Arguments.
5065   if (Modifier & VectorizeArgTypes) {
5066     int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
5067     ArgType = llvm::VectorType::get(ArgType, Elts);
5068   }
5069 
5070   if (Modifier & (Add1ArgType | Add2ArgTypes))
5071     Tys.push_back(ArgType);
5072 
5073   if (Modifier & Add2ArgTypes)
5074     Tys.push_back(ArgType);
5075 
5076   if (Modifier & InventFloatType)
5077     Tys.push_back(FloatTy);
5078 
5079   return CGM.getIntrinsic(IntrinsicID, Tys);
5080 }
5081 
5082 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
5083                                             const NeonIntrinsicInfo &SISDInfo,
5084                                             SmallVectorImpl<Value *> &Ops,
5085                                             const CallExpr *E) {
5086   unsigned BuiltinID = SISDInfo.BuiltinID;
5087   unsigned int Int = SISDInfo.LLVMIntrinsic;
5088   unsigned Modifier = SISDInfo.TypeModifier;
5089   const char *s = SISDInfo.NameHint;
5090 
5091   switch (BuiltinID) {
5092   case NEON::BI__builtin_neon_vcled_s64:
5093   case NEON::BI__builtin_neon_vcled_u64:
5094   case NEON::BI__builtin_neon_vcles_f32:
5095   case NEON::BI__builtin_neon_vcled_f64:
5096   case NEON::BI__builtin_neon_vcltd_s64:
5097   case NEON::BI__builtin_neon_vcltd_u64:
5098   case NEON::BI__builtin_neon_vclts_f32:
5099   case NEON::BI__builtin_neon_vcltd_f64:
5100   case NEON::BI__builtin_neon_vcales_f32:
5101   case NEON::BI__builtin_neon_vcaled_f64:
5102   case NEON::BI__builtin_neon_vcalts_f32:
5103   case NEON::BI__builtin_neon_vcaltd_f64:
5104     // Only one direction of comparisons actually exist, cmle is actually a cmge
5105     // with swapped operands. The table gives us the right intrinsic but we
5106     // still need to do the swap.
5107     std::swap(Ops[0], Ops[1]);
5108     break;
5109   }
5110 
5111   assert(Int && "Generic code assumes a valid intrinsic");
5112 
5113   // Determine the type(s) of this overloaded AArch64 intrinsic.
5114   const Expr *Arg = E->getArg(0);
5115   llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
5116   Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
5117 
5118   int j = 0;
5119   ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
5120   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
5121        ai != ae; ++ai, ++j) {
5122     llvm::Type *ArgTy = ai->getType();
5123     if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
5124              ArgTy->getPrimitiveSizeInBits())
5125       continue;
5126 
5127     assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
5128     // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
5129     // it before inserting.
5130     Ops[j] =
5131         CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
5132     Ops[j] =
5133         CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
5134   }
5135 
5136   Value *Result = CGF.EmitNeonCall(F, Ops, s);
5137   llvm::Type *ResultType = CGF.ConvertType(E->getType());
5138   if (ResultType->getPrimitiveSizeInBits() <
5139       Result->getType()->getPrimitiveSizeInBits())
5140     return CGF.Builder.CreateExtractElement(Result, C0);
5141 
5142   return CGF.Builder.CreateBitCast(Result, ResultType, s);
5143 }
5144 
5145 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
5146     unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
5147     const char *NameHint, unsigned Modifier, const CallExpr *E,
5148     SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
5149     llvm::Triple::ArchType Arch) {
5150   // Get the last argument, which specifies the vector type.
5151   llvm::APSInt NeonTypeConst;
5152   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
5153   if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
5154     return nullptr;
5155 
5156   // Determine the type of this overloaded NEON intrinsic.
5157   NeonTypeFlags Type(NeonTypeConst.getZExtValue());
5158   bool Usgn = Type.isUnsigned();
5159   bool Quad = Type.isQuad();
5160   const bool HasLegalHalfType = getTarget().hasLegalHalfType();
5161 
5162   llvm::VectorType *VTy = GetNeonType(this, Type, HasLegalHalfType);
5163   llvm::Type *Ty = VTy;
5164   if (!Ty)
5165     return nullptr;
5166 
5167   auto getAlignmentValue32 = [&](Address addr) -> Value* {
5168     return Builder.getInt32(addr.getAlignment().getQuantity());
5169   };
5170 
5171   unsigned Int = LLVMIntrinsic;
5172   if ((Modifier & UnsignedAlts) && !Usgn)
5173     Int = AltLLVMIntrinsic;
5174 
5175   switch (BuiltinID) {
5176   default: break;
5177   case NEON::BI__builtin_neon_vpadd_v:
5178   case NEON::BI__builtin_neon_vpaddq_v:
5179     // We don't allow fp/int overloading of intrinsics.
5180     if (VTy->getElementType()->isFloatingPointTy() &&
5181         Int == Intrinsic::aarch64_neon_addp)
5182       Int = Intrinsic::aarch64_neon_faddp;
5183     break;
5184   case NEON::BI__builtin_neon_vabs_v:
5185   case NEON::BI__builtin_neon_vabsq_v:
5186     if (VTy->getElementType()->isFloatingPointTy())
5187       return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
5188     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
5189   case NEON::BI__builtin_neon_vaddhn_v: {
5190     llvm::VectorType *SrcTy =
5191         llvm::VectorType::getExtendedElementVectorType(VTy);
5192 
5193     // %sum = add <4 x i32> %lhs, %rhs
5194     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
5195     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
5196     Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
5197 
5198     // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
5199     Constant *ShiftAmt =
5200         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
5201     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
5202 
5203     // %res = trunc <4 x i32> %high to <4 x i16>
5204     return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
5205   }
5206   case NEON::BI__builtin_neon_vcale_v:
5207   case NEON::BI__builtin_neon_vcaleq_v:
5208   case NEON::BI__builtin_neon_vcalt_v:
5209   case NEON::BI__builtin_neon_vcaltq_v:
5210     std::swap(Ops[0], Ops[1]);
5211     LLVM_FALLTHROUGH;
5212   case NEON::BI__builtin_neon_vcage_v:
5213   case NEON::BI__builtin_neon_vcageq_v:
5214   case NEON::BI__builtin_neon_vcagt_v:
5215   case NEON::BI__builtin_neon_vcagtq_v: {
5216     llvm::Type *Ty;
5217     switch (VTy->getScalarSizeInBits()) {
5218     default: llvm_unreachable("unexpected type");
5219     case 32:
5220       Ty = FloatTy;
5221       break;
5222     case 64:
5223       Ty = DoubleTy;
5224       break;
5225     case 16:
5226       Ty = HalfTy;
5227       break;
5228     }
5229     llvm::Type *VecFlt = llvm::VectorType::get(Ty, VTy->getNumElements());
5230     llvm::Type *Tys[] = { VTy, VecFlt };
5231     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
5232     return EmitNeonCall(F, Ops, NameHint);
5233   }
5234   case NEON::BI__builtin_neon_vceqz_v:
5235   case NEON::BI__builtin_neon_vceqzq_v:
5236     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
5237                                          ICmpInst::ICMP_EQ, "vceqz");
5238   case NEON::BI__builtin_neon_vcgez_v:
5239   case NEON::BI__builtin_neon_vcgezq_v:
5240     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
5241                                          ICmpInst::ICMP_SGE, "vcgez");
5242   case NEON::BI__builtin_neon_vclez_v:
5243   case NEON::BI__builtin_neon_vclezq_v:
5244     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
5245                                          ICmpInst::ICMP_SLE, "vclez");
5246   case NEON::BI__builtin_neon_vcgtz_v:
5247   case NEON::BI__builtin_neon_vcgtzq_v:
5248     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
5249                                          ICmpInst::ICMP_SGT, "vcgtz");
5250   case NEON::BI__builtin_neon_vcltz_v:
5251   case NEON::BI__builtin_neon_vcltzq_v:
5252     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
5253                                          ICmpInst::ICMP_SLT, "vcltz");
5254   case NEON::BI__builtin_neon_vclz_v:
5255   case NEON::BI__builtin_neon_vclzq_v:
5256     // We generate target-independent intrinsic, which needs a second argument
5257     // for whether or not clz of zero is undefined; on ARM it isn't.
5258     Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
5259     break;
5260   case NEON::BI__builtin_neon_vcvt_f32_v:
5261   case NEON::BI__builtin_neon_vcvtq_f32_v:
5262     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5263     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
5264                      HasLegalHalfType);
5265     return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
5266                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
5267   case NEON::BI__builtin_neon_vcvt_f16_v:
5268   case NEON::BI__builtin_neon_vcvtq_f16_v:
5269     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5270     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
5271                      HasLegalHalfType);
5272     return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
5273                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
5274   case NEON::BI__builtin_neon_vcvt_n_f16_v:
5275   case NEON::BI__builtin_neon_vcvt_n_f32_v:
5276   case NEON::BI__builtin_neon_vcvt_n_f64_v:
5277   case NEON::BI__builtin_neon_vcvtq_n_f16_v:
5278   case NEON::BI__builtin_neon_vcvtq_n_f32_v:
5279   case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
5280     llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
5281     Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
5282     Function *F = CGM.getIntrinsic(Int, Tys);
5283     return EmitNeonCall(F, Ops, "vcvt_n");
5284   }
5285   case NEON::BI__builtin_neon_vcvt_n_s16_v:
5286   case NEON::BI__builtin_neon_vcvt_n_s32_v:
5287   case NEON::BI__builtin_neon_vcvt_n_u16_v:
5288   case NEON::BI__builtin_neon_vcvt_n_u32_v:
5289   case NEON::BI__builtin_neon_vcvt_n_s64_v:
5290   case NEON::BI__builtin_neon_vcvt_n_u64_v:
5291   case NEON::BI__builtin_neon_vcvtq_n_s16_v:
5292   case NEON::BI__builtin_neon_vcvtq_n_s32_v:
5293   case NEON::BI__builtin_neon_vcvtq_n_u16_v:
5294   case NEON::BI__builtin_neon_vcvtq_n_u32_v:
5295   case NEON::BI__builtin_neon_vcvtq_n_s64_v:
5296   case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
5297     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5298     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
5299     return EmitNeonCall(F, Ops, "vcvt_n");
5300   }
5301   case NEON::BI__builtin_neon_vcvt_s32_v:
5302   case NEON::BI__builtin_neon_vcvt_u32_v:
5303   case NEON::BI__builtin_neon_vcvt_s64_v:
5304   case NEON::BI__builtin_neon_vcvt_u64_v:
5305   case NEON::BI__builtin_neon_vcvt_s16_v:
5306   case NEON::BI__builtin_neon_vcvt_u16_v:
5307   case NEON::BI__builtin_neon_vcvtq_s32_v:
5308   case NEON::BI__builtin_neon_vcvtq_u32_v:
5309   case NEON::BI__builtin_neon_vcvtq_s64_v:
5310   case NEON::BI__builtin_neon_vcvtq_u64_v:
5311   case NEON::BI__builtin_neon_vcvtq_s16_v:
5312   case NEON::BI__builtin_neon_vcvtq_u16_v: {
5313     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
5314     return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
5315                 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
5316   }
5317   case NEON::BI__builtin_neon_vcvta_s16_v:
5318   case NEON::BI__builtin_neon_vcvta_s32_v:
5319   case NEON::BI__builtin_neon_vcvta_s64_v:
5320   case NEON::BI__builtin_neon_vcvta_u16_v:
5321   case NEON::BI__builtin_neon_vcvta_u32_v:
5322   case NEON::BI__builtin_neon_vcvta_u64_v:
5323   case NEON::BI__builtin_neon_vcvtaq_s16_v:
5324   case NEON::BI__builtin_neon_vcvtaq_s32_v:
5325   case NEON::BI__builtin_neon_vcvtaq_s64_v:
5326   case NEON::BI__builtin_neon_vcvtaq_u16_v:
5327   case NEON::BI__builtin_neon_vcvtaq_u32_v:
5328   case NEON::BI__builtin_neon_vcvtaq_u64_v:
5329   case NEON::BI__builtin_neon_vcvtn_s16_v:
5330   case NEON::BI__builtin_neon_vcvtn_s32_v:
5331   case NEON::BI__builtin_neon_vcvtn_s64_v:
5332   case NEON::BI__builtin_neon_vcvtn_u16_v:
5333   case NEON::BI__builtin_neon_vcvtn_u32_v:
5334   case NEON::BI__builtin_neon_vcvtn_u64_v:
5335   case NEON::BI__builtin_neon_vcvtnq_s16_v:
5336   case NEON::BI__builtin_neon_vcvtnq_s32_v:
5337   case NEON::BI__builtin_neon_vcvtnq_s64_v:
5338   case NEON::BI__builtin_neon_vcvtnq_u16_v:
5339   case NEON::BI__builtin_neon_vcvtnq_u32_v:
5340   case NEON::BI__builtin_neon_vcvtnq_u64_v:
5341   case NEON::BI__builtin_neon_vcvtp_s16_v:
5342   case NEON::BI__builtin_neon_vcvtp_s32_v:
5343   case NEON::BI__builtin_neon_vcvtp_s64_v:
5344   case NEON::BI__builtin_neon_vcvtp_u16_v:
5345   case NEON::BI__builtin_neon_vcvtp_u32_v:
5346   case NEON::BI__builtin_neon_vcvtp_u64_v:
5347   case NEON::BI__builtin_neon_vcvtpq_s16_v:
5348   case NEON::BI__builtin_neon_vcvtpq_s32_v:
5349   case NEON::BI__builtin_neon_vcvtpq_s64_v:
5350   case NEON::BI__builtin_neon_vcvtpq_u16_v:
5351   case NEON::BI__builtin_neon_vcvtpq_u32_v:
5352   case NEON::BI__builtin_neon_vcvtpq_u64_v:
5353   case NEON::BI__builtin_neon_vcvtm_s16_v:
5354   case NEON::BI__builtin_neon_vcvtm_s32_v:
5355   case NEON::BI__builtin_neon_vcvtm_s64_v:
5356   case NEON::BI__builtin_neon_vcvtm_u16_v:
5357   case NEON::BI__builtin_neon_vcvtm_u32_v:
5358   case NEON::BI__builtin_neon_vcvtm_u64_v:
5359   case NEON::BI__builtin_neon_vcvtmq_s16_v:
5360   case NEON::BI__builtin_neon_vcvtmq_s32_v:
5361   case NEON::BI__builtin_neon_vcvtmq_s64_v:
5362   case NEON::BI__builtin_neon_vcvtmq_u16_v:
5363   case NEON::BI__builtin_neon_vcvtmq_u32_v:
5364   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
5365     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5366     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
5367   }
5368   case NEON::BI__builtin_neon_vext_v:
5369   case NEON::BI__builtin_neon_vextq_v: {
5370     int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
5371     SmallVector<uint32_t, 16> Indices;
5372     for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
5373       Indices.push_back(i+CV);
5374 
5375     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5376     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5377     return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
5378   }
5379   case NEON::BI__builtin_neon_vfma_v:
5380   case NEON::BI__builtin_neon_vfmaq_v: {
5381     Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5382     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5383     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5384     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5385 
5386     // NEON intrinsic puts accumulator first, unlike the LLVM fma.
5387     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
5388   }
5389   case NEON::BI__builtin_neon_vld1_v:
5390   case NEON::BI__builtin_neon_vld1q_v: {
5391     llvm::Type *Tys[] = {Ty, Int8PtrTy};
5392     Ops.push_back(getAlignmentValue32(PtrOp0));
5393     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
5394   }
5395   case NEON::BI__builtin_neon_vld1_x2_v:
5396   case NEON::BI__builtin_neon_vld1q_x2_v:
5397   case NEON::BI__builtin_neon_vld1_x3_v:
5398   case NEON::BI__builtin_neon_vld1q_x3_v:
5399   case NEON::BI__builtin_neon_vld1_x4_v:
5400   case NEON::BI__builtin_neon_vld1q_x4_v: {
5401     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
5402     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5403     llvm::Type *Tys[2] = { VTy, PTy };
5404     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
5405     Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
5406     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5407     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5408     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5409   }
5410   case NEON::BI__builtin_neon_vld2_v:
5411   case NEON::BI__builtin_neon_vld2q_v:
5412   case NEON::BI__builtin_neon_vld3_v:
5413   case NEON::BI__builtin_neon_vld3q_v:
5414   case NEON::BI__builtin_neon_vld4_v:
5415   case NEON::BI__builtin_neon_vld4q_v:
5416   case NEON::BI__builtin_neon_vld2_dup_v:
5417   case NEON::BI__builtin_neon_vld2q_dup_v:
5418   case NEON::BI__builtin_neon_vld3_dup_v:
5419   case NEON::BI__builtin_neon_vld3q_dup_v:
5420   case NEON::BI__builtin_neon_vld4_dup_v:
5421   case NEON::BI__builtin_neon_vld4q_dup_v: {
5422     llvm::Type *Tys[] = {Ty, Int8PtrTy};
5423     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
5424     Value *Align = getAlignmentValue32(PtrOp1);
5425     Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
5426     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5427     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5428     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5429   }
5430   case NEON::BI__builtin_neon_vld1_dup_v:
5431   case NEON::BI__builtin_neon_vld1q_dup_v: {
5432     Value *V = UndefValue::get(Ty);
5433     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
5434     PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
5435     LoadInst *Ld = Builder.CreateLoad(PtrOp0);
5436     llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
5437     Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
5438     return EmitNeonSplat(Ops[0], CI);
5439   }
5440   case NEON::BI__builtin_neon_vld2_lane_v:
5441   case NEON::BI__builtin_neon_vld2q_lane_v:
5442   case NEON::BI__builtin_neon_vld3_lane_v:
5443   case NEON::BI__builtin_neon_vld3q_lane_v:
5444   case NEON::BI__builtin_neon_vld4_lane_v:
5445   case NEON::BI__builtin_neon_vld4q_lane_v: {
5446     llvm::Type *Tys[] = {Ty, Int8PtrTy};
5447     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
5448     for (unsigned I = 2; I < Ops.size() - 1; ++I)
5449       Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
5450     Ops.push_back(getAlignmentValue32(PtrOp1));
5451     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
5452     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5453     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5454     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5455   }
5456   case NEON::BI__builtin_neon_vmovl_v: {
5457     llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
5458     Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
5459     if (Usgn)
5460       return Builder.CreateZExt(Ops[0], Ty, "vmovl");
5461     return Builder.CreateSExt(Ops[0], Ty, "vmovl");
5462   }
5463   case NEON::BI__builtin_neon_vmovn_v: {
5464     llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
5465     Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
5466     return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
5467   }
5468   case NEON::BI__builtin_neon_vmull_v:
5469     // FIXME: the integer vmull operations could be emitted in terms of pure
5470     // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
5471     // hoisting the exts outside loops. Until global ISel comes along that can
5472     // see through such movement this leads to bad CodeGen. So we need an
5473     // intrinsic for now.
5474     Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
5475     Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
5476     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
5477   case NEON::BI__builtin_neon_vpadal_v:
5478   case NEON::BI__builtin_neon_vpadalq_v: {
5479     // The source operand type has twice as many elements of half the size.
5480     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
5481     llvm::Type *EltTy =
5482       llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
5483     llvm::Type *NarrowTy =
5484       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
5485     llvm::Type *Tys[2] = { Ty, NarrowTy };
5486     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
5487   }
5488   case NEON::BI__builtin_neon_vpaddl_v:
5489   case NEON::BI__builtin_neon_vpaddlq_v: {
5490     // The source operand type has twice as many elements of half the size.
5491     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
5492     llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
5493     llvm::Type *NarrowTy =
5494       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
5495     llvm::Type *Tys[2] = { Ty, NarrowTy };
5496     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
5497   }
5498   case NEON::BI__builtin_neon_vqdmlal_v:
5499   case NEON::BI__builtin_neon_vqdmlsl_v: {
5500     SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
5501     Ops[1] =
5502         EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
5503     Ops.resize(2);
5504     return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
5505   }
5506   case NEON::BI__builtin_neon_vqshl_n_v:
5507   case NEON::BI__builtin_neon_vqshlq_n_v:
5508     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
5509                         1, false);
5510   case NEON::BI__builtin_neon_vqshlu_n_v:
5511   case NEON::BI__builtin_neon_vqshluq_n_v:
5512     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
5513                         1, false);
5514   case NEON::BI__builtin_neon_vrecpe_v:
5515   case NEON::BI__builtin_neon_vrecpeq_v:
5516   case NEON::BI__builtin_neon_vrsqrte_v:
5517   case NEON::BI__builtin_neon_vrsqrteq_v:
5518     Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
5519     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
5520   case NEON::BI__builtin_neon_vrndi_v:
5521   case NEON::BI__builtin_neon_vrndiq_v:
5522     Int = Intrinsic::nearbyint;
5523     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
5524   case NEON::BI__builtin_neon_vrshr_n_v:
5525   case NEON::BI__builtin_neon_vrshrq_n_v:
5526     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
5527                         1, true);
5528   case NEON::BI__builtin_neon_vshl_n_v:
5529   case NEON::BI__builtin_neon_vshlq_n_v:
5530     Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
5531     return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
5532                              "vshl_n");
5533   case NEON::BI__builtin_neon_vshll_n_v: {
5534     llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
5535     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
5536     if (Usgn)
5537       Ops[0] = Builder.CreateZExt(Ops[0], VTy);
5538     else
5539       Ops[0] = Builder.CreateSExt(Ops[0], VTy);
5540     Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
5541     return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
5542   }
5543   case NEON::BI__builtin_neon_vshrn_n_v: {
5544     llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
5545     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
5546     Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
5547     if (Usgn)
5548       Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
5549     else
5550       Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
5551     return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
5552   }
5553   case NEON::BI__builtin_neon_vshr_n_v:
5554   case NEON::BI__builtin_neon_vshrq_n_v:
5555     return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
5556   case NEON::BI__builtin_neon_vst1_v:
5557   case NEON::BI__builtin_neon_vst1q_v:
5558   case NEON::BI__builtin_neon_vst2_v:
5559   case NEON::BI__builtin_neon_vst2q_v:
5560   case NEON::BI__builtin_neon_vst3_v:
5561   case NEON::BI__builtin_neon_vst3q_v:
5562   case NEON::BI__builtin_neon_vst4_v:
5563   case NEON::BI__builtin_neon_vst4q_v:
5564   case NEON::BI__builtin_neon_vst2_lane_v:
5565   case NEON::BI__builtin_neon_vst2q_lane_v:
5566   case NEON::BI__builtin_neon_vst3_lane_v:
5567   case NEON::BI__builtin_neon_vst3q_lane_v:
5568   case NEON::BI__builtin_neon_vst4_lane_v:
5569   case NEON::BI__builtin_neon_vst4q_lane_v: {
5570     llvm::Type *Tys[] = {Int8PtrTy, Ty};
5571     Ops.push_back(getAlignmentValue32(PtrOp0));
5572     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
5573   }
5574   case NEON::BI__builtin_neon_vst1_x2_v:
5575   case NEON::BI__builtin_neon_vst1q_x2_v:
5576   case NEON::BI__builtin_neon_vst1_x3_v:
5577   case NEON::BI__builtin_neon_vst1q_x3_v:
5578   case NEON::BI__builtin_neon_vst1_x4_v:
5579   case NEON::BI__builtin_neon_vst1q_x4_v: {
5580     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
5581     // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
5582     // in AArch64 it comes last. We may want to stick to one or another.
5583     if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be) {
5584       llvm::Type *Tys[2] = { VTy, PTy };
5585       std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
5586       return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
5587     }
5588     llvm::Type *Tys[2] = { PTy, VTy };
5589     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
5590   }
5591   case NEON::BI__builtin_neon_vsubhn_v: {
5592     llvm::VectorType *SrcTy =
5593         llvm::VectorType::getExtendedElementVectorType(VTy);
5594 
5595     // %sum = add <4 x i32> %lhs, %rhs
5596     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
5597     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
5598     Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
5599 
5600     // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
5601     Constant *ShiftAmt =
5602         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
5603     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
5604 
5605     // %res = trunc <4 x i32> %high to <4 x i16>
5606     return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
5607   }
5608   case NEON::BI__builtin_neon_vtrn_v:
5609   case NEON::BI__builtin_neon_vtrnq_v: {
5610     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
5611     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5612     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5613     Value *SV = nullptr;
5614 
5615     for (unsigned vi = 0; vi != 2; ++vi) {
5616       SmallVector<uint32_t, 16> Indices;
5617       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
5618         Indices.push_back(i+vi);
5619         Indices.push_back(i+e+vi);
5620       }
5621       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
5622       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
5623       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
5624     }
5625     return SV;
5626   }
5627   case NEON::BI__builtin_neon_vtst_v:
5628   case NEON::BI__builtin_neon_vtstq_v: {
5629     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5630     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5631     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
5632     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
5633                                 ConstantAggregateZero::get(Ty));
5634     return Builder.CreateSExt(Ops[0], Ty, "vtst");
5635   }
5636   case NEON::BI__builtin_neon_vuzp_v:
5637   case NEON::BI__builtin_neon_vuzpq_v: {
5638     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
5639     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5640     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5641     Value *SV = nullptr;
5642 
5643     for (unsigned vi = 0; vi != 2; ++vi) {
5644       SmallVector<uint32_t, 16> Indices;
5645       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
5646         Indices.push_back(2*i+vi);
5647 
5648       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
5649       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
5650       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
5651     }
5652     return SV;
5653   }
5654   case NEON::BI__builtin_neon_vzip_v:
5655   case NEON::BI__builtin_neon_vzipq_v: {
5656     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
5657     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5658     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5659     Value *SV = nullptr;
5660 
5661     for (unsigned vi = 0; vi != 2; ++vi) {
5662       SmallVector<uint32_t, 16> Indices;
5663       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
5664         Indices.push_back((i + vi*e) >> 1);
5665         Indices.push_back(((i + vi*e) >> 1)+e);
5666       }
5667       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
5668       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
5669       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
5670     }
5671     return SV;
5672   }
5673   case NEON::BI__builtin_neon_vdot_v:
5674   case NEON::BI__builtin_neon_vdotq_v: {
5675     llvm::Type *InputTy =
5676         llvm::VectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
5677     llvm::Type *Tys[2] = { Ty, InputTy };
5678     Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
5679     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
5680   }
5681   case NEON::BI__builtin_neon_vfmlal_low_v:
5682   case NEON::BI__builtin_neon_vfmlalq_low_v: {
5683     llvm::Type *InputTy =
5684         llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
5685     llvm::Type *Tys[2] = { Ty, InputTy };
5686     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
5687   }
5688   case NEON::BI__builtin_neon_vfmlsl_low_v:
5689   case NEON::BI__builtin_neon_vfmlslq_low_v: {
5690     llvm::Type *InputTy =
5691         llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
5692     llvm::Type *Tys[2] = { Ty, InputTy };
5693     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
5694   }
5695   case NEON::BI__builtin_neon_vfmlal_high_v:
5696   case NEON::BI__builtin_neon_vfmlalq_high_v: {
5697     llvm::Type *InputTy =
5698            llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
5699     llvm::Type *Tys[2] = { Ty, InputTy };
5700     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
5701   }
5702   case NEON::BI__builtin_neon_vfmlsl_high_v:
5703   case NEON::BI__builtin_neon_vfmlslq_high_v: {
5704     llvm::Type *InputTy =
5705            llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
5706     llvm::Type *Tys[2] = { Ty, InputTy };
5707     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
5708   }
5709   }
5710 
5711   assert(Int && "Expected valid intrinsic number");
5712 
5713   // Determine the type(s) of this overloaded AArch64 intrinsic.
5714   Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
5715 
5716   Value *Result = EmitNeonCall(F, Ops, NameHint);
5717   llvm::Type *ResultType = ConvertType(E->getType());
5718   // AArch64 intrinsic one-element vector type cast to
5719   // scalar type expected by the builtin
5720   return Builder.CreateBitCast(Result, ResultType, NameHint);
5721 }
5722 
5723 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
5724     Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
5725     const CmpInst::Predicate Ip, const Twine &Name) {
5726   llvm::Type *OTy = Op->getType();
5727 
5728   // FIXME: this is utterly horrific. We should not be looking at previous
5729   // codegen context to find out what needs doing. Unfortunately TableGen
5730   // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
5731   // (etc).
5732   if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
5733     OTy = BI->getOperand(0)->getType();
5734 
5735   Op = Builder.CreateBitCast(Op, OTy);
5736   if (OTy->getScalarType()->isFloatingPointTy()) {
5737     Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
5738   } else {
5739     Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
5740   }
5741   return Builder.CreateSExt(Op, Ty, Name);
5742 }
5743 
5744 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
5745                                  Value *ExtOp, Value *IndexOp,
5746                                  llvm::Type *ResTy, unsigned IntID,
5747                                  const char *Name) {
5748   SmallVector<Value *, 2> TblOps;
5749   if (ExtOp)
5750     TblOps.push_back(ExtOp);
5751 
5752   // Build a vector containing sequential number like (0, 1, 2, ..., 15)
5753   SmallVector<uint32_t, 16> Indices;
5754   llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
5755   for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
5756     Indices.push_back(2*i);
5757     Indices.push_back(2*i+1);
5758   }
5759 
5760   int PairPos = 0, End = Ops.size() - 1;
5761   while (PairPos < End) {
5762     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
5763                                                      Ops[PairPos+1], Indices,
5764                                                      Name));
5765     PairPos += 2;
5766   }
5767 
5768   // If there's an odd number of 64-bit lookup table, fill the high 64-bit
5769   // of the 128-bit lookup table with zero.
5770   if (PairPos == End) {
5771     Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
5772     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
5773                                                      ZeroTbl, Indices, Name));
5774   }
5775 
5776   Function *TblF;
5777   TblOps.push_back(IndexOp);
5778   TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
5779 
5780   return CGF.EmitNeonCall(TblF, TblOps, Name);
5781 }
5782 
5783 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
5784   unsigned Value;
5785   switch (BuiltinID) {
5786   default:
5787     return nullptr;
5788   case ARM::BI__builtin_arm_nop:
5789     Value = 0;
5790     break;
5791   case ARM::BI__builtin_arm_yield:
5792   case ARM::BI__yield:
5793     Value = 1;
5794     break;
5795   case ARM::BI__builtin_arm_wfe:
5796   case ARM::BI__wfe:
5797     Value = 2;
5798     break;
5799   case ARM::BI__builtin_arm_wfi:
5800   case ARM::BI__wfi:
5801     Value = 3;
5802     break;
5803   case ARM::BI__builtin_arm_sev:
5804   case ARM::BI__sev:
5805     Value = 4;
5806     break;
5807   case ARM::BI__builtin_arm_sevl:
5808   case ARM::BI__sevl:
5809     Value = 5;
5810     break;
5811   }
5812 
5813   return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
5814                             llvm::ConstantInt::get(Int32Ty, Value));
5815 }
5816 
5817 // Generates the IR for the read/write special register builtin,
5818 // ValueType is the type of the value that is to be written or read,
5819 // RegisterType is the type of the register being written to or read from.
5820 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
5821                                          const CallExpr *E,
5822                                          llvm::Type *RegisterType,
5823                                          llvm::Type *ValueType,
5824                                          bool IsRead,
5825                                          StringRef SysReg = "") {
5826   // write and register intrinsics only support 32 and 64 bit operations.
5827   assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
5828           && "Unsupported size for register.");
5829 
5830   CodeGen::CGBuilderTy &Builder = CGF.Builder;
5831   CodeGen::CodeGenModule &CGM = CGF.CGM;
5832   LLVMContext &Context = CGM.getLLVMContext();
5833 
5834   if (SysReg.empty()) {
5835     const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
5836     SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
5837   }
5838 
5839   llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
5840   llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5841   llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5842 
5843   llvm::Type *Types[] = { RegisterType };
5844 
5845   bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
5846   assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
5847             && "Can't fit 64-bit value in 32-bit register");
5848 
5849   if (IsRead) {
5850     llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
5851     llvm::Value *Call = Builder.CreateCall(F, Metadata);
5852 
5853     if (MixedTypes)
5854       // Read into 64 bit register and then truncate result to 32 bit.
5855       return Builder.CreateTrunc(Call, ValueType);
5856 
5857     if (ValueType->isPointerTy())
5858       // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
5859       return Builder.CreateIntToPtr(Call, ValueType);
5860 
5861     return Call;
5862   }
5863 
5864   llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
5865   llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
5866   if (MixedTypes) {
5867     // Extend 32 bit write value to 64 bit to pass to write.
5868     ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
5869     return Builder.CreateCall(F, { Metadata, ArgValue });
5870   }
5871 
5872   if (ValueType->isPointerTy()) {
5873     // Have VoidPtrTy ArgValue but want to return an i32/i64.
5874     ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
5875     return Builder.CreateCall(F, { Metadata, ArgValue });
5876   }
5877 
5878   return Builder.CreateCall(F, { Metadata, ArgValue });
5879 }
5880 
5881 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
5882 /// argument that specifies the vector type.
5883 static bool HasExtraNeonArgument(unsigned BuiltinID) {
5884   switch (BuiltinID) {
5885   default: break;
5886   case NEON::BI__builtin_neon_vget_lane_i8:
5887   case NEON::BI__builtin_neon_vget_lane_i16:
5888   case NEON::BI__builtin_neon_vget_lane_i32:
5889   case NEON::BI__builtin_neon_vget_lane_i64:
5890   case NEON::BI__builtin_neon_vget_lane_f32:
5891   case NEON::BI__builtin_neon_vgetq_lane_i8:
5892   case NEON::BI__builtin_neon_vgetq_lane_i16:
5893   case NEON::BI__builtin_neon_vgetq_lane_i32:
5894   case NEON::BI__builtin_neon_vgetq_lane_i64:
5895   case NEON::BI__builtin_neon_vgetq_lane_f32:
5896   case NEON::BI__builtin_neon_vset_lane_i8:
5897   case NEON::BI__builtin_neon_vset_lane_i16:
5898   case NEON::BI__builtin_neon_vset_lane_i32:
5899   case NEON::BI__builtin_neon_vset_lane_i64:
5900   case NEON::BI__builtin_neon_vset_lane_f32:
5901   case NEON::BI__builtin_neon_vsetq_lane_i8:
5902   case NEON::BI__builtin_neon_vsetq_lane_i16:
5903   case NEON::BI__builtin_neon_vsetq_lane_i32:
5904   case NEON::BI__builtin_neon_vsetq_lane_i64:
5905   case NEON::BI__builtin_neon_vsetq_lane_f32:
5906   case NEON::BI__builtin_neon_vsha1h_u32:
5907   case NEON::BI__builtin_neon_vsha1cq_u32:
5908   case NEON::BI__builtin_neon_vsha1pq_u32:
5909   case NEON::BI__builtin_neon_vsha1mq_u32:
5910   case clang::ARM::BI_MoveToCoprocessor:
5911   case clang::ARM::BI_MoveToCoprocessor2:
5912     return false;
5913   }
5914   return true;
5915 }
5916 
5917 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
5918                                            const CallExpr *E,
5919                                            llvm::Triple::ArchType Arch) {
5920   if (auto Hint = GetValueForARMHint(BuiltinID))
5921     return Hint;
5922 
5923   if (BuiltinID == ARM::BI__emit) {
5924     bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
5925     llvm::FunctionType *FTy =
5926         llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
5927 
5928     Expr::EvalResult Result;
5929     if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
5930       llvm_unreachable("Sema will ensure that the parameter is constant");
5931 
5932     llvm::APSInt Value = Result.Val.getInt();
5933     uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
5934 
5935     llvm::InlineAsm *Emit =
5936         IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
5937                                  /*SideEffects=*/true)
5938                 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
5939                                  /*SideEffects=*/true);
5940 
5941     return Builder.CreateCall(Emit);
5942   }
5943 
5944   if (BuiltinID == ARM::BI__builtin_arm_dbg) {
5945     Value *Option = EmitScalarExpr(E->getArg(0));
5946     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
5947   }
5948 
5949   if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
5950     Value *Address = EmitScalarExpr(E->getArg(0));
5951     Value *RW      = EmitScalarExpr(E->getArg(1));
5952     Value *IsData  = EmitScalarExpr(E->getArg(2));
5953 
5954     // Locality is not supported on ARM target
5955     Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
5956 
5957     Function *F = CGM.getIntrinsic(Intrinsic::prefetch);
5958     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
5959   }
5960 
5961   if (BuiltinID == ARM::BI__builtin_arm_rbit) {
5962     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5963     return Builder.CreateCall(
5964         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5965   }
5966 
5967   if (BuiltinID == ARM::BI__clear_cache) {
5968     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
5969     const FunctionDecl *FD = E->getDirectCallee();
5970     Value *Ops[2];
5971     for (unsigned i = 0; i < 2; i++)
5972       Ops[i] = EmitScalarExpr(E->getArg(i));
5973     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
5974     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
5975     StringRef Name = FD->getName();
5976     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
5977   }
5978 
5979   if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
5980       BuiltinID == ARM::BI__builtin_arm_mcrr2) {
5981     Function *F;
5982 
5983     switch (BuiltinID) {
5984     default: llvm_unreachable("unexpected builtin");
5985     case ARM::BI__builtin_arm_mcrr:
5986       F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
5987       break;
5988     case ARM::BI__builtin_arm_mcrr2:
5989       F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
5990       break;
5991     }
5992 
5993     // MCRR{2} instruction has 5 operands but
5994     // the intrinsic has 4 because Rt and Rt2
5995     // are represented as a single unsigned 64
5996     // bit integer in the intrinsic definition
5997     // but internally it's represented as 2 32
5998     // bit integers.
5999 
6000     Value *Coproc = EmitScalarExpr(E->getArg(0));
6001     Value *Opc1 = EmitScalarExpr(E->getArg(1));
6002     Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
6003     Value *CRm = EmitScalarExpr(E->getArg(3));
6004 
6005     Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
6006     Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
6007     Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
6008     Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
6009 
6010     return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
6011   }
6012 
6013   if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
6014       BuiltinID == ARM::BI__builtin_arm_mrrc2) {
6015     Function *F;
6016 
6017     switch (BuiltinID) {
6018     default: llvm_unreachable("unexpected builtin");
6019     case ARM::BI__builtin_arm_mrrc:
6020       F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
6021       break;
6022     case ARM::BI__builtin_arm_mrrc2:
6023       F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
6024       break;
6025     }
6026 
6027     Value *Coproc = EmitScalarExpr(E->getArg(0));
6028     Value *Opc1 = EmitScalarExpr(E->getArg(1));
6029     Value *CRm  = EmitScalarExpr(E->getArg(2));
6030     Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
6031 
6032     // Returns an unsigned 64 bit integer, represented
6033     // as two 32 bit integers.
6034 
6035     Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
6036     Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
6037     Rt = Builder.CreateZExt(Rt, Int64Ty);
6038     Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
6039 
6040     Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
6041     RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
6042     RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
6043 
6044     return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
6045   }
6046 
6047   if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
6048       ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
6049         BuiltinID == ARM::BI__builtin_arm_ldaex) &&
6050        getContext().getTypeSize(E->getType()) == 64) ||
6051       BuiltinID == ARM::BI__ldrexd) {
6052     Function *F;
6053 
6054     switch (BuiltinID) {
6055     default: llvm_unreachable("unexpected builtin");
6056     case ARM::BI__builtin_arm_ldaex:
6057       F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
6058       break;
6059     case ARM::BI__builtin_arm_ldrexd:
6060     case ARM::BI__builtin_arm_ldrex:
6061     case ARM::BI__ldrexd:
6062       F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
6063       break;
6064     }
6065 
6066     Value *LdPtr = EmitScalarExpr(E->getArg(0));
6067     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
6068                                     "ldrexd");
6069 
6070     Value *Val0 = Builder.CreateExtractValue(Val, 1);
6071     Value *Val1 = Builder.CreateExtractValue(Val, 0);
6072     Val0 = Builder.CreateZExt(Val0, Int64Ty);
6073     Val1 = Builder.CreateZExt(Val1, Int64Ty);
6074 
6075     Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
6076     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
6077     Val = Builder.CreateOr(Val, Val1);
6078     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
6079   }
6080 
6081   if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
6082       BuiltinID == ARM::BI__builtin_arm_ldaex) {
6083     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
6084 
6085     QualType Ty = E->getType();
6086     llvm::Type *RealResTy = ConvertType(Ty);
6087     llvm::Type *PtrTy = llvm::IntegerType::get(
6088         getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
6089     LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
6090 
6091     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
6092                                        ? Intrinsic::arm_ldaex
6093                                        : Intrinsic::arm_ldrex,
6094                                    PtrTy);
6095     Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
6096 
6097     if (RealResTy->isPointerTy())
6098       return Builder.CreateIntToPtr(Val, RealResTy);
6099     else {
6100       llvm::Type *IntResTy = llvm::IntegerType::get(
6101           getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
6102       Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
6103       return Builder.CreateBitCast(Val, RealResTy);
6104     }
6105   }
6106 
6107   if (BuiltinID == ARM::BI__builtin_arm_strexd ||
6108       ((BuiltinID == ARM::BI__builtin_arm_stlex ||
6109         BuiltinID == ARM::BI__builtin_arm_strex) &&
6110        getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
6111     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
6112                                        ? Intrinsic::arm_stlexd
6113                                        : Intrinsic::arm_strexd);
6114     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
6115 
6116     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
6117     Value *Val = EmitScalarExpr(E->getArg(0));
6118     Builder.CreateStore(Val, Tmp);
6119 
6120     Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
6121     Val = Builder.CreateLoad(LdPtr);
6122 
6123     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
6124     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
6125     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
6126     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
6127   }
6128 
6129   if (BuiltinID == ARM::BI__builtin_arm_strex ||
6130       BuiltinID == ARM::BI__builtin_arm_stlex) {
6131     Value *StoreVal = EmitScalarExpr(E->getArg(0));
6132     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
6133 
6134     QualType Ty = E->getArg(0)->getType();
6135     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
6136                                                  getContext().getTypeSize(Ty));
6137     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
6138 
6139     if (StoreVal->getType()->isPointerTy())
6140       StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
6141     else {
6142       llvm::Type *IntTy = llvm::IntegerType::get(
6143           getLLVMContext(),
6144           CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
6145       StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
6146       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
6147     }
6148 
6149     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
6150                                        ? Intrinsic::arm_stlex
6151                                        : Intrinsic::arm_strex,
6152                                    StoreAddr->getType());
6153     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
6154   }
6155 
6156   if (BuiltinID == ARM::BI__builtin_arm_clrex) {
6157     Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
6158     return Builder.CreateCall(F);
6159   }
6160 
6161   // CRC32
6162   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
6163   switch (BuiltinID) {
6164   case ARM::BI__builtin_arm_crc32b:
6165     CRCIntrinsicID = Intrinsic::arm_crc32b; break;
6166   case ARM::BI__builtin_arm_crc32cb:
6167     CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
6168   case ARM::BI__builtin_arm_crc32h:
6169     CRCIntrinsicID = Intrinsic::arm_crc32h; break;
6170   case ARM::BI__builtin_arm_crc32ch:
6171     CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
6172   case ARM::BI__builtin_arm_crc32w:
6173   case ARM::BI__builtin_arm_crc32d:
6174     CRCIntrinsicID = Intrinsic::arm_crc32w; break;
6175   case ARM::BI__builtin_arm_crc32cw:
6176   case ARM::BI__builtin_arm_crc32cd:
6177     CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
6178   }
6179 
6180   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
6181     Value *Arg0 = EmitScalarExpr(E->getArg(0));
6182     Value *Arg1 = EmitScalarExpr(E->getArg(1));
6183 
6184     // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
6185     // intrinsics, hence we need different codegen for these cases.
6186     if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
6187         BuiltinID == ARM::BI__builtin_arm_crc32cd) {
6188       Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
6189       Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
6190       Value *Arg1b = Builder.CreateLShr(Arg1, C1);
6191       Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
6192 
6193       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
6194       Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
6195       return Builder.CreateCall(F, {Res, Arg1b});
6196     } else {
6197       Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
6198 
6199       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
6200       return Builder.CreateCall(F, {Arg0, Arg1});
6201     }
6202   }
6203 
6204   if (BuiltinID == ARM::BI__builtin_arm_rsr ||
6205       BuiltinID == ARM::BI__builtin_arm_rsr64 ||
6206       BuiltinID == ARM::BI__builtin_arm_rsrp ||
6207       BuiltinID == ARM::BI__builtin_arm_wsr ||
6208       BuiltinID == ARM::BI__builtin_arm_wsr64 ||
6209       BuiltinID == ARM::BI__builtin_arm_wsrp) {
6210 
6211     bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
6212                   BuiltinID == ARM::BI__builtin_arm_rsr64 ||
6213                   BuiltinID == ARM::BI__builtin_arm_rsrp;
6214 
6215     bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
6216                             BuiltinID == ARM::BI__builtin_arm_wsrp;
6217 
6218     bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
6219                    BuiltinID == ARM::BI__builtin_arm_wsr64;
6220 
6221     llvm::Type *ValueType;
6222     llvm::Type *RegisterType;
6223     if (IsPointerBuiltin) {
6224       ValueType = VoidPtrTy;
6225       RegisterType = Int32Ty;
6226     } else if (Is64Bit) {
6227       ValueType = RegisterType = Int64Ty;
6228     } else {
6229       ValueType = RegisterType = Int32Ty;
6230     }
6231 
6232     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
6233   }
6234 
6235   // Find out if any arguments are required to be integer constant
6236   // expressions.
6237   unsigned ICEArguments = 0;
6238   ASTContext::GetBuiltinTypeError Error;
6239   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
6240   assert(Error == ASTContext::GE_None && "Should not codegen an error");
6241 
6242   auto getAlignmentValue32 = [&](Address addr) -> Value* {
6243     return Builder.getInt32(addr.getAlignment().getQuantity());
6244   };
6245 
6246   Address PtrOp0 = Address::invalid();
6247   Address PtrOp1 = Address::invalid();
6248   SmallVector<Value*, 4> Ops;
6249   bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
6250   unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
6251   for (unsigned i = 0, e = NumArgs; i != e; i++) {
6252     if (i == 0) {
6253       switch (BuiltinID) {
6254       case NEON::BI__builtin_neon_vld1_v:
6255       case NEON::BI__builtin_neon_vld1q_v:
6256       case NEON::BI__builtin_neon_vld1q_lane_v:
6257       case NEON::BI__builtin_neon_vld1_lane_v:
6258       case NEON::BI__builtin_neon_vld1_dup_v:
6259       case NEON::BI__builtin_neon_vld1q_dup_v:
6260       case NEON::BI__builtin_neon_vst1_v:
6261       case NEON::BI__builtin_neon_vst1q_v:
6262       case NEON::BI__builtin_neon_vst1q_lane_v:
6263       case NEON::BI__builtin_neon_vst1_lane_v:
6264       case NEON::BI__builtin_neon_vst2_v:
6265       case NEON::BI__builtin_neon_vst2q_v:
6266       case NEON::BI__builtin_neon_vst2_lane_v:
6267       case NEON::BI__builtin_neon_vst2q_lane_v:
6268       case NEON::BI__builtin_neon_vst3_v:
6269       case NEON::BI__builtin_neon_vst3q_v:
6270       case NEON::BI__builtin_neon_vst3_lane_v:
6271       case NEON::BI__builtin_neon_vst3q_lane_v:
6272       case NEON::BI__builtin_neon_vst4_v:
6273       case NEON::BI__builtin_neon_vst4q_v:
6274       case NEON::BI__builtin_neon_vst4_lane_v:
6275       case NEON::BI__builtin_neon_vst4q_lane_v:
6276         // Get the alignment for the argument in addition to the value;
6277         // we'll use it later.
6278         PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
6279         Ops.push_back(PtrOp0.getPointer());
6280         continue;
6281       }
6282     }
6283     if (i == 1) {
6284       switch (BuiltinID) {
6285       case NEON::BI__builtin_neon_vld2_v:
6286       case NEON::BI__builtin_neon_vld2q_v:
6287       case NEON::BI__builtin_neon_vld3_v:
6288       case NEON::BI__builtin_neon_vld3q_v:
6289       case NEON::BI__builtin_neon_vld4_v:
6290       case NEON::BI__builtin_neon_vld4q_v:
6291       case NEON::BI__builtin_neon_vld2_lane_v:
6292       case NEON::BI__builtin_neon_vld2q_lane_v:
6293       case NEON::BI__builtin_neon_vld3_lane_v:
6294       case NEON::BI__builtin_neon_vld3q_lane_v:
6295       case NEON::BI__builtin_neon_vld4_lane_v:
6296       case NEON::BI__builtin_neon_vld4q_lane_v:
6297       case NEON::BI__builtin_neon_vld2_dup_v:
6298       case NEON::BI__builtin_neon_vld2q_dup_v:
6299       case NEON::BI__builtin_neon_vld3_dup_v:
6300       case NEON::BI__builtin_neon_vld3q_dup_v:
6301       case NEON::BI__builtin_neon_vld4_dup_v:
6302       case NEON::BI__builtin_neon_vld4q_dup_v:
6303         // Get the alignment for the argument in addition to the value;
6304         // we'll use it later.
6305         PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
6306         Ops.push_back(PtrOp1.getPointer());
6307         continue;
6308       }
6309     }
6310 
6311     if ((ICEArguments & (1 << i)) == 0) {
6312       Ops.push_back(EmitScalarExpr(E->getArg(i)));
6313     } else {
6314       // If this is required to be a constant, constant fold it so that we know
6315       // that the generated intrinsic gets a ConstantInt.
6316       llvm::APSInt Result;
6317       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
6318       assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
6319       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
6320     }
6321   }
6322 
6323   switch (BuiltinID) {
6324   default: break;
6325 
6326   case NEON::BI__builtin_neon_vget_lane_i8:
6327   case NEON::BI__builtin_neon_vget_lane_i16:
6328   case NEON::BI__builtin_neon_vget_lane_i32:
6329   case NEON::BI__builtin_neon_vget_lane_i64:
6330   case NEON::BI__builtin_neon_vget_lane_f32:
6331   case NEON::BI__builtin_neon_vgetq_lane_i8:
6332   case NEON::BI__builtin_neon_vgetq_lane_i16:
6333   case NEON::BI__builtin_neon_vgetq_lane_i32:
6334   case NEON::BI__builtin_neon_vgetq_lane_i64:
6335   case NEON::BI__builtin_neon_vgetq_lane_f32:
6336     return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
6337 
6338   case NEON::BI__builtin_neon_vrndns_f32: {
6339     Value *Arg = EmitScalarExpr(E->getArg(0));
6340     llvm::Type *Tys[] = {Arg->getType()};
6341     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
6342     return Builder.CreateCall(F, {Arg}, "vrndn"); }
6343 
6344   case NEON::BI__builtin_neon_vset_lane_i8:
6345   case NEON::BI__builtin_neon_vset_lane_i16:
6346   case NEON::BI__builtin_neon_vset_lane_i32:
6347   case NEON::BI__builtin_neon_vset_lane_i64:
6348   case NEON::BI__builtin_neon_vset_lane_f32:
6349   case NEON::BI__builtin_neon_vsetq_lane_i8:
6350   case NEON::BI__builtin_neon_vsetq_lane_i16:
6351   case NEON::BI__builtin_neon_vsetq_lane_i32:
6352   case NEON::BI__builtin_neon_vsetq_lane_i64:
6353   case NEON::BI__builtin_neon_vsetq_lane_f32:
6354     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6355 
6356   case NEON::BI__builtin_neon_vsha1h_u32:
6357     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
6358                         "vsha1h");
6359   case NEON::BI__builtin_neon_vsha1cq_u32:
6360     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
6361                         "vsha1h");
6362   case NEON::BI__builtin_neon_vsha1pq_u32:
6363     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
6364                         "vsha1h");
6365   case NEON::BI__builtin_neon_vsha1mq_u32:
6366     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
6367                         "vsha1h");
6368 
6369   // The ARM _MoveToCoprocessor builtins put the input register value as
6370   // the first argument, but the LLVM intrinsic expects it as the third one.
6371   case ARM::BI_MoveToCoprocessor:
6372   case ARM::BI_MoveToCoprocessor2: {
6373     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
6374                                    Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
6375     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
6376                                   Ops[3], Ops[4], Ops[5]});
6377   }
6378   case ARM::BI_BitScanForward:
6379   case ARM::BI_BitScanForward64:
6380     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
6381   case ARM::BI_BitScanReverse:
6382   case ARM::BI_BitScanReverse64:
6383     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
6384 
6385   case ARM::BI_InterlockedAnd64:
6386     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
6387   case ARM::BI_InterlockedExchange64:
6388     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
6389   case ARM::BI_InterlockedExchangeAdd64:
6390     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
6391   case ARM::BI_InterlockedExchangeSub64:
6392     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
6393   case ARM::BI_InterlockedOr64:
6394     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
6395   case ARM::BI_InterlockedXor64:
6396     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
6397   case ARM::BI_InterlockedDecrement64:
6398     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
6399   case ARM::BI_InterlockedIncrement64:
6400     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
6401   case ARM::BI_InterlockedExchangeAdd8_acq:
6402   case ARM::BI_InterlockedExchangeAdd16_acq:
6403   case ARM::BI_InterlockedExchangeAdd_acq:
6404   case ARM::BI_InterlockedExchangeAdd64_acq:
6405     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E);
6406   case ARM::BI_InterlockedExchangeAdd8_rel:
6407   case ARM::BI_InterlockedExchangeAdd16_rel:
6408   case ARM::BI_InterlockedExchangeAdd_rel:
6409   case ARM::BI_InterlockedExchangeAdd64_rel:
6410     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E);
6411   case ARM::BI_InterlockedExchangeAdd8_nf:
6412   case ARM::BI_InterlockedExchangeAdd16_nf:
6413   case ARM::BI_InterlockedExchangeAdd_nf:
6414   case ARM::BI_InterlockedExchangeAdd64_nf:
6415     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E);
6416   case ARM::BI_InterlockedExchange8_acq:
6417   case ARM::BI_InterlockedExchange16_acq:
6418   case ARM::BI_InterlockedExchange_acq:
6419   case ARM::BI_InterlockedExchange64_acq:
6420     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E);
6421   case ARM::BI_InterlockedExchange8_rel:
6422   case ARM::BI_InterlockedExchange16_rel:
6423   case ARM::BI_InterlockedExchange_rel:
6424   case ARM::BI_InterlockedExchange64_rel:
6425     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E);
6426   case ARM::BI_InterlockedExchange8_nf:
6427   case ARM::BI_InterlockedExchange16_nf:
6428   case ARM::BI_InterlockedExchange_nf:
6429   case ARM::BI_InterlockedExchange64_nf:
6430     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E);
6431   case ARM::BI_InterlockedCompareExchange8_acq:
6432   case ARM::BI_InterlockedCompareExchange16_acq:
6433   case ARM::BI_InterlockedCompareExchange_acq:
6434   case ARM::BI_InterlockedCompareExchange64_acq:
6435     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E);
6436   case ARM::BI_InterlockedCompareExchange8_rel:
6437   case ARM::BI_InterlockedCompareExchange16_rel:
6438   case ARM::BI_InterlockedCompareExchange_rel:
6439   case ARM::BI_InterlockedCompareExchange64_rel:
6440     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E);
6441   case ARM::BI_InterlockedCompareExchange8_nf:
6442   case ARM::BI_InterlockedCompareExchange16_nf:
6443   case ARM::BI_InterlockedCompareExchange_nf:
6444   case ARM::BI_InterlockedCompareExchange64_nf:
6445     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E);
6446   case ARM::BI_InterlockedOr8_acq:
6447   case ARM::BI_InterlockedOr16_acq:
6448   case ARM::BI_InterlockedOr_acq:
6449   case ARM::BI_InterlockedOr64_acq:
6450     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E);
6451   case ARM::BI_InterlockedOr8_rel:
6452   case ARM::BI_InterlockedOr16_rel:
6453   case ARM::BI_InterlockedOr_rel:
6454   case ARM::BI_InterlockedOr64_rel:
6455     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E);
6456   case ARM::BI_InterlockedOr8_nf:
6457   case ARM::BI_InterlockedOr16_nf:
6458   case ARM::BI_InterlockedOr_nf:
6459   case ARM::BI_InterlockedOr64_nf:
6460     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E);
6461   case ARM::BI_InterlockedXor8_acq:
6462   case ARM::BI_InterlockedXor16_acq:
6463   case ARM::BI_InterlockedXor_acq:
6464   case ARM::BI_InterlockedXor64_acq:
6465     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E);
6466   case ARM::BI_InterlockedXor8_rel:
6467   case ARM::BI_InterlockedXor16_rel:
6468   case ARM::BI_InterlockedXor_rel:
6469   case ARM::BI_InterlockedXor64_rel:
6470     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E);
6471   case ARM::BI_InterlockedXor8_nf:
6472   case ARM::BI_InterlockedXor16_nf:
6473   case ARM::BI_InterlockedXor_nf:
6474   case ARM::BI_InterlockedXor64_nf:
6475     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E);
6476   case ARM::BI_InterlockedAnd8_acq:
6477   case ARM::BI_InterlockedAnd16_acq:
6478   case ARM::BI_InterlockedAnd_acq:
6479   case ARM::BI_InterlockedAnd64_acq:
6480     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E);
6481   case ARM::BI_InterlockedAnd8_rel:
6482   case ARM::BI_InterlockedAnd16_rel:
6483   case ARM::BI_InterlockedAnd_rel:
6484   case ARM::BI_InterlockedAnd64_rel:
6485     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E);
6486   case ARM::BI_InterlockedAnd8_nf:
6487   case ARM::BI_InterlockedAnd16_nf:
6488   case ARM::BI_InterlockedAnd_nf:
6489   case ARM::BI_InterlockedAnd64_nf:
6490     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E);
6491   case ARM::BI_InterlockedIncrement16_acq:
6492   case ARM::BI_InterlockedIncrement_acq:
6493   case ARM::BI_InterlockedIncrement64_acq:
6494     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E);
6495   case ARM::BI_InterlockedIncrement16_rel:
6496   case ARM::BI_InterlockedIncrement_rel:
6497   case ARM::BI_InterlockedIncrement64_rel:
6498     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E);
6499   case ARM::BI_InterlockedIncrement16_nf:
6500   case ARM::BI_InterlockedIncrement_nf:
6501   case ARM::BI_InterlockedIncrement64_nf:
6502     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E);
6503   case ARM::BI_InterlockedDecrement16_acq:
6504   case ARM::BI_InterlockedDecrement_acq:
6505   case ARM::BI_InterlockedDecrement64_acq:
6506     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E);
6507   case ARM::BI_InterlockedDecrement16_rel:
6508   case ARM::BI_InterlockedDecrement_rel:
6509   case ARM::BI_InterlockedDecrement64_rel:
6510     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E);
6511   case ARM::BI_InterlockedDecrement16_nf:
6512   case ARM::BI_InterlockedDecrement_nf:
6513   case ARM::BI_InterlockedDecrement64_nf:
6514     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E);
6515   }
6516 
6517   // Get the last argument, which specifies the vector type.
6518   assert(HasExtraArg);
6519   llvm::APSInt Result;
6520   const Expr *Arg = E->getArg(E->getNumArgs()-1);
6521   if (!Arg->isIntegerConstantExpr(Result, getContext()))
6522     return nullptr;
6523 
6524   if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
6525       BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
6526     // Determine the overloaded type of this builtin.
6527     llvm::Type *Ty;
6528     if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
6529       Ty = FloatTy;
6530     else
6531       Ty = DoubleTy;
6532 
6533     // Determine whether this is an unsigned conversion or not.
6534     bool usgn = Result.getZExtValue() == 1;
6535     unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
6536 
6537     // Call the appropriate intrinsic.
6538     Function *F = CGM.getIntrinsic(Int, Ty);
6539     return Builder.CreateCall(F, Ops, "vcvtr");
6540   }
6541 
6542   // Determine the type of this overloaded NEON intrinsic.
6543   NeonTypeFlags Type(Result.getZExtValue());
6544   bool usgn = Type.isUnsigned();
6545   bool rightShift = false;
6546 
6547   llvm::VectorType *VTy = GetNeonType(this, Type,
6548                                       getTarget().hasLegalHalfType());
6549   llvm::Type *Ty = VTy;
6550   if (!Ty)
6551     return nullptr;
6552 
6553   // Many NEON builtins have identical semantics and uses in ARM and
6554   // AArch64. Emit these in a single function.
6555   auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
6556   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
6557       IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
6558   if (Builtin)
6559     return EmitCommonNeonBuiltinExpr(
6560         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
6561         Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
6562 
6563   unsigned Int;
6564   switch (BuiltinID) {
6565   default: return nullptr;
6566   case NEON::BI__builtin_neon_vld1q_lane_v:
6567     // Handle 64-bit integer elements as a special case.  Use shuffles of
6568     // one-element vectors to avoid poor code for i64 in the backend.
6569     if (VTy->getElementType()->isIntegerTy(64)) {
6570       // Extract the other lane.
6571       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6572       uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
6573       Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
6574       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
6575       // Load the value as a one-element vector.
6576       Ty = llvm::VectorType::get(VTy->getElementType(), 1);
6577       llvm::Type *Tys[] = {Ty, Int8PtrTy};
6578       Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
6579       Value *Align = getAlignmentValue32(PtrOp0);
6580       Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
6581       // Combine them.
6582       uint32_t Indices[] = {1 - Lane, Lane};
6583       SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
6584       return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
6585     }
6586     LLVM_FALLTHROUGH;
6587   case NEON::BI__builtin_neon_vld1_lane_v: {
6588     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6589     PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
6590     Value *Ld = Builder.CreateLoad(PtrOp0);
6591     return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
6592   }
6593   case NEON::BI__builtin_neon_vqrshrn_n_v:
6594     Int =
6595       usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
6596     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
6597                         1, true);
6598   case NEON::BI__builtin_neon_vqrshrun_n_v:
6599     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
6600                         Ops, "vqrshrun_n", 1, true);
6601   case NEON::BI__builtin_neon_vqshrn_n_v:
6602     Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
6603     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
6604                         1, true);
6605   case NEON::BI__builtin_neon_vqshrun_n_v:
6606     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
6607                         Ops, "vqshrun_n", 1, true);
6608   case NEON::BI__builtin_neon_vrecpe_v:
6609   case NEON::BI__builtin_neon_vrecpeq_v:
6610     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
6611                         Ops, "vrecpe");
6612   case NEON::BI__builtin_neon_vrshrn_n_v:
6613     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
6614                         Ops, "vrshrn_n", 1, true);
6615   case NEON::BI__builtin_neon_vrsra_n_v:
6616   case NEON::BI__builtin_neon_vrsraq_n_v:
6617     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6618     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6619     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
6620     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
6621     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
6622     return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
6623   case NEON::BI__builtin_neon_vsri_n_v:
6624   case NEON::BI__builtin_neon_vsriq_n_v:
6625     rightShift = true;
6626     LLVM_FALLTHROUGH;
6627   case NEON::BI__builtin_neon_vsli_n_v:
6628   case NEON::BI__builtin_neon_vsliq_n_v:
6629     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
6630     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
6631                         Ops, "vsli_n");
6632   case NEON::BI__builtin_neon_vsra_n_v:
6633   case NEON::BI__builtin_neon_vsraq_n_v:
6634     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6635     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
6636     return Builder.CreateAdd(Ops[0], Ops[1]);
6637   case NEON::BI__builtin_neon_vst1q_lane_v:
6638     // Handle 64-bit integer elements as a special case.  Use a shuffle to get
6639     // a one-element vector and avoid poor code for i64 in the backend.
6640     if (VTy->getElementType()->isIntegerTy(64)) {
6641       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6642       Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
6643       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
6644       Ops[2] = getAlignmentValue32(PtrOp0);
6645       llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
6646       return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
6647                                                  Tys), Ops);
6648     }
6649     LLVM_FALLTHROUGH;
6650   case NEON::BI__builtin_neon_vst1_lane_v: {
6651     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6652     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6653     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6654     auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
6655     return St;
6656   }
6657   case NEON::BI__builtin_neon_vtbl1_v:
6658     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
6659                         Ops, "vtbl1");
6660   case NEON::BI__builtin_neon_vtbl2_v:
6661     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
6662                         Ops, "vtbl2");
6663   case NEON::BI__builtin_neon_vtbl3_v:
6664     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
6665                         Ops, "vtbl3");
6666   case NEON::BI__builtin_neon_vtbl4_v:
6667     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
6668                         Ops, "vtbl4");
6669   case NEON::BI__builtin_neon_vtbx1_v:
6670     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
6671                         Ops, "vtbx1");
6672   case NEON::BI__builtin_neon_vtbx2_v:
6673     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
6674                         Ops, "vtbx2");
6675   case NEON::BI__builtin_neon_vtbx3_v:
6676     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
6677                         Ops, "vtbx3");
6678   case NEON::BI__builtin_neon_vtbx4_v:
6679     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
6680                         Ops, "vtbx4");
6681   }
6682 }
6683 
6684 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
6685                                       const CallExpr *E,
6686                                       SmallVectorImpl<Value *> &Ops,
6687                                       llvm::Triple::ArchType Arch) {
6688   unsigned int Int = 0;
6689   const char *s = nullptr;
6690 
6691   switch (BuiltinID) {
6692   default:
6693     return nullptr;
6694   case NEON::BI__builtin_neon_vtbl1_v:
6695   case NEON::BI__builtin_neon_vqtbl1_v:
6696   case NEON::BI__builtin_neon_vqtbl1q_v:
6697   case NEON::BI__builtin_neon_vtbl2_v:
6698   case NEON::BI__builtin_neon_vqtbl2_v:
6699   case NEON::BI__builtin_neon_vqtbl2q_v:
6700   case NEON::BI__builtin_neon_vtbl3_v:
6701   case NEON::BI__builtin_neon_vqtbl3_v:
6702   case NEON::BI__builtin_neon_vqtbl3q_v:
6703   case NEON::BI__builtin_neon_vtbl4_v:
6704   case NEON::BI__builtin_neon_vqtbl4_v:
6705   case NEON::BI__builtin_neon_vqtbl4q_v:
6706     break;
6707   case NEON::BI__builtin_neon_vtbx1_v:
6708   case NEON::BI__builtin_neon_vqtbx1_v:
6709   case NEON::BI__builtin_neon_vqtbx1q_v:
6710   case NEON::BI__builtin_neon_vtbx2_v:
6711   case NEON::BI__builtin_neon_vqtbx2_v:
6712   case NEON::BI__builtin_neon_vqtbx2q_v:
6713   case NEON::BI__builtin_neon_vtbx3_v:
6714   case NEON::BI__builtin_neon_vqtbx3_v:
6715   case NEON::BI__builtin_neon_vqtbx3q_v:
6716   case NEON::BI__builtin_neon_vtbx4_v:
6717   case NEON::BI__builtin_neon_vqtbx4_v:
6718   case NEON::BI__builtin_neon_vqtbx4q_v:
6719     break;
6720   }
6721 
6722   assert(E->getNumArgs() >= 3);
6723 
6724   // Get the last argument, which specifies the vector type.
6725   llvm::APSInt Result;
6726   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
6727   if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
6728     return nullptr;
6729 
6730   // Determine the type of this overloaded NEON intrinsic.
6731   NeonTypeFlags Type(Result.getZExtValue());
6732   llvm::VectorType *Ty = GetNeonType(&CGF, Type);
6733   if (!Ty)
6734     return nullptr;
6735 
6736   CodeGen::CGBuilderTy &Builder = CGF.Builder;
6737 
6738   // AArch64 scalar builtins are not overloaded, they do not have an extra
6739   // argument that specifies the vector type, need to handle each case.
6740   switch (BuiltinID) {
6741   case NEON::BI__builtin_neon_vtbl1_v: {
6742     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
6743                               Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
6744                               "vtbl1");
6745   }
6746   case NEON::BI__builtin_neon_vtbl2_v: {
6747     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
6748                               Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
6749                               "vtbl1");
6750   }
6751   case NEON::BI__builtin_neon_vtbl3_v: {
6752     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
6753                               Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
6754                               "vtbl2");
6755   }
6756   case NEON::BI__builtin_neon_vtbl4_v: {
6757     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
6758                               Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
6759                               "vtbl2");
6760   }
6761   case NEON::BI__builtin_neon_vtbx1_v: {
6762     Value *TblRes =
6763         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
6764                            Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
6765 
6766     llvm::Constant *EightV = ConstantInt::get(Ty, 8);
6767     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
6768     CmpRes = Builder.CreateSExt(CmpRes, Ty);
6769 
6770     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
6771     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
6772     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
6773   }
6774   case NEON::BI__builtin_neon_vtbx2_v: {
6775     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
6776                               Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
6777                               "vtbx1");
6778   }
6779   case NEON::BI__builtin_neon_vtbx3_v: {
6780     Value *TblRes =
6781         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
6782                            Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
6783 
6784     llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
6785     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
6786                                            TwentyFourV);
6787     CmpRes = Builder.CreateSExt(CmpRes, Ty);
6788 
6789     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
6790     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
6791     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
6792   }
6793   case NEON::BI__builtin_neon_vtbx4_v: {
6794     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
6795                               Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
6796                               "vtbx2");
6797   }
6798   case NEON::BI__builtin_neon_vqtbl1_v:
6799   case NEON::BI__builtin_neon_vqtbl1q_v:
6800     Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
6801   case NEON::BI__builtin_neon_vqtbl2_v:
6802   case NEON::BI__builtin_neon_vqtbl2q_v: {
6803     Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
6804   case NEON::BI__builtin_neon_vqtbl3_v:
6805   case NEON::BI__builtin_neon_vqtbl3q_v:
6806     Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
6807   case NEON::BI__builtin_neon_vqtbl4_v:
6808   case NEON::BI__builtin_neon_vqtbl4q_v:
6809     Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
6810   case NEON::BI__builtin_neon_vqtbx1_v:
6811   case NEON::BI__builtin_neon_vqtbx1q_v:
6812     Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
6813   case NEON::BI__builtin_neon_vqtbx2_v:
6814   case NEON::BI__builtin_neon_vqtbx2q_v:
6815     Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
6816   case NEON::BI__builtin_neon_vqtbx3_v:
6817   case NEON::BI__builtin_neon_vqtbx3q_v:
6818     Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
6819   case NEON::BI__builtin_neon_vqtbx4_v:
6820   case NEON::BI__builtin_neon_vqtbx4q_v:
6821     Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
6822   }
6823   }
6824 
6825   if (!Int)
6826     return nullptr;
6827 
6828   Function *F = CGF.CGM.getIntrinsic(Int, Ty);
6829   return CGF.EmitNeonCall(F, Ops, s);
6830 }
6831 
6832 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
6833   llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
6834   Op = Builder.CreateBitCast(Op, Int16Ty);
6835   Value *V = UndefValue::get(VTy);
6836   llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
6837   Op = Builder.CreateInsertElement(V, Op, CI);
6838   return Op;
6839 }
6840 
6841 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
6842                                                const CallExpr *E,
6843                                                llvm::Triple::ArchType Arch) {
6844   unsigned HintID = static_cast<unsigned>(-1);
6845   switch (BuiltinID) {
6846   default: break;
6847   case AArch64::BI__builtin_arm_nop:
6848     HintID = 0;
6849     break;
6850   case AArch64::BI__builtin_arm_yield:
6851   case AArch64::BI__yield:
6852     HintID = 1;
6853     break;
6854   case AArch64::BI__builtin_arm_wfe:
6855   case AArch64::BI__wfe:
6856     HintID = 2;
6857     break;
6858   case AArch64::BI__builtin_arm_wfi:
6859   case AArch64::BI__wfi:
6860     HintID = 3;
6861     break;
6862   case AArch64::BI__builtin_arm_sev:
6863   case AArch64::BI__sev:
6864     HintID = 4;
6865     break;
6866   case AArch64::BI__builtin_arm_sevl:
6867   case AArch64::BI__sevl:
6868     HintID = 5;
6869     break;
6870   }
6871 
6872   if (HintID != static_cast<unsigned>(-1)) {
6873     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
6874     return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
6875   }
6876 
6877   if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
6878     Value *Address         = EmitScalarExpr(E->getArg(0));
6879     Value *RW              = EmitScalarExpr(E->getArg(1));
6880     Value *CacheLevel      = EmitScalarExpr(E->getArg(2));
6881     Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
6882     Value *IsData          = EmitScalarExpr(E->getArg(4));
6883 
6884     Value *Locality = nullptr;
6885     if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
6886       // Temporal fetch, needs to convert cache level to locality.
6887       Locality = llvm::ConstantInt::get(Int32Ty,
6888         -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
6889     } else {
6890       // Streaming fetch.
6891       Locality = llvm::ConstantInt::get(Int32Ty, 0);
6892     }
6893 
6894     // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
6895     // PLDL3STRM or PLDL2STRM.
6896     Function *F = CGM.getIntrinsic(Intrinsic::prefetch);
6897     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
6898   }
6899 
6900   if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
6901     assert((getContext().getTypeSize(E->getType()) == 32) &&
6902            "rbit of unusual size!");
6903     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
6904     return Builder.CreateCall(
6905         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
6906   }
6907   if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
6908     assert((getContext().getTypeSize(E->getType()) == 64) &&
6909            "rbit of unusual size!");
6910     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
6911     return Builder.CreateCall(
6912         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
6913   }
6914 
6915   if (BuiltinID == AArch64::BI__clear_cache) {
6916     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
6917     const FunctionDecl *FD = E->getDirectCallee();
6918     Value *Ops[2];
6919     for (unsigned i = 0; i < 2; i++)
6920       Ops[i] = EmitScalarExpr(E->getArg(i));
6921     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
6922     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
6923     StringRef Name = FD->getName();
6924     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
6925   }
6926 
6927   if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
6928       BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
6929       getContext().getTypeSize(E->getType()) == 128) {
6930     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
6931                                        ? Intrinsic::aarch64_ldaxp
6932                                        : Intrinsic::aarch64_ldxp);
6933 
6934     Value *LdPtr = EmitScalarExpr(E->getArg(0));
6935     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
6936                                     "ldxp");
6937 
6938     Value *Val0 = Builder.CreateExtractValue(Val, 1);
6939     Value *Val1 = Builder.CreateExtractValue(Val, 0);
6940     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
6941     Val0 = Builder.CreateZExt(Val0, Int128Ty);
6942     Val1 = Builder.CreateZExt(Val1, Int128Ty);
6943 
6944     Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
6945     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
6946     Val = Builder.CreateOr(Val, Val1);
6947     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
6948   } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
6949              BuiltinID == AArch64::BI__builtin_arm_ldaex) {
6950     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
6951 
6952     QualType Ty = E->getType();
6953     llvm::Type *RealResTy = ConvertType(Ty);
6954     llvm::Type *PtrTy = llvm::IntegerType::get(
6955         getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
6956     LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
6957 
6958     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
6959                                        ? Intrinsic::aarch64_ldaxr
6960                                        : Intrinsic::aarch64_ldxr,
6961                                    PtrTy);
6962     Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
6963 
6964     if (RealResTy->isPointerTy())
6965       return Builder.CreateIntToPtr(Val, RealResTy);
6966 
6967     llvm::Type *IntResTy = llvm::IntegerType::get(
6968         getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
6969     Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
6970     return Builder.CreateBitCast(Val, RealResTy);
6971   }
6972 
6973   if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
6974        BuiltinID == AArch64::BI__builtin_arm_stlex) &&
6975       getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
6976     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
6977                                        ? Intrinsic::aarch64_stlxp
6978                                        : Intrinsic::aarch64_stxp);
6979     llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
6980 
6981     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
6982     EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
6983 
6984     Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
6985     llvm::Value *Val = Builder.CreateLoad(Tmp);
6986 
6987     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
6988     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
6989     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
6990                                          Int8PtrTy);
6991     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
6992   }
6993 
6994   if (BuiltinID == AArch64::BI__builtin_arm_strex ||
6995       BuiltinID == AArch64::BI__builtin_arm_stlex) {
6996     Value *StoreVal = EmitScalarExpr(E->getArg(0));
6997     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
6998 
6999     QualType Ty = E->getArg(0)->getType();
7000     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
7001                                                  getContext().getTypeSize(Ty));
7002     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
7003 
7004     if (StoreVal->getType()->isPointerTy())
7005       StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
7006     else {
7007       llvm::Type *IntTy = llvm::IntegerType::get(
7008           getLLVMContext(),
7009           CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
7010       StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
7011       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
7012     }
7013 
7014     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
7015                                        ? Intrinsic::aarch64_stlxr
7016                                        : Intrinsic::aarch64_stxr,
7017                                    StoreAddr->getType());
7018     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
7019   }
7020 
7021   if (BuiltinID == AArch64::BI__getReg) {
7022     Expr::EvalResult Result;
7023     if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
7024       llvm_unreachable("Sema will ensure that the parameter is constant");
7025 
7026     llvm::APSInt Value = Result.Val.getInt();
7027     LLVMContext &Context = CGM.getLLVMContext();
7028     std::string Reg = Value == 31 ? "sp" : "x" + Value.toString(10);
7029 
7030     llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
7031     llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
7032     llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
7033 
7034     llvm::Function *F =
7035         CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
7036     return Builder.CreateCall(F, Metadata);
7037   }
7038 
7039   if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
7040     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
7041     return Builder.CreateCall(F);
7042   }
7043 
7044   if (BuiltinID == AArch64::BI_ReadWriteBarrier)
7045     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
7046                                llvm::SyncScope::SingleThread);
7047 
7048   // CRC32
7049   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
7050   switch (BuiltinID) {
7051   case AArch64::BI__builtin_arm_crc32b:
7052     CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
7053   case AArch64::BI__builtin_arm_crc32cb:
7054     CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
7055   case AArch64::BI__builtin_arm_crc32h:
7056     CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
7057   case AArch64::BI__builtin_arm_crc32ch:
7058     CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
7059   case AArch64::BI__builtin_arm_crc32w:
7060     CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
7061   case AArch64::BI__builtin_arm_crc32cw:
7062     CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
7063   case AArch64::BI__builtin_arm_crc32d:
7064     CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
7065   case AArch64::BI__builtin_arm_crc32cd:
7066     CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
7067   }
7068 
7069   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
7070     Value *Arg0 = EmitScalarExpr(E->getArg(0));
7071     Value *Arg1 = EmitScalarExpr(E->getArg(1));
7072     Function *F = CGM.getIntrinsic(CRCIntrinsicID);
7073 
7074     llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
7075     Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
7076 
7077     return Builder.CreateCall(F, {Arg0, Arg1});
7078   }
7079 
7080   if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
7081       BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
7082       BuiltinID == AArch64::BI__builtin_arm_rsrp ||
7083       BuiltinID == AArch64::BI__builtin_arm_wsr ||
7084       BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
7085       BuiltinID == AArch64::BI__builtin_arm_wsrp) {
7086 
7087     bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
7088                   BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
7089                   BuiltinID == AArch64::BI__builtin_arm_rsrp;
7090 
7091     bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
7092                             BuiltinID == AArch64::BI__builtin_arm_wsrp;
7093 
7094     bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
7095                    BuiltinID != AArch64::BI__builtin_arm_wsr;
7096 
7097     llvm::Type *ValueType;
7098     llvm::Type *RegisterType = Int64Ty;
7099     if (IsPointerBuiltin) {
7100       ValueType = VoidPtrTy;
7101     } else if (Is64Bit) {
7102       ValueType = Int64Ty;
7103     } else {
7104       ValueType = Int32Ty;
7105     }
7106 
7107     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
7108   }
7109 
7110   if (BuiltinID == AArch64::BI_ReadStatusReg ||
7111       BuiltinID == AArch64::BI_WriteStatusReg) {
7112     LLVMContext &Context = CGM.getLLVMContext();
7113 
7114     unsigned SysReg =
7115       E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
7116 
7117     std::string SysRegStr;
7118     llvm::raw_string_ostream(SysRegStr) <<
7119                        ((1 << 1) | ((SysReg >> 14) & 1))  << ":" <<
7120                        ((SysReg >> 11) & 7)               << ":" <<
7121                        ((SysReg >> 7)  & 15)              << ":" <<
7122                        ((SysReg >> 3)  & 15)              << ":" <<
7123                        ( SysReg        & 7);
7124 
7125     llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
7126     llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
7127     llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
7128 
7129     llvm::Type *RegisterType = Int64Ty;
7130     llvm::Type *Types[] = { RegisterType };
7131 
7132     if (BuiltinID == AArch64::BI_ReadStatusReg) {
7133       llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
7134 
7135       return Builder.CreateCall(F, Metadata);
7136     }
7137 
7138     llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
7139     llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
7140 
7141     return Builder.CreateCall(F, { Metadata, ArgValue });
7142   }
7143 
7144   if (BuiltinID == AArch64::BI_AddressOfReturnAddress) {
7145     llvm::Function *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
7146     return Builder.CreateCall(F);
7147   }
7148 
7149   // Find out if any arguments are required to be integer constant
7150   // expressions.
7151   unsigned ICEArguments = 0;
7152   ASTContext::GetBuiltinTypeError Error;
7153   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
7154   assert(Error == ASTContext::GE_None && "Should not codegen an error");
7155 
7156   llvm::SmallVector<Value*, 4> Ops;
7157   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
7158     if ((ICEArguments & (1 << i)) == 0) {
7159       Ops.push_back(EmitScalarExpr(E->getArg(i)));
7160     } else {
7161       // If this is required to be a constant, constant fold it so that we know
7162       // that the generated intrinsic gets a ConstantInt.
7163       llvm::APSInt Result;
7164       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
7165       assert(IsConst && "Constant arg isn't actually constant?");
7166       (void)IsConst;
7167       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
7168     }
7169   }
7170 
7171   auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
7172   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
7173       SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
7174 
7175   if (Builtin) {
7176     Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
7177     Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
7178     assert(Result && "SISD intrinsic should have been handled");
7179     return Result;
7180   }
7181 
7182   llvm::APSInt Result;
7183   const Expr *Arg = E->getArg(E->getNumArgs()-1);
7184   NeonTypeFlags Type(0);
7185   if (Arg->isIntegerConstantExpr(Result, getContext()))
7186     // Determine the type of this overloaded NEON intrinsic.
7187     Type = NeonTypeFlags(Result.getZExtValue());
7188 
7189   bool usgn = Type.isUnsigned();
7190   bool quad = Type.isQuad();
7191 
7192   // Handle non-overloaded intrinsics first.
7193   switch (BuiltinID) {
7194   default: break;
7195   case NEON::BI__builtin_neon_vabsh_f16:
7196     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7197     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
7198   case NEON::BI__builtin_neon_vldrq_p128: {
7199     llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
7200     llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
7201     Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
7202     return Builder.CreateAlignedLoad(Int128Ty, Ptr,
7203                                      CharUnits::fromQuantity(16));
7204   }
7205   case NEON::BI__builtin_neon_vstrq_p128: {
7206     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
7207     Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
7208     return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
7209   }
7210   case NEON::BI__builtin_neon_vcvts_u32_f32:
7211   case NEON::BI__builtin_neon_vcvtd_u64_f64:
7212     usgn = true;
7213     LLVM_FALLTHROUGH;
7214   case NEON::BI__builtin_neon_vcvts_s32_f32:
7215   case NEON::BI__builtin_neon_vcvtd_s64_f64: {
7216     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7217     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
7218     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
7219     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
7220     Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
7221     if (usgn)
7222       return Builder.CreateFPToUI(Ops[0], InTy);
7223     return Builder.CreateFPToSI(Ops[0], InTy);
7224   }
7225   case NEON::BI__builtin_neon_vcvts_f32_u32:
7226   case NEON::BI__builtin_neon_vcvtd_f64_u64:
7227     usgn = true;
7228     LLVM_FALLTHROUGH;
7229   case NEON::BI__builtin_neon_vcvts_f32_s32:
7230   case NEON::BI__builtin_neon_vcvtd_f64_s64: {
7231     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7232     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
7233     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
7234     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
7235     Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
7236     if (usgn)
7237       return Builder.CreateUIToFP(Ops[0], FTy);
7238     return Builder.CreateSIToFP(Ops[0], FTy);
7239   }
7240   case NEON::BI__builtin_neon_vcvth_f16_u16:
7241   case NEON::BI__builtin_neon_vcvth_f16_u32:
7242   case NEON::BI__builtin_neon_vcvth_f16_u64:
7243     usgn = true;
7244     LLVM_FALLTHROUGH;
7245   case NEON::BI__builtin_neon_vcvth_f16_s16:
7246   case NEON::BI__builtin_neon_vcvth_f16_s32:
7247   case NEON::BI__builtin_neon_vcvth_f16_s64: {
7248     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7249     llvm::Type *FTy = HalfTy;
7250     llvm::Type *InTy;
7251     if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
7252       InTy = Int64Ty;
7253     else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
7254       InTy = Int32Ty;
7255     else
7256       InTy = Int16Ty;
7257     Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
7258     if (usgn)
7259       return Builder.CreateUIToFP(Ops[0], FTy);
7260     return Builder.CreateSIToFP(Ops[0], FTy);
7261   }
7262   case NEON::BI__builtin_neon_vcvth_u16_f16:
7263     usgn = true;
7264     LLVM_FALLTHROUGH;
7265   case NEON::BI__builtin_neon_vcvth_s16_f16: {
7266     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7267     Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
7268     if (usgn)
7269       return Builder.CreateFPToUI(Ops[0], Int16Ty);
7270     return Builder.CreateFPToSI(Ops[0], Int16Ty);
7271   }
7272   case NEON::BI__builtin_neon_vcvth_u32_f16:
7273     usgn = true;
7274     LLVM_FALLTHROUGH;
7275   case NEON::BI__builtin_neon_vcvth_s32_f16: {
7276     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7277     Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
7278     if (usgn)
7279       return Builder.CreateFPToUI(Ops[0], Int32Ty);
7280     return Builder.CreateFPToSI(Ops[0], Int32Ty);
7281   }
7282   case NEON::BI__builtin_neon_vcvth_u64_f16:
7283     usgn = true;
7284     LLVM_FALLTHROUGH;
7285   case NEON::BI__builtin_neon_vcvth_s64_f16: {
7286     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7287     Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
7288     if (usgn)
7289       return Builder.CreateFPToUI(Ops[0], Int64Ty);
7290     return Builder.CreateFPToSI(Ops[0], Int64Ty);
7291   }
7292   case NEON::BI__builtin_neon_vcvtah_u16_f16:
7293   case NEON::BI__builtin_neon_vcvtmh_u16_f16:
7294   case NEON::BI__builtin_neon_vcvtnh_u16_f16:
7295   case NEON::BI__builtin_neon_vcvtph_u16_f16:
7296   case NEON::BI__builtin_neon_vcvtah_s16_f16:
7297   case NEON::BI__builtin_neon_vcvtmh_s16_f16:
7298   case NEON::BI__builtin_neon_vcvtnh_s16_f16:
7299   case NEON::BI__builtin_neon_vcvtph_s16_f16: {
7300     unsigned Int;
7301     llvm::Type* InTy = Int32Ty;
7302     llvm::Type* FTy  = HalfTy;
7303     llvm::Type *Tys[2] = {InTy, FTy};
7304     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7305     switch (BuiltinID) {
7306     default: llvm_unreachable("missing builtin ID in switch!");
7307     case NEON::BI__builtin_neon_vcvtah_u16_f16:
7308       Int = Intrinsic::aarch64_neon_fcvtau; break;
7309     case NEON::BI__builtin_neon_vcvtmh_u16_f16:
7310       Int = Intrinsic::aarch64_neon_fcvtmu; break;
7311     case NEON::BI__builtin_neon_vcvtnh_u16_f16:
7312       Int = Intrinsic::aarch64_neon_fcvtnu; break;
7313     case NEON::BI__builtin_neon_vcvtph_u16_f16:
7314       Int = Intrinsic::aarch64_neon_fcvtpu; break;
7315     case NEON::BI__builtin_neon_vcvtah_s16_f16:
7316       Int = Intrinsic::aarch64_neon_fcvtas; break;
7317     case NEON::BI__builtin_neon_vcvtmh_s16_f16:
7318       Int = Intrinsic::aarch64_neon_fcvtms; break;
7319     case NEON::BI__builtin_neon_vcvtnh_s16_f16:
7320       Int = Intrinsic::aarch64_neon_fcvtns; break;
7321     case NEON::BI__builtin_neon_vcvtph_s16_f16:
7322       Int = Intrinsic::aarch64_neon_fcvtps; break;
7323     }
7324     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
7325     return Builder.CreateTrunc(Ops[0], Int16Ty);
7326   }
7327   case NEON::BI__builtin_neon_vcaleh_f16:
7328   case NEON::BI__builtin_neon_vcalth_f16:
7329   case NEON::BI__builtin_neon_vcageh_f16:
7330   case NEON::BI__builtin_neon_vcagth_f16: {
7331     unsigned Int;
7332     llvm::Type* InTy = Int32Ty;
7333     llvm::Type* FTy  = HalfTy;
7334     llvm::Type *Tys[2] = {InTy, FTy};
7335     Ops.push_back(EmitScalarExpr(E->getArg(1)));
7336     switch (BuiltinID) {
7337     default: llvm_unreachable("missing builtin ID in switch!");
7338     case NEON::BI__builtin_neon_vcageh_f16:
7339       Int = Intrinsic::aarch64_neon_facge; break;
7340     case NEON::BI__builtin_neon_vcagth_f16:
7341       Int = Intrinsic::aarch64_neon_facgt; break;
7342     case NEON::BI__builtin_neon_vcaleh_f16:
7343       Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
7344     case NEON::BI__builtin_neon_vcalth_f16:
7345       Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
7346     }
7347     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
7348     return Builder.CreateTrunc(Ops[0], Int16Ty);
7349   }
7350   case NEON::BI__builtin_neon_vcvth_n_s16_f16:
7351   case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
7352     unsigned Int;
7353     llvm::Type* InTy = Int32Ty;
7354     llvm::Type* FTy  = HalfTy;
7355     llvm::Type *Tys[2] = {InTy, FTy};
7356     Ops.push_back(EmitScalarExpr(E->getArg(1)));
7357     switch (BuiltinID) {
7358     default: llvm_unreachable("missing builtin ID in switch!");
7359     case NEON::BI__builtin_neon_vcvth_n_s16_f16:
7360       Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
7361     case NEON::BI__builtin_neon_vcvth_n_u16_f16:
7362       Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
7363     }
7364     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
7365     return Builder.CreateTrunc(Ops[0], Int16Ty);
7366   }
7367   case NEON::BI__builtin_neon_vcvth_n_f16_s16:
7368   case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
7369     unsigned Int;
7370     llvm::Type* FTy  = HalfTy;
7371     llvm::Type* InTy = Int32Ty;
7372     llvm::Type *Tys[2] = {FTy, InTy};
7373     Ops.push_back(EmitScalarExpr(E->getArg(1)));
7374     switch (BuiltinID) {
7375     default: llvm_unreachable("missing builtin ID in switch!");
7376     case NEON::BI__builtin_neon_vcvth_n_f16_s16:
7377       Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
7378       Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
7379       break;
7380     case NEON::BI__builtin_neon_vcvth_n_f16_u16:
7381       Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
7382       Ops[0] = Builder.CreateZExt(Ops[0], InTy);
7383       break;
7384     }
7385     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
7386   }
7387   case NEON::BI__builtin_neon_vpaddd_s64: {
7388     llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
7389     Value *Vec = EmitScalarExpr(E->getArg(0));
    // The vector is v2i64, so make sure it's bitcast to that.
7391     Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
7392     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
7393     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
7394     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
7395     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
    // Pairwise addition of a v2i64 into a scalar i64.
7397     return Builder.CreateAdd(Op0, Op1, "vpaddd");
7398   }
7399   case NEON::BI__builtin_neon_vpaddd_f64: {
7400     llvm::Type *Ty =
7401       llvm::VectorType::get(DoubleTy, 2);
7402     Value *Vec = EmitScalarExpr(E->getArg(0));
7403     // The vector is v2f64, so make sure it's bitcast to that.
7404     Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
7405     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
7406     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
7407     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
7408     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
7409     // Pairwise addition of a v2f64 into a scalar f64.
7410     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
7411   }
7412   case NEON::BI__builtin_neon_vpadds_f32: {
7413     llvm::Type *Ty =
7414       llvm::VectorType::get(FloatTy, 2);
7415     Value *Vec = EmitScalarExpr(E->getArg(0));
7416     // The vector is v2f32, so make sure it's bitcast to that.
7417     Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
7418     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
7419     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
7420     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
7421     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
7422     // Pairwise addition of a v2f32 into a scalar f32.
7423     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
7424   }
7425   case NEON::BI__builtin_neon_vceqzd_s64:
7426   case NEON::BI__builtin_neon_vceqzd_f64:
7427   case NEON::BI__builtin_neon_vceqzs_f32:
7428   case NEON::BI__builtin_neon_vceqzh_f16:
7429     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7430     return EmitAArch64CompareBuiltinExpr(
7431         Ops[0], ConvertType(E->getCallReturnType(getContext())),
7432         ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
7433   case NEON::BI__builtin_neon_vcgezd_s64:
7434   case NEON::BI__builtin_neon_vcgezd_f64:
7435   case NEON::BI__builtin_neon_vcgezs_f32:
7436   case NEON::BI__builtin_neon_vcgezh_f16:
7437     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7438     return EmitAArch64CompareBuiltinExpr(
7439         Ops[0], ConvertType(E->getCallReturnType(getContext())),
7440         ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
7441   case NEON::BI__builtin_neon_vclezd_s64:
7442   case NEON::BI__builtin_neon_vclezd_f64:
7443   case NEON::BI__builtin_neon_vclezs_f32:
7444   case NEON::BI__builtin_neon_vclezh_f16:
7445     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7446     return EmitAArch64CompareBuiltinExpr(
7447         Ops[0], ConvertType(E->getCallReturnType(getContext())),
7448         ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
7449   case NEON::BI__builtin_neon_vcgtzd_s64:
7450   case NEON::BI__builtin_neon_vcgtzd_f64:
7451   case NEON::BI__builtin_neon_vcgtzs_f32:
7452   case NEON::BI__builtin_neon_vcgtzh_f16:
7453     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7454     return EmitAArch64CompareBuiltinExpr(
7455         Ops[0], ConvertType(E->getCallReturnType(getContext())),
7456         ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
7457   case NEON::BI__builtin_neon_vcltzd_s64:
7458   case NEON::BI__builtin_neon_vcltzd_f64:
7459   case NEON::BI__builtin_neon_vcltzs_f32:
7460   case NEON::BI__builtin_neon_vcltzh_f16:
7461     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7462     return EmitAArch64CompareBuiltinExpr(
7463         Ops[0], ConvertType(E->getCallReturnType(getContext())),
7464         ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
7465 
7466   case NEON::BI__builtin_neon_vceqzd_u64: {
7467     Ops.push_back(EmitScalarExpr(E->getArg(0)));
7468     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
7469     Ops[0] =
7470         Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
7471     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
7472   }
7473   case NEON::BI__builtin_neon_vceqd_f64:
7474   case NEON::BI__builtin_neon_vcled_f64:
7475   case NEON::BI__builtin_neon_vcltd_f64:
7476   case NEON::BI__builtin_neon_vcged_f64:
7477   case NEON::BI__builtin_neon_vcgtd_f64: {
7478     llvm::CmpInst::Predicate P;
7479     switch (BuiltinID) {
7480     default: llvm_unreachable("missing builtin ID in switch!");
7481     case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
7482     case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
7483     case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
7484     case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
7485     case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
7486     }
7487     Ops.push_back(EmitScalarExpr(E->getArg(1)));
7488     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
7489     Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
7490     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
7491     return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
7492   }
7493   case NEON::BI__builtin_neon_vceqs_f32:
7494   case NEON::BI__builtin_neon_vcles_f32:
7495   case NEON::BI__builtin_neon_vclts_f32:
7496   case NEON::BI__builtin_neon_vcges_f32:
7497   case NEON::BI__builtin_neon_vcgts_f32: {
7498     llvm::CmpInst::Predicate P;
7499     switch (BuiltinID) {
7500     default: llvm_unreachable("missing builtin ID in switch!");
7501     case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
7502     case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
7503     case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
7504     case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
7505     case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
7506     }
7507     Ops.push_back(EmitScalarExpr(E->getArg(1)));
7508     Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
7509     Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
7510     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
7511     return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
7512   }
7513   case NEON::BI__builtin_neon_vceqh_f16:
7514   case NEON::BI__builtin_neon_vcleh_f16:
7515   case NEON::BI__builtin_neon_vclth_f16:
7516   case NEON::BI__builtin_neon_vcgeh_f16:
7517   case NEON::BI__builtin_neon_vcgth_f16: {
7518     llvm::CmpInst::Predicate P;
7519     switch (BuiltinID) {
7520     default: llvm_unreachable("missing builtin ID in switch!");
7521     case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
7522     case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
7523     case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
7524     case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
7525     case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
7526     }
7527     Ops.push_back(EmitScalarExpr(E->getArg(1)));
7528     Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
7529     Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
7530     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
7531     return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
7532   }
7533   case NEON::BI__builtin_neon_vceqd_s64:
7534   case NEON::BI__builtin_neon_vceqd_u64:
7535   case NEON::BI__builtin_neon_vcgtd_s64:
7536   case NEON::BI__builtin_neon_vcgtd_u64:
7537   case NEON::BI__builtin_neon_vcltd_s64:
7538   case NEON::BI__builtin_neon_vcltd_u64:
7539   case NEON::BI__builtin_neon_vcged_u64:
7540   case NEON::BI__builtin_neon_vcged_s64:
7541   case NEON::BI__builtin_neon_vcled_u64:
7542   case NEON::BI__builtin_neon_vcled_s64: {
7543     llvm::CmpInst::Predicate P;
7544     switch (BuiltinID) {
7545     default: llvm_unreachable("missing builtin ID in switch!");
7546     case NEON::BI__builtin_neon_vceqd_s64:
7547     case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
7548     case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
7549     case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
7550     case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
7551     case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
7552     case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
7553     case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
7554     case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
7555     case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
7556     }
7557     Ops.push_back(EmitScalarExpr(E->getArg(1)));
7558     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
7559     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
7560     Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
7561     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
7562   }
7563   case NEON::BI__builtin_neon_vtstd_s64:
7564   case NEON::BI__builtin_neon_vtstd_u64: {
7565     Ops.push_back(EmitScalarExpr(E->getArg(1)));
7566     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
7567     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
7568     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
7569     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
7570                                 llvm::Constant::getNullValue(Int64Ty));
7571     return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
7572   }
7573   case NEON::BI__builtin_neon_vset_lane_i8:
7574   case NEON::BI__builtin_neon_vset_lane_i16:
7575   case NEON::BI__builtin_neon_vset_lane_i32:
7576   case NEON::BI__builtin_neon_vset_lane_i64:
7577   case NEON::BI__builtin_neon_vset_lane_f32:
7578   case NEON::BI__builtin_neon_vsetq_lane_i8:
7579   case NEON::BI__builtin_neon_vsetq_lane_i16:
7580   case NEON::BI__builtin_neon_vsetq_lane_i32:
7581   case NEON::BI__builtin_neon_vsetq_lane_i64:
7582   case NEON::BI__builtin_neon_vsetq_lane_f32:
7583     Ops.push_back(EmitScalarExpr(E->getArg(2)));
7584     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
7585   case NEON::BI__builtin_neon_vset_lane_f64:
7586     // The vector type needs a cast for the v1f64 variant.
7587     Ops[1] = Builder.CreateBitCast(Ops[1],
7588                                    llvm::VectorType::get(DoubleTy, 1));
7589     Ops.push_back(EmitScalarExpr(E->getArg(2)));
7590     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
7591   case NEON::BI__builtin_neon_vsetq_lane_f64:
7592     // The vector type needs a cast for the v2f64 variant.
7593     Ops[1] = Builder.CreateBitCast(Ops[1],
7594         llvm::VectorType::get(DoubleTy, 2));
7595     Ops.push_back(EmitScalarExpr(E->getArg(2)));
7596     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
7597 
7598   case NEON::BI__builtin_neon_vget_lane_i8:
7599   case NEON::BI__builtin_neon_vdupb_lane_i8:
7600     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
7601     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7602                                         "vget_lane");
7603   case NEON::BI__builtin_neon_vgetq_lane_i8:
7604   case NEON::BI__builtin_neon_vdupb_laneq_i8:
7605     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
7606     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7607                                         "vgetq_lane");
7608   case NEON::BI__builtin_neon_vget_lane_i16:
7609   case NEON::BI__builtin_neon_vduph_lane_i16:
7610     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
7611     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7612                                         "vget_lane");
7613   case NEON::BI__builtin_neon_vgetq_lane_i16:
7614   case NEON::BI__builtin_neon_vduph_laneq_i16:
7615     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
7616     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7617                                         "vgetq_lane");
7618   case NEON::BI__builtin_neon_vget_lane_i32:
7619   case NEON::BI__builtin_neon_vdups_lane_i32:
7620     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
7621     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7622                                         "vget_lane");
7623   case NEON::BI__builtin_neon_vdups_lane_f32:
7624     Ops[0] = Builder.CreateBitCast(Ops[0],
7625         llvm::VectorType::get(FloatTy, 2));
7626     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7627                                         "vdups_lane");
7628   case NEON::BI__builtin_neon_vgetq_lane_i32:
7629   case NEON::BI__builtin_neon_vdups_laneq_i32:
7630     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
7631     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7632                                         "vgetq_lane");
7633   case NEON::BI__builtin_neon_vget_lane_i64:
7634   case NEON::BI__builtin_neon_vdupd_lane_i64:
7635     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
7636     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7637                                         "vget_lane");
7638   case NEON::BI__builtin_neon_vdupd_lane_f64:
7639     Ops[0] = Builder.CreateBitCast(Ops[0],
7640         llvm::VectorType::get(DoubleTy, 1));
7641     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7642                                         "vdupd_lane");
7643   case NEON::BI__builtin_neon_vgetq_lane_i64:
7644   case NEON::BI__builtin_neon_vdupd_laneq_i64:
7645     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
7646     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7647                                         "vgetq_lane");
7648   case NEON::BI__builtin_neon_vget_lane_f32:
7649     Ops[0] = Builder.CreateBitCast(Ops[0],
7650         llvm::VectorType::get(FloatTy, 2));
7651     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7652                                         "vget_lane");
7653   case NEON::BI__builtin_neon_vget_lane_f64:
7654     Ops[0] = Builder.CreateBitCast(Ops[0],
7655         llvm::VectorType::get(DoubleTy, 1));
7656     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7657                                         "vget_lane");
7658   case NEON::BI__builtin_neon_vgetq_lane_f32:
7659   case NEON::BI__builtin_neon_vdups_laneq_f32:
7660     Ops[0] = Builder.CreateBitCast(Ops[0],
7661         llvm::VectorType::get(FloatTy, 4));
7662     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7663                                         "vgetq_lane");
7664   case NEON::BI__builtin_neon_vgetq_lane_f64:
7665   case NEON::BI__builtin_neon_vdupd_laneq_f64:
7666     Ops[0] = Builder.CreateBitCast(Ops[0],
7667         llvm::VectorType::get(DoubleTy, 2));
7668     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7669                                         "vgetq_lane");
7670   case NEON::BI__builtin_neon_vaddh_f16:
7671     Ops.push_back(EmitScalarExpr(E->getArg(1)));
7672     return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
7673   case NEON::BI__builtin_neon_vsubh_f16:
7674     Ops.push_back(EmitScalarExpr(E->getArg(1)));
7675     return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
7676   case NEON::BI__builtin_neon_vmulh_f16:
7677     Ops.push_back(EmitScalarExpr(E->getArg(1)));
7678     return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
7679   case NEON::BI__builtin_neon_vdivh_f16:
7680     Ops.push_back(EmitScalarExpr(E->getArg(1)));
7681     return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
7682   case NEON::BI__builtin_neon_vfmah_f16: {
7683     Function *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy);
7684     // NEON intrinsic puts accumulator first, unlike the LLVM fma.
7685     return Builder.CreateCall(F,
7686       {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
7687   }
7688   case NEON::BI__builtin_neon_vfmsh_f16: {
7689     Function *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy);
7690     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy);
7691     Value* Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), "vsubh");
7692     // NEON intrinsic puts accumulator first, unlike the LLVM fma.
7693     return Builder.CreateCall(F, {Sub, EmitScalarExpr(E->getArg(2)), Ops[0]});
7694   }
7695   case NEON::BI__builtin_neon_vaddd_s64:
7696   case NEON::BI__builtin_neon_vaddd_u64:
7697     return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
7698   case NEON::BI__builtin_neon_vsubd_s64:
7699   case NEON::BI__builtin_neon_vsubd_u64:
7700     return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
7701   case NEON::BI__builtin_neon_vqdmlalh_s16:
7702   case NEON::BI__builtin_neon_vqdmlslh_s16: {
7703     SmallVector<Value *, 2> ProductOps;
7704     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
7705     ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
7706     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
7707     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
7708                           ProductOps, "vqdmlXl");
7709     Constant *CI = ConstantInt::get(SizeTy, 0);
7710     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
7711 
7712     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
7713                                         ? Intrinsic::aarch64_neon_sqadd
7714                                         : Intrinsic::aarch64_neon_sqsub;
7715     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
7716   }
7717   case NEON::BI__builtin_neon_vqshlud_n_s64: {
7718     Ops.push_back(EmitScalarExpr(E->getArg(1)));
7719     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
7720     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
7721                         Ops, "vqshlu_n");
7722   }
7723   case NEON::BI__builtin_neon_vqshld_n_u64:
7724   case NEON::BI__builtin_neon_vqshld_n_s64: {
7725     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
7726                                    ? Intrinsic::aarch64_neon_uqshl
7727                                    : Intrinsic::aarch64_neon_sqshl;
7728     Ops.push_back(EmitScalarExpr(E->getArg(1)));
7729     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
7730     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
7731   }
7732   case NEON::BI__builtin_neon_vrshrd_n_u64:
7733   case NEON::BI__builtin_neon_vrshrd_n_s64: {
7734     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
7735                                    ? Intrinsic::aarch64_neon_urshl
7736                                    : Intrinsic::aarch64_neon_srshl;
7737     Ops.push_back(EmitScalarExpr(E->getArg(1)));
7738     int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
7739     Ops[1] = ConstantInt::get(Int64Ty, -SV);
7740     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
7741   }
7742   case NEON::BI__builtin_neon_vrsrad_n_u64:
7743   case NEON::BI__builtin_neon_vrsrad_n_s64: {
7744     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
7745                                    ? Intrinsic::aarch64_neon_urshl
7746                                    : Intrinsic::aarch64_neon_srshl;
7747     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
7748     Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
7749     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
7750                                 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
7751     return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
7752   }
7753   case NEON::BI__builtin_neon_vshld_n_s64:
7754   case NEON::BI__builtin_neon_vshld_n_u64: {
7755     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7756     return Builder.CreateShl(
7757         Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
7758   }
7759   case NEON::BI__builtin_neon_vshrd_n_s64: {
7760     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7761     return Builder.CreateAShr(
7762         Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
7763                                                    Amt->getZExtValue())),
7764         "shrd_n");
7765   }
7766   case NEON::BI__builtin_neon_vshrd_n_u64: {
7767     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7768     uint64_t ShiftAmt = Amt->getZExtValue();
7769     // Right-shifting an unsigned value by its size yields 0.
7770     if (ShiftAmt == 64)
7771       return ConstantInt::get(Int64Ty, 0);
7772     return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
7773                               "shrd_n");
7774   }
7775   case NEON::BI__builtin_neon_vsrad_n_s64: {
7776     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
7777     Ops[1] = Builder.CreateAShr(
7778         Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
7779                                                    Amt->getZExtValue())),
7780         "shrd_n");
7781     return Builder.CreateAdd(Ops[0], Ops[1]);
7782   }
7783   case NEON::BI__builtin_neon_vsrad_n_u64: {
7784     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
7785     uint64_t ShiftAmt = Amt->getZExtValue();
7786     // Right-shifting an unsigned value by its size yields 0.
7787     // As Op + 0 = Op, return Ops[0] directly.
7788     if (ShiftAmt == 64)
7789       return Ops[0];
7790     Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
7791                                 "shrd_n");
7792     return Builder.CreateAdd(Ops[0], Ops[1]);
7793   }
7794   case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
7795   case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
7796   case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
7797   case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
7798     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
7799                                           "lane");
7800     SmallVector<Value *, 2> ProductOps;
7801     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
7802     ProductOps.push_back(vectorWrapScalar16(Ops[2]));
7803     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
7804     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
7805                           ProductOps, "vqdmlXl");
7806     Constant *CI = ConstantInt::get(SizeTy, 0);
7807     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
7808     Ops.pop_back();
7809 
7810     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
7811                        BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
7812                           ? Intrinsic::aarch64_neon_sqadd
7813                           : Intrinsic::aarch64_neon_sqsub;
7814     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
7815   }
7816   case NEON::BI__builtin_neon_vqdmlals_s32:
7817   case NEON::BI__builtin_neon_vqdmlsls_s32: {
7818     SmallVector<Value *, 2> ProductOps;
7819     ProductOps.push_back(Ops[1]);
7820     ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
7821     Ops[1] =
7822         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
7823                      ProductOps, "vqdmlXl");
7824 
7825     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
7826                                         ? Intrinsic::aarch64_neon_sqadd
7827                                         : Intrinsic::aarch64_neon_sqsub;
7828     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
7829   }
7830   case NEON::BI__builtin_neon_vqdmlals_lane_s32:
7831   case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
7832   case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
7833   case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
7834     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
7835                                           "lane");
7836     SmallVector<Value *, 2> ProductOps;
7837     ProductOps.push_back(Ops[1]);
7838     ProductOps.push_back(Ops[2]);
7839     Ops[1] =
7840         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
7841                      ProductOps, "vqdmlXl");
7842     Ops.pop_back();
7843 
7844     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
7845                        BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
7846                           ? Intrinsic::aarch64_neon_sqadd
7847                           : Intrinsic::aarch64_neon_sqsub;
7848     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
7849   }
7850   case NEON::BI__builtin_neon_vduph_lane_f16: {
7851     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7852                                         "vget_lane");
7853   }
7854   case NEON::BI__builtin_neon_vduph_laneq_f16: {
7855     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7856                                         "vgetq_lane");
7857   }
7858   }
7859 
7860   llvm::VectorType *VTy = GetNeonType(this, Type);
7861   llvm::Type *Ty = VTy;
7862   if (!Ty)
7863     return nullptr;
7864 
7865   // Not all intrinsics handled by the common case work for AArch64 yet, so only
7866   // defer to common code if it's been added to our special map.
7867   Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
7868                                    AArch64SIMDIntrinsicsProvenSorted);
7869 
7870   if (Builtin)
7871     return EmitCommonNeonBuiltinExpr(
7872         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
7873         Builtin->NameHint, Builtin->TypeModifier, E, Ops,
7874         /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
7875 
7876   if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
7877     return V;
7878 
7879   unsigned Int;
7880   switch (BuiltinID) {
7881   default: return nullptr;
7882   case NEON::BI__builtin_neon_vbsl_v:
7883   case NEON::BI__builtin_neon_vbslq_v: {
7884     llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
7885     Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
7886     Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
7887     Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
7888 
7889     Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
7890     Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
7891     Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
7892     return Builder.CreateBitCast(Ops[0], Ty);
7893   }
7894   case NEON::BI__builtin_neon_vfma_lane_v:
7895   case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
7896     // The ARM builtins (and instructions) have the addend as the first
7897     // operand, but the 'fma' intrinsics have it last. Swap it around here.
7898     Value *Addend = Ops[0];
7899     Value *Multiplicand = Ops[1];
7900     Value *LaneSource = Ops[2];
7901     Ops[0] = Multiplicand;
7902     Ops[1] = LaneSource;
7903     Ops[2] = Addend;
7904 
7905     // Now adjust things to handle the lane access.
7906     llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
7907       llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
7908       VTy;
7909     llvm::Constant *cst = cast<Constant>(Ops[3]);
7910     Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
7911     Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
7912     Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
7913 
7914     Ops.pop_back();
7915     Int = Intrinsic::fma;
7916     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
7917   }
7918   case NEON::BI__builtin_neon_vfma_laneq_v: {
7919     llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
7920     // v1f64 fma should be mapped to Neon scalar f64 fma
7921     if (VTy && VTy->getElementType() == DoubleTy) {
7922       Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
7923       Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
7924       llvm::Type *VTy = GetNeonType(this,
7925         NeonTypeFlags(NeonTypeFlags::Float64, false, true));
7926       Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
7927       Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
7928       Function *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
7929       Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
7930       return Builder.CreateBitCast(Result, Ty);
7931     }
7932     Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
7933     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7934     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7935 
7936     llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
7937                                             VTy->getNumElements() * 2);
7938     Ops[2] = Builder.CreateBitCast(Ops[2], STy);
7939     Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
7940                                                cast<ConstantInt>(Ops[3]));
7941     Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
7942 
7943     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
7944   }
7945   case NEON::BI__builtin_neon_vfmaq_laneq_v: {
7946     Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
7947     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7948     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7949 
7950     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7951     Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
7952     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
7953   }
7954   case NEON::BI__builtin_neon_vfmah_lane_f16:
7955   case NEON::BI__builtin_neon_vfmas_lane_f32:
7956   case NEON::BI__builtin_neon_vfmah_laneq_f16:
7957   case NEON::BI__builtin_neon_vfmas_laneq_f32:
7958   case NEON::BI__builtin_neon_vfmad_lane_f64:
7959   case NEON::BI__builtin_neon_vfmad_laneq_f64: {
7960     Ops.push_back(EmitScalarExpr(E->getArg(3)));
7961     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
7962     Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
7963     Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
7964     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
7965   }
7966   case NEON::BI__builtin_neon_vmull_v:
7967     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
7968     Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
7969     if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
7970     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
7971   case NEON::BI__builtin_neon_vmax_v:
7972   case NEON::BI__builtin_neon_vmaxq_v:
7973     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
7974     Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
7975     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
7976     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
7977   case NEON::BI__builtin_neon_vmaxh_f16: {
7978     Ops.push_back(EmitScalarExpr(E->getArg(1)));
7979     Int = Intrinsic::aarch64_neon_fmax;
7980     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
7981   }
7982   case NEON::BI__builtin_neon_vmin_v:
7983   case NEON::BI__builtin_neon_vminq_v:
7984     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
7985     Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
7986     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
7987     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
7988   case NEON::BI__builtin_neon_vminh_f16: {
7989     Ops.push_back(EmitScalarExpr(E->getArg(1)));
7990     Int = Intrinsic::aarch64_neon_fmin;
7991     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
7992   }
7993   case NEON::BI__builtin_neon_vabd_v:
7994   case NEON::BI__builtin_neon_vabdq_v:
7995     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
7996     Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
7997     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
7998     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
7999   case NEON::BI__builtin_neon_vpadal_v:
8000   case NEON::BI__builtin_neon_vpadalq_v: {
8001     unsigned ArgElts = VTy->getNumElements();
8002     llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
8003     unsigned BitWidth = EltTy->getBitWidth();
8004     llvm::Type *ArgTy = llvm::VectorType::get(
8005         llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
8006     llvm::Type* Tys[2] = { VTy, ArgTy };
8007     Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
8008     SmallVector<llvm::Value*, 1> TmpOps;
8009     TmpOps.push_back(Ops[1]);
8010     Function *F = CGM.getIntrinsic(Int, Tys);
8011     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
8012     llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
8013     return Builder.CreateAdd(tmp, addend);
8014   }
8015   case NEON::BI__builtin_neon_vpmin_v:
8016   case NEON::BI__builtin_neon_vpminq_v:
8017     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
8018     Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
8019     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
8020     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
8021   case NEON::BI__builtin_neon_vpmax_v:
8022   case NEON::BI__builtin_neon_vpmaxq_v:
8023     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
8024     Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
8025     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
8026     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
8027   case NEON::BI__builtin_neon_vminnm_v:
8028   case NEON::BI__builtin_neon_vminnmq_v:
8029     Int = Intrinsic::aarch64_neon_fminnm;
8030     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
8031   case NEON::BI__builtin_neon_vminnmh_f16:
8032     Ops.push_back(EmitScalarExpr(E->getArg(1)));
8033     Int = Intrinsic::aarch64_neon_fminnm;
8034     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
8035   case NEON::BI__builtin_neon_vmaxnm_v:
8036   case NEON::BI__builtin_neon_vmaxnmq_v:
8037     Int = Intrinsic::aarch64_neon_fmaxnm;
8038     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
8039   case NEON::BI__builtin_neon_vmaxnmh_f16:
8040     Ops.push_back(EmitScalarExpr(E->getArg(1)));
8041     Int = Intrinsic::aarch64_neon_fmaxnm;
8042     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
8043   case NEON::BI__builtin_neon_vrecpss_f32: {
8044     Ops.push_back(EmitScalarExpr(E->getArg(1)));
8045     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
8046                         Ops, "vrecps");
8047   }
8048   case NEON::BI__builtin_neon_vrecpsd_f64:
8049     Ops.push_back(EmitScalarExpr(E->getArg(1)));
8050     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
8051                         Ops, "vrecps");
8052   case NEON::BI__builtin_neon_vrecpsh_f16:
8053     Ops.push_back(EmitScalarExpr(E->getArg(1)));
8054     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
8055                         Ops, "vrecps");
8056   case NEON::BI__builtin_neon_vqshrun_n_v:
8057     Int = Intrinsic::aarch64_neon_sqshrun;
8058     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
8059   case NEON::BI__builtin_neon_vqrshrun_n_v:
8060     Int = Intrinsic::aarch64_neon_sqrshrun;
8061     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
8062   case NEON::BI__builtin_neon_vqshrn_n_v:
8063     Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
8064     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
8065   case NEON::BI__builtin_neon_vrshrn_n_v:
8066     Int = Intrinsic::aarch64_neon_rshrn;
8067     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
8068   case NEON::BI__builtin_neon_vqrshrn_n_v:
8069     Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
8070     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
8071   case NEON::BI__builtin_neon_vrndah_f16: {
8072     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8073     Int = Intrinsic::round;
8074     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
8075   }
8076   case NEON::BI__builtin_neon_vrnda_v:
8077   case NEON::BI__builtin_neon_vrndaq_v: {
8078     Int = Intrinsic::round;
8079     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
8080   }
8081   case NEON::BI__builtin_neon_vrndih_f16: {
8082     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8083     Int = Intrinsic::nearbyint;
8084     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
8085   }
8086   case NEON::BI__builtin_neon_vrndmh_f16: {
8087     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8088     Int = Intrinsic::floor;
8089     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
8090   }
8091   case NEON::BI__builtin_neon_vrndm_v:
8092   case NEON::BI__builtin_neon_vrndmq_v: {
8093     Int = Intrinsic::floor;
8094     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
8095   }
8096   case NEON::BI__builtin_neon_vrndnh_f16: {
8097     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8098     Int = Intrinsic::aarch64_neon_frintn;
8099     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
8100   }
8101   case NEON::BI__builtin_neon_vrndn_v:
8102   case NEON::BI__builtin_neon_vrndnq_v: {
8103     Int = Intrinsic::aarch64_neon_frintn;
8104     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
8105   }
8106   case NEON::BI__builtin_neon_vrndns_f32: {
8107     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8108     Int = Intrinsic::aarch64_neon_frintn;
8109     return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
8110   }
8111   case NEON::BI__builtin_neon_vrndph_f16: {
8112     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8113     Int = Intrinsic::ceil;
8114     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
8115   }
8116   case NEON::BI__builtin_neon_vrndp_v:
8117   case NEON::BI__builtin_neon_vrndpq_v: {
8118     Int = Intrinsic::ceil;
8119     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
8120   }
8121   case NEON::BI__builtin_neon_vrndxh_f16: {
8122     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8123     Int = Intrinsic::rint;
8124     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
8125   }
8126   case NEON::BI__builtin_neon_vrndx_v:
8127   case NEON::BI__builtin_neon_vrndxq_v: {
8128     Int = Intrinsic::rint;
8129     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
8130   }
8131   case NEON::BI__builtin_neon_vrndh_f16: {
8132     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8133     Int = Intrinsic::trunc;
8134     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
8135   }
8136   case NEON::BI__builtin_neon_vrnd_v:
8137   case NEON::BI__builtin_neon_vrndq_v: {
8138     Int = Intrinsic::trunc;
8139     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
8140   }
8141   case NEON::BI__builtin_neon_vcvt_f64_v:
8142   case NEON::BI__builtin_neon_vcvtq_f64_v:
8143     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8144     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
8145     return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
8146                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
8147   case NEON::BI__builtin_neon_vcvt_f64_f32: {
8148     assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
8149            "unexpected vcvt_f64_f32 builtin");
8150     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
8151     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
8152 
8153     return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
8154   }
8155   case NEON::BI__builtin_neon_vcvt_f32_f64: {
8156     assert(Type.getEltType() == NeonTypeFlags::Float32 &&
8157            "unexpected vcvt_f32_f64 builtin");
8158     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
8159     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
8160 
8161     return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
8162   }
8163   case NEON::BI__builtin_neon_vcvt_s32_v:
8164   case NEON::BI__builtin_neon_vcvt_u32_v:
8165   case NEON::BI__builtin_neon_vcvt_s64_v:
8166   case NEON::BI__builtin_neon_vcvt_u64_v:
8167   case NEON::BI__builtin_neon_vcvt_s16_v:
8168   case NEON::BI__builtin_neon_vcvt_u16_v:
8169   case NEON::BI__builtin_neon_vcvtq_s32_v:
8170   case NEON::BI__builtin_neon_vcvtq_u32_v:
8171   case NEON::BI__builtin_neon_vcvtq_s64_v:
8172   case NEON::BI__builtin_neon_vcvtq_u64_v:
8173   case NEON::BI__builtin_neon_vcvtq_s16_v:
8174   case NEON::BI__builtin_neon_vcvtq_u16_v: {
8175     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
8176     if (usgn)
8177       return Builder.CreateFPToUI(Ops[0], Ty);
8178     return Builder.CreateFPToSI(Ops[0], Ty);
8179   }
8180   case NEON::BI__builtin_neon_vcvta_s16_v:
8181   case NEON::BI__builtin_neon_vcvta_u16_v:
8182   case NEON::BI__builtin_neon_vcvta_s32_v:
8183   case NEON::BI__builtin_neon_vcvtaq_s16_v:
8184   case NEON::BI__builtin_neon_vcvtaq_s32_v:
8185   case NEON::BI__builtin_neon_vcvta_u32_v:
8186   case NEON::BI__builtin_neon_vcvtaq_u16_v:
8187   case NEON::BI__builtin_neon_vcvtaq_u32_v:
8188   case NEON::BI__builtin_neon_vcvta_s64_v:
8189   case NEON::BI__builtin_neon_vcvtaq_s64_v:
8190   case NEON::BI__builtin_neon_vcvta_u64_v:
8191   case NEON::BI__builtin_neon_vcvtaq_u64_v: {
8192     Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
8193     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8194     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
8195   }
8196   case NEON::BI__builtin_neon_vcvtm_s16_v:
8197   case NEON::BI__builtin_neon_vcvtm_s32_v:
8198   case NEON::BI__builtin_neon_vcvtmq_s16_v:
8199   case NEON::BI__builtin_neon_vcvtmq_s32_v:
8200   case NEON::BI__builtin_neon_vcvtm_u16_v:
8201   case NEON::BI__builtin_neon_vcvtm_u32_v:
8202   case NEON::BI__builtin_neon_vcvtmq_u16_v:
8203   case NEON::BI__builtin_neon_vcvtmq_u32_v:
8204   case NEON::BI__builtin_neon_vcvtm_s64_v:
8205   case NEON::BI__builtin_neon_vcvtmq_s64_v:
8206   case NEON::BI__builtin_neon_vcvtm_u64_v:
8207   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
8208     Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
8209     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8210     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
8211   }
8212   case NEON::BI__builtin_neon_vcvtn_s16_v:
8213   case NEON::BI__builtin_neon_vcvtn_s32_v:
8214   case NEON::BI__builtin_neon_vcvtnq_s16_v:
8215   case NEON::BI__builtin_neon_vcvtnq_s32_v:
8216   case NEON::BI__builtin_neon_vcvtn_u16_v:
8217   case NEON::BI__builtin_neon_vcvtn_u32_v:
8218   case NEON::BI__builtin_neon_vcvtnq_u16_v:
8219   case NEON::BI__builtin_neon_vcvtnq_u32_v:
8220   case NEON::BI__builtin_neon_vcvtn_s64_v:
8221   case NEON::BI__builtin_neon_vcvtnq_s64_v:
8222   case NEON::BI__builtin_neon_vcvtn_u64_v:
8223   case NEON::BI__builtin_neon_vcvtnq_u64_v: {
8224     Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
8225     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8226     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
8227   }
8228   case NEON::BI__builtin_neon_vcvtp_s16_v:
8229   case NEON::BI__builtin_neon_vcvtp_s32_v:
8230   case NEON::BI__builtin_neon_vcvtpq_s16_v:
8231   case NEON::BI__builtin_neon_vcvtpq_s32_v:
8232   case NEON::BI__builtin_neon_vcvtp_u16_v:
8233   case NEON::BI__builtin_neon_vcvtp_u32_v:
8234   case NEON::BI__builtin_neon_vcvtpq_u16_v:
8235   case NEON::BI__builtin_neon_vcvtpq_u32_v:
8236   case NEON::BI__builtin_neon_vcvtp_s64_v:
8237   case NEON::BI__builtin_neon_vcvtpq_s64_v:
8238   case NEON::BI__builtin_neon_vcvtp_u64_v:
8239   case NEON::BI__builtin_neon_vcvtpq_u64_v: {
8240     Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
8241     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8242     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
8243   }
8244   case NEON::BI__builtin_neon_vmulx_v:
8245   case NEON::BI__builtin_neon_vmulxq_v: {
8246     Int = Intrinsic::aarch64_neon_fmulx;
8247     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
8248   }
8249   case NEON::BI__builtin_neon_vmulxh_lane_f16:
8250   case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
8251     // vmulx_lane should be mapped to Neon scalar mulx after
8252     // extracting the scalar element
8253     Ops.push_back(EmitScalarExpr(E->getArg(2)));
8254     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
8255     Ops.pop_back();
8256     Int = Intrinsic::aarch64_neon_fmulx;
8257     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
8258   }
8259   case NEON::BI__builtin_neon_vmul_lane_v:
8260   case NEON::BI__builtin_neon_vmul_laneq_v: {
8261     // v1f64 vmul_lane should be mapped to Neon scalar mul lane
8262     bool Quad = false;
8263     if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
8264       Quad = true;
8265     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
8266     llvm::Type *VTy = GetNeonType(this,
8267       NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
8268     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
8269     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
8270     Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
8271     return Builder.CreateBitCast(Result, Ty);
8272   }
8273   case NEON::BI__builtin_neon_vnegd_s64:
8274     return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
8275   case NEON::BI__builtin_neon_vnegh_f16:
8276     return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
8277   case NEON::BI__builtin_neon_vpmaxnm_v:
8278   case NEON::BI__builtin_neon_vpmaxnmq_v: {
8279     Int = Intrinsic::aarch64_neon_fmaxnmp;
8280     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
8281   }
8282   case NEON::BI__builtin_neon_vpminnm_v:
8283   case NEON::BI__builtin_neon_vpminnmq_v: {
8284     Int = Intrinsic::aarch64_neon_fminnmp;
8285     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
8286   }
8287   case NEON::BI__builtin_neon_vsqrth_f16: {
8288     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8289     Int = Intrinsic::sqrt;
8290     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
8291   }
8292   case NEON::BI__builtin_neon_vsqrt_v:
8293   case NEON::BI__builtin_neon_vsqrtq_v: {
8294     Int = Intrinsic::sqrt;
8295     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8296     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
8297   }
8298   case NEON::BI__builtin_neon_vrbit_v:
8299   case NEON::BI__builtin_neon_vrbitq_v: {
8300     Int = Intrinsic::aarch64_neon_rbit;
8301     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
8302   }
8303   case NEON::BI__builtin_neon_vaddv_u8:
8304     // FIXME: These are handled by the AArch64 scalar code.
8305     usgn = true;
8306     LLVM_FALLTHROUGH;
8307   case NEON::BI__builtin_neon_vaddv_s8: {
8308     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
8309     Ty = Int32Ty;
8310     VTy = llvm::VectorType::get(Int8Ty, 8);
8311     llvm::Type *Tys[2] = { Ty, VTy };
8312     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8313     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
8314     return Builder.CreateTrunc(Ops[0], Int8Ty);
8315   }
8316   case NEON::BI__builtin_neon_vaddv_u16:
8317     usgn = true;
8318     LLVM_FALLTHROUGH;
8319   case NEON::BI__builtin_neon_vaddv_s16: {
8320     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
8321     Ty = Int32Ty;
8322     VTy = llvm::VectorType::get(Int16Ty, 4);
8323     llvm::Type *Tys[2] = { Ty, VTy };
8324     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8325     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
8326     return Builder.CreateTrunc(Ops[0], Int16Ty);
8327   }
8328   case NEON::BI__builtin_neon_vaddvq_u8:
8329     usgn = true;
8330     LLVM_FALLTHROUGH;
8331   case NEON::BI__builtin_neon_vaddvq_s8: {
8332     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
8333     Ty = Int32Ty;
8334     VTy = llvm::VectorType::get(Int8Ty, 16);
8335     llvm::Type *Tys[2] = { Ty, VTy };
8336     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8337     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
8338     return Builder.CreateTrunc(Ops[0], Int8Ty);
8339   }
8340   case NEON::BI__builtin_neon_vaddvq_u16:
8341     usgn = true;
8342     LLVM_FALLTHROUGH;
8343   case NEON::BI__builtin_neon_vaddvq_s16: {
8344     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
8345     Ty = Int32Ty;
8346     VTy = llvm::VectorType::get(Int16Ty, 8);
8347     llvm::Type *Tys[2] = { Ty, VTy };
8348     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8349     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
8350     return Builder.CreateTrunc(Ops[0], Int16Ty);
8351   }
8352   case NEON::BI__builtin_neon_vmaxv_u8: {
8353     Int = Intrinsic::aarch64_neon_umaxv;
8354     Ty = Int32Ty;
8355     VTy = llvm::VectorType::get(Int8Ty, 8);
8356     llvm::Type *Tys[2] = { Ty, VTy };
8357     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8358     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8359     return Builder.CreateTrunc(Ops[0], Int8Ty);
8360   }
8361   case NEON::BI__builtin_neon_vmaxv_u16: {
8362     Int = Intrinsic::aarch64_neon_umaxv;
8363     Ty = Int32Ty;
8364     VTy = llvm::VectorType::get(Int16Ty, 4);
8365     llvm::Type *Tys[2] = { Ty, VTy };
8366     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8367     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8368     return Builder.CreateTrunc(Ops[0], Int16Ty);
8369   }
8370   case NEON::BI__builtin_neon_vmaxvq_u8: {
8371     Int = Intrinsic::aarch64_neon_umaxv;
8372     Ty = Int32Ty;
8373     VTy = llvm::VectorType::get(Int8Ty, 16);
8374     llvm::Type *Tys[2] = { Ty, VTy };
8375     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8376     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8377     return Builder.CreateTrunc(Ops[0], Int8Ty);
8378   }
8379   case NEON::BI__builtin_neon_vmaxvq_u16: {
8380     Int = Intrinsic::aarch64_neon_umaxv;
8381     Ty = Int32Ty;
8382     VTy = llvm::VectorType::get(Int16Ty, 8);
8383     llvm::Type *Tys[2] = { Ty, VTy };
8384     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8385     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8386     return Builder.CreateTrunc(Ops[0], Int16Ty);
8387   }
8388   case NEON::BI__builtin_neon_vmaxv_s8: {
8389     Int = Intrinsic::aarch64_neon_smaxv;
8390     Ty = Int32Ty;
8391     VTy = llvm::VectorType::get(Int8Ty, 8);
8392     llvm::Type *Tys[2] = { Ty, VTy };
8393     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8394     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8395     return Builder.CreateTrunc(Ops[0], Int8Ty);
8396   }
8397   case NEON::BI__builtin_neon_vmaxv_s16: {
8398     Int = Intrinsic::aarch64_neon_smaxv;
8399     Ty = Int32Ty;
8400     VTy = llvm::VectorType::get(Int16Ty, 4);
8401     llvm::Type *Tys[2] = { Ty, VTy };
8402     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8403     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8404     return Builder.CreateTrunc(Ops[0], Int16Ty);
8405   }
8406   case NEON::BI__builtin_neon_vmaxvq_s8: {
8407     Int = Intrinsic::aarch64_neon_smaxv;
8408     Ty = Int32Ty;
8409     VTy = llvm::VectorType::get(Int8Ty, 16);
8410     llvm::Type *Tys[2] = { Ty, VTy };
8411     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8412     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8413     return Builder.CreateTrunc(Ops[0], Int8Ty);
8414   }
8415   case NEON::BI__builtin_neon_vmaxvq_s16: {
8416     Int = Intrinsic::aarch64_neon_smaxv;
8417     Ty = Int32Ty;
8418     VTy = llvm::VectorType::get(Int16Ty, 8);
8419     llvm::Type *Tys[2] = { Ty, VTy };
8420     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8421     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8422     return Builder.CreateTrunc(Ops[0], Int16Ty);
8423   }
8424   case NEON::BI__builtin_neon_vmaxv_f16: {
8425     Int = Intrinsic::aarch64_neon_fmaxv;
8426     Ty = HalfTy;
8427     VTy = llvm::VectorType::get(HalfTy, 4);
8428     llvm::Type *Tys[2] = { Ty, VTy };
8429     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8430     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8431     return Builder.CreateTrunc(Ops[0], HalfTy);
8432   }
8433   case NEON::BI__builtin_neon_vmaxvq_f16: {
8434     Int = Intrinsic::aarch64_neon_fmaxv;
8435     Ty = HalfTy;
8436     VTy = llvm::VectorType::get(HalfTy, 8);
8437     llvm::Type *Tys[2] = { Ty, VTy };
8438     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8439     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8440     return Builder.CreateTrunc(Ops[0], HalfTy);
8441   }
8442   case NEON::BI__builtin_neon_vminv_u8: {
8443     Int = Intrinsic::aarch64_neon_uminv;
8444     Ty = Int32Ty;
8445     VTy = llvm::VectorType::get(Int8Ty, 8);
8446     llvm::Type *Tys[2] = { Ty, VTy };
8447     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8448     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8449     return Builder.CreateTrunc(Ops[0], Int8Ty);
8450   }
8451   case NEON::BI__builtin_neon_vminv_u16: {
8452     Int = Intrinsic::aarch64_neon_uminv;
8453     Ty = Int32Ty;
8454     VTy = llvm::VectorType::get(Int16Ty, 4);
8455     llvm::Type *Tys[2] = { Ty, VTy };
8456     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8457     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8458     return Builder.CreateTrunc(Ops[0], Int16Ty);
8459   }
8460   case NEON::BI__builtin_neon_vminvq_u8: {
8461     Int = Intrinsic::aarch64_neon_uminv;
8462     Ty = Int32Ty;
8463     VTy = llvm::VectorType::get(Int8Ty, 16);
8464     llvm::Type *Tys[2] = { Ty, VTy };
8465     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8466     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8467     return Builder.CreateTrunc(Ops[0], Int8Ty);
8468   }
8469   case NEON::BI__builtin_neon_vminvq_u16: {
8470     Int = Intrinsic::aarch64_neon_uminv;
8471     Ty = Int32Ty;
8472     VTy = llvm::VectorType::get(Int16Ty, 8);
8473     llvm::Type *Tys[2] = { Ty, VTy };
8474     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8475     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8476     return Builder.CreateTrunc(Ops[0], Int16Ty);
8477   }
8478   case NEON::BI__builtin_neon_vminv_s8: {
8479     Int = Intrinsic::aarch64_neon_sminv;
8480     Ty = Int32Ty;
8481     VTy = llvm::VectorType::get(Int8Ty, 8);
8482     llvm::Type *Tys[2] = { Ty, VTy };
8483     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8484     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8485     return Builder.CreateTrunc(Ops[0], Int8Ty);
8486   }
8487   case NEON::BI__builtin_neon_vminv_s16: {
8488     Int = Intrinsic::aarch64_neon_sminv;
8489     Ty = Int32Ty;
8490     VTy = llvm::VectorType::get(Int16Ty, 4);
8491     llvm::Type *Tys[2] = { Ty, VTy };
8492     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8493     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8494     return Builder.CreateTrunc(Ops[0], Int16Ty);
8495   }
8496   case NEON::BI__builtin_neon_vminvq_s8: {
8497     Int = Intrinsic::aarch64_neon_sminv;
8498     Ty = Int32Ty;
8499     VTy = llvm::VectorType::get(Int8Ty, 16);
8500     llvm::Type *Tys[2] = { Ty, VTy };
8501     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8502     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8503     return Builder.CreateTrunc(Ops[0], Int8Ty);
8504   }
8505   case NEON::BI__builtin_neon_vminvq_s16: {
8506     Int = Intrinsic::aarch64_neon_sminv;
8507     Ty = Int32Ty;
8508     VTy = llvm::VectorType::get(Int16Ty, 8);
8509     llvm::Type *Tys[2] = { Ty, VTy };
8510     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8511     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8512     return Builder.CreateTrunc(Ops[0], Int16Ty);
8513   }
8514   case NEON::BI__builtin_neon_vminv_f16: {
8515     Int = Intrinsic::aarch64_neon_fminv;
8516     Ty = HalfTy;
8517     VTy = llvm::VectorType::get(HalfTy, 4);
8518     llvm::Type *Tys[2] = { Ty, VTy };
8519     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8520     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8521     return Builder.CreateTrunc(Ops[0], HalfTy);
8522   }
8523   case NEON::BI__builtin_neon_vminvq_f16: {
8524     Int = Intrinsic::aarch64_neon_fminv;
8525     Ty = HalfTy;
8526     VTy = llvm::VectorType::get(HalfTy, 8);
8527     llvm::Type *Tys[2] = { Ty, VTy };
8528     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8529     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8530     return Builder.CreateTrunc(Ops[0], HalfTy);
8531   }
8532   case NEON::BI__builtin_neon_vmaxnmv_f16: {
8533     Int = Intrinsic::aarch64_neon_fmaxnmv;
8534     Ty = HalfTy;
8535     VTy = llvm::VectorType::get(HalfTy, 4);
8536     llvm::Type *Tys[2] = { Ty, VTy };
8537     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8538     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
8539     return Builder.CreateTrunc(Ops[0], HalfTy);
8540   }
8541   case NEON::BI__builtin_neon_vmaxnmvq_f16: {
8542     Int = Intrinsic::aarch64_neon_fmaxnmv;
8543     Ty = HalfTy;
8544     VTy = llvm::VectorType::get(HalfTy, 8);
8545     llvm::Type *Tys[2] = { Ty, VTy };
8546     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8547     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
8548     return Builder.CreateTrunc(Ops[0], HalfTy);
8549   }
8550   case NEON::BI__builtin_neon_vminnmv_f16: {
8551     Int = Intrinsic::aarch64_neon_fminnmv;
8552     Ty = HalfTy;
8553     VTy = llvm::VectorType::get(HalfTy, 4);
8554     llvm::Type *Tys[2] = { Ty, VTy };
8555     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8556     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
8557     return Builder.CreateTrunc(Ops[0], HalfTy);
8558   }
8559   case NEON::BI__builtin_neon_vminnmvq_f16: {
8560     Int = Intrinsic::aarch64_neon_fminnmv;
8561     Ty = HalfTy;
8562     VTy = llvm::VectorType::get(HalfTy, 8);
8563     llvm::Type *Tys[2] = { Ty, VTy };
8564     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8565     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
8566     return Builder.CreateTrunc(Ops[0], HalfTy);
8567   }
8568   case NEON::BI__builtin_neon_vmul_n_f64: {
8569     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
8570     Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
8571     return Builder.CreateFMul(Ops[0], RHS);
8572   }
8573   case NEON::BI__builtin_neon_vaddlv_u8: {
8574     Int = Intrinsic::aarch64_neon_uaddlv;
8575     Ty = Int32Ty;
8576     VTy = llvm::VectorType::get(Int8Ty, 8);
8577     llvm::Type *Tys[2] = { Ty, VTy };
8578     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8579     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8580     return Builder.CreateTrunc(Ops[0], Int16Ty);
8581   }
8582   case NEON::BI__builtin_neon_vaddlv_u16: {
8583     Int = Intrinsic::aarch64_neon_uaddlv;
8584     Ty = Int32Ty;
8585     VTy = llvm::VectorType::get(Int16Ty, 4);
8586     llvm::Type *Tys[2] = { Ty, VTy };
8587     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8588     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8589   }
8590   case NEON::BI__builtin_neon_vaddlvq_u8: {
8591     Int = Intrinsic::aarch64_neon_uaddlv;
8592     Ty = Int32Ty;
8593     VTy = llvm::VectorType::get(Int8Ty, 16);
8594     llvm::Type *Tys[2] = { Ty, VTy };
8595     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8596     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8597     return Builder.CreateTrunc(Ops[0], Int16Ty);
8598   }
8599   case NEON::BI__builtin_neon_vaddlvq_u16: {
8600     Int = Intrinsic::aarch64_neon_uaddlv;
8601     Ty = Int32Ty;
8602     VTy = llvm::VectorType::get(Int16Ty, 8);
8603     llvm::Type *Tys[2] = { Ty, VTy };
8604     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8605     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8606   }
8607   case NEON::BI__builtin_neon_vaddlv_s8: {
8608     Int = Intrinsic::aarch64_neon_saddlv;
8609     Ty = Int32Ty;
8610     VTy = llvm::VectorType::get(Int8Ty, 8);
8611     llvm::Type *Tys[2] = { Ty, VTy };
8612     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8613     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8614     return Builder.CreateTrunc(Ops[0], Int16Ty);
8615   }
8616   case NEON::BI__builtin_neon_vaddlv_s16: {
8617     Int = Intrinsic::aarch64_neon_saddlv;
8618     Ty = Int32Ty;
8619     VTy = llvm::VectorType::get(Int16Ty, 4);
8620     llvm::Type *Tys[2] = { Ty, VTy };
8621     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8622     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8623   }
8624   case NEON::BI__builtin_neon_vaddlvq_s8: {
8625     Int = Intrinsic::aarch64_neon_saddlv;
8626     Ty = Int32Ty;
8627     VTy = llvm::VectorType::get(Int8Ty, 16);
8628     llvm::Type *Tys[2] = { Ty, VTy };
8629     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8630     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8631     return Builder.CreateTrunc(Ops[0], Int16Ty);
8632   }
8633   case NEON::BI__builtin_neon_vaddlvq_s16: {
8634     Int = Intrinsic::aarch64_neon_saddlv;
8635     Ty = Int32Ty;
8636     VTy = llvm::VectorType::get(Int16Ty, 8);
8637     llvm::Type *Tys[2] = { Ty, VTy };
8638     Ops.push_back(EmitScalarExpr(E->getArg(0)));
8639     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8640   }
8641   case NEON::BI__builtin_neon_vsri_n_v:
8642   case NEON::BI__builtin_neon_vsriq_n_v: {
8643     Int = Intrinsic::aarch64_neon_vsri;
8644     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
8645     return EmitNeonCall(Intrin, Ops, "vsri_n");
8646   }
8647   case NEON::BI__builtin_neon_vsli_n_v:
8648   case NEON::BI__builtin_neon_vsliq_n_v: {
8649     Int = Intrinsic::aarch64_neon_vsli;
8650     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
8651     return EmitNeonCall(Intrin, Ops, "vsli_n");
8652   }
8653   case NEON::BI__builtin_neon_vsra_n_v:
8654   case NEON::BI__builtin_neon_vsraq_n_v:
8655     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8656     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
8657     return Builder.CreateAdd(Ops[0], Ops[1]);
8658   case NEON::BI__builtin_neon_vrsra_n_v:
8659   case NEON::BI__builtin_neon_vrsraq_n_v: {
8660     Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
8661     SmallVector<llvm::Value*,2> TmpOps;
8662     TmpOps.push_back(Ops[1]);
8663     TmpOps.push_back(Ops[2]);
8664     Function* F = CGM.getIntrinsic(Int, Ty);
8665     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
8666     Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
8667     return Builder.CreateAdd(Ops[0], tmp);
8668   }
8669   case NEON::BI__builtin_neon_vld1_v:
8670   case NEON::BI__builtin_neon_vld1q_v: {
8671     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
8672     auto Alignment = CharUnits::fromQuantity(
8673         BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16);
8674     return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment);
8675   }
8676   case NEON::BI__builtin_neon_vst1_v:
8677   case NEON::BI__builtin_neon_vst1q_v:
8678     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
8679     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
8680     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8681   case NEON::BI__builtin_neon_vld1_lane_v:
8682   case NEON::BI__builtin_neon_vld1q_lane_v: {
8683     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8684     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
8685     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8686     auto Alignment = CharUnits::fromQuantity(
8687         BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16);
8688     Ops[0] =
8689         Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
8690     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
8691   }
8692   case NEON::BI__builtin_neon_vld1_dup_v:
8693   case NEON::BI__builtin_neon_vld1q_dup_v: {
8694     Value *V = UndefValue::get(Ty);
8695     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
8696     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8697     auto Alignment = CharUnits::fromQuantity(
8698         BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 8 : 16);
8699     Ops[0] =
8700         Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
8701     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
8702     Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
8703     return EmitNeonSplat(Ops[0], CI);
8704   }
8705   case NEON::BI__builtin_neon_vst1_lane_v:
8706   case NEON::BI__builtin_neon_vst1q_lane_v:
8707     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8708     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
8709     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
8710     return Builder.CreateDefaultAlignedStore(Ops[1],
8711                                              Builder.CreateBitCast(Ops[0], Ty));
8712   case NEON::BI__builtin_neon_vld2_v:
8713   case NEON::BI__builtin_neon_vld2q_v: {
8714     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
8715     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
8716     llvm::Type *Tys[2] = { VTy, PTy };
8717     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
8718     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
8719     Ops[0] = Builder.CreateBitCast(Ops[0],
8720                 llvm::PointerType::getUnqual(Ops[1]->getType()));
8721     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8722   }
8723   case NEON::BI__builtin_neon_vld3_v:
8724   case NEON::BI__builtin_neon_vld3q_v: {
8725     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
8726     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
8727     llvm::Type *Tys[2] = { VTy, PTy };
8728     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
8729     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
8730     Ops[0] = Builder.CreateBitCast(Ops[0],
8731                 llvm::PointerType::getUnqual(Ops[1]->getType()));
8732     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8733   }
8734   case NEON::BI__builtin_neon_vld4_v:
8735   case NEON::BI__builtin_neon_vld4q_v: {
8736     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
8737     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
8738     llvm::Type *Tys[2] = { VTy, PTy };
8739     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
8740     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
8741     Ops[0] = Builder.CreateBitCast(Ops[0],
8742                 llvm::PointerType::getUnqual(Ops[1]->getType()));
8743     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8744   }
8745   case NEON::BI__builtin_neon_vld2_dup_v:
8746   case NEON::BI__builtin_neon_vld2q_dup_v: {
8747     llvm::Type *PTy =
8748       llvm::PointerType::getUnqual(VTy->getElementType());
8749     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
8750     llvm::Type *Tys[2] = { VTy, PTy };
8751     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
8752     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
8753     Ops[0] = Builder.CreateBitCast(Ops[0],
8754                 llvm::PointerType::getUnqual(Ops[1]->getType()));
8755     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8756   }
8757   case NEON::BI__builtin_neon_vld3_dup_v:
8758   case NEON::BI__builtin_neon_vld3q_dup_v: {
8759     llvm::Type *PTy =
8760       llvm::PointerType::getUnqual(VTy->getElementType());
8761     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
8762     llvm::Type *Tys[2] = { VTy, PTy };
8763     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
8764     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
8765     Ops[0] = Builder.CreateBitCast(Ops[0],
8766                 llvm::PointerType::getUnqual(Ops[1]->getType()));
8767     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8768   }
8769   case NEON::BI__builtin_neon_vld4_dup_v:
8770   case NEON::BI__builtin_neon_vld4q_dup_v: {
8771     llvm::Type *PTy =
8772       llvm::PointerType::getUnqual(VTy->getElementType());
8773     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
8774     llvm::Type *Tys[2] = { VTy, PTy };
8775     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
8776     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
8777     Ops[0] = Builder.CreateBitCast(Ops[0],
8778                 llvm::PointerType::getUnqual(Ops[1]->getType()));
8779     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8780   }
8781   case NEON::BI__builtin_neon_vld2_lane_v:
8782   case NEON::BI__builtin_neon_vld2q_lane_v: {
8783     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
8784     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
8785     Ops.push_back(Ops[1]);
8786     Ops.erase(Ops.begin()+1);
8787     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8788     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8789     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
8790     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
8791     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
8792     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8793     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8794   }
8795   case NEON::BI__builtin_neon_vld3_lane_v:
8796   case NEON::BI__builtin_neon_vld3q_lane_v: {
8797     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
8798     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
8799     Ops.push_back(Ops[1]);
8800     Ops.erase(Ops.begin()+1);
8801     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8802     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8803     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
8804     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
8805     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
8806     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
8807     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8808     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8809   }
8810   case NEON::BI__builtin_neon_vld4_lane_v:
8811   case NEON::BI__builtin_neon_vld4q_lane_v: {
8812     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
8813     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
8814     Ops.push_back(Ops[1]);
8815     Ops.erase(Ops.begin()+1);
8816     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8817     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8818     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
8819     Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
8820     Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
8821     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
8822     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
8823     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8824     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8825   }
8826   case NEON::BI__builtin_neon_vst2_v:
8827   case NEON::BI__builtin_neon_vst2q_v: {
8828     Ops.push_back(Ops[0]);
8829     Ops.erase(Ops.begin());
8830     llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
8831     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
8832                         Ops, "");
8833   }
8834   case NEON::BI__builtin_neon_vst2_lane_v:
8835   case NEON::BI__builtin_neon_vst2q_lane_v: {
8836     Ops.push_back(Ops[0]);
8837     Ops.erase(Ops.begin());
8838     Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
8839     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
8840     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
8841                         Ops, "");
8842   }
8843   case NEON::BI__builtin_neon_vst3_v:
8844   case NEON::BI__builtin_neon_vst3q_v: {
8845     Ops.push_back(Ops[0]);
8846     Ops.erase(Ops.begin());
8847     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
8848     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
8849                         Ops, "");
8850   }
8851   case NEON::BI__builtin_neon_vst3_lane_v:
8852   case NEON::BI__builtin_neon_vst3q_lane_v: {
8853     Ops.push_back(Ops[0]);
8854     Ops.erase(Ops.begin());
8855     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
8856     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
8857     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
8858                         Ops, "");
8859   }
8860   case NEON::BI__builtin_neon_vst4_v:
8861   case NEON::BI__builtin_neon_vst4q_v: {
8862     Ops.push_back(Ops[0]);
8863     Ops.erase(Ops.begin());
8864     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
8865     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
8866                         Ops, "");
8867   }
8868   case NEON::BI__builtin_neon_vst4_lane_v:
8869   case NEON::BI__builtin_neon_vst4q_lane_v: {
8870     Ops.push_back(Ops[0]);
8871     Ops.erase(Ops.begin());
8872     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
8873     llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
8874     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
8875                         Ops, "");
8876   }
8877   case NEON::BI__builtin_neon_vtrn_v:
8878   case NEON::BI__builtin_neon_vtrnq_v: {
8879     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
8880     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8881     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8882     Value *SV = nullptr;
8883 
8884     for (unsigned vi = 0; vi != 2; ++vi) {
8885       SmallVector<uint32_t, 16> Indices;
8886       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8887         Indices.push_back(i+vi);
8888         Indices.push_back(i+e+vi);
8889       }
8890       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8891       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
8892       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8893     }
8894     return SV;
8895   }
8896   case NEON::BI__builtin_neon_vuzp_v:
8897   case NEON::BI__builtin_neon_vuzpq_v: {
8898     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
8899     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8900     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8901     Value *SV = nullptr;
8902 
8903     for (unsigned vi = 0; vi != 2; ++vi) {
8904       SmallVector<uint32_t, 16> Indices;
8905       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8906         Indices.push_back(2*i+vi);
8907 
8908       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8909       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
8910       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8911     }
8912     return SV;
8913   }
8914   case NEON::BI__builtin_neon_vzip_v:
8915   case NEON::BI__builtin_neon_vzipq_v: {
8916     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
8917     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8918     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8919     Value *SV = nullptr;
8920 
8921     for (unsigned vi = 0; vi != 2; ++vi) {
8922       SmallVector<uint32_t, 16> Indices;
8923       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8924         Indices.push_back((i + vi*e) >> 1);
8925         Indices.push_back(((i + vi*e) >> 1)+e);
8926       }
8927       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8928       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
8929       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8930     }
8931     return SV;
8932   }
8933   case NEON::BI__builtin_neon_vqtbl1q_v: {
8934     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
8935                         Ops, "vtbl1");
8936   }
8937   case NEON::BI__builtin_neon_vqtbl2q_v: {
8938     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
8939                         Ops, "vtbl2");
8940   }
8941   case NEON::BI__builtin_neon_vqtbl3q_v: {
8942     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
8943                         Ops, "vtbl3");
8944   }
8945   case NEON::BI__builtin_neon_vqtbl4q_v: {
8946     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
8947                         Ops, "vtbl4");
8948   }
8949   case NEON::BI__builtin_neon_vqtbx1q_v: {
8950     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
8951                         Ops, "vtbx1");
8952   }
8953   case NEON::BI__builtin_neon_vqtbx2q_v: {
8954     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
8955                         Ops, "vtbx2");
8956   }
8957   case NEON::BI__builtin_neon_vqtbx3q_v: {
8958     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
8959                         Ops, "vtbx3");
8960   }
8961   case NEON::BI__builtin_neon_vqtbx4q_v: {
8962     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
8963                         Ops, "vtbx4");
8964   }
8965   case NEON::BI__builtin_neon_vsqadd_v:
8966   case NEON::BI__builtin_neon_vsqaddq_v: {
8967     Int = Intrinsic::aarch64_neon_usqadd;
8968     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
8969   }
8970   case NEON::BI__builtin_neon_vuqadd_v:
8971   case NEON::BI__builtin_neon_vuqaddq_v: {
8972     Int = Intrinsic::aarch64_neon_suqadd;
8973     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
8974   }
8975   case AArch64::BI_BitScanForward:
8976   case AArch64::BI_BitScanForward64:
8977     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
8978   case AArch64::BI_BitScanReverse:
8979   case AArch64::BI_BitScanReverse64:
8980     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
8981   case AArch64::BI_InterlockedAnd64:
8982     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
8983   case AArch64::BI_InterlockedExchange64:
8984     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
8985   case AArch64::BI_InterlockedExchangeAdd64:
8986     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
8987   case AArch64::BI_InterlockedExchangeSub64:
8988     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
8989   case AArch64::BI_InterlockedOr64:
8990     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
8991   case AArch64::BI_InterlockedXor64:
8992     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
8993   case AArch64::BI_InterlockedDecrement64:
8994     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
8995   case AArch64::BI_InterlockedIncrement64:
8996     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
8997   case AArch64::BI_InterlockedExchangeAdd8_acq:
8998   case AArch64::BI_InterlockedExchangeAdd16_acq:
8999   case AArch64::BI_InterlockedExchangeAdd_acq:
9000   case AArch64::BI_InterlockedExchangeAdd64_acq:
9001     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E);
9002   case AArch64::BI_InterlockedExchangeAdd8_rel:
9003   case AArch64::BI_InterlockedExchangeAdd16_rel:
9004   case AArch64::BI_InterlockedExchangeAdd_rel:
9005   case AArch64::BI_InterlockedExchangeAdd64_rel:
9006     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E);
9007   case AArch64::BI_InterlockedExchangeAdd8_nf:
9008   case AArch64::BI_InterlockedExchangeAdd16_nf:
9009   case AArch64::BI_InterlockedExchangeAdd_nf:
9010   case AArch64::BI_InterlockedExchangeAdd64_nf:
9011     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E);
9012   case AArch64::BI_InterlockedExchange8_acq:
9013   case AArch64::BI_InterlockedExchange16_acq:
9014   case AArch64::BI_InterlockedExchange_acq:
9015   case AArch64::BI_InterlockedExchange64_acq:
9016     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E);
9017   case AArch64::BI_InterlockedExchange8_rel:
9018   case AArch64::BI_InterlockedExchange16_rel:
9019   case AArch64::BI_InterlockedExchange_rel:
9020   case AArch64::BI_InterlockedExchange64_rel:
9021     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E);
9022   case AArch64::BI_InterlockedExchange8_nf:
9023   case AArch64::BI_InterlockedExchange16_nf:
9024   case AArch64::BI_InterlockedExchange_nf:
9025   case AArch64::BI_InterlockedExchange64_nf:
9026     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E);
9027   case AArch64::BI_InterlockedCompareExchange8_acq:
9028   case AArch64::BI_InterlockedCompareExchange16_acq:
9029   case AArch64::BI_InterlockedCompareExchange_acq:
9030   case AArch64::BI_InterlockedCompareExchange64_acq:
9031     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E);
9032   case AArch64::BI_InterlockedCompareExchange8_rel:
9033   case AArch64::BI_InterlockedCompareExchange16_rel:
9034   case AArch64::BI_InterlockedCompareExchange_rel:
9035   case AArch64::BI_InterlockedCompareExchange64_rel:
9036     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E);
9037   case AArch64::BI_InterlockedCompareExchange8_nf:
9038   case AArch64::BI_InterlockedCompareExchange16_nf:
9039   case AArch64::BI_InterlockedCompareExchange_nf:
9040   case AArch64::BI_InterlockedCompareExchange64_nf:
9041     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E);
9042   case AArch64::BI_InterlockedOr8_acq:
9043   case AArch64::BI_InterlockedOr16_acq:
9044   case AArch64::BI_InterlockedOr_acq:
9045   case AArch64::BI_InterlockedOr64_acq:
9046     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E);
9047   case AArch64::BI_InterlockedOr8_rel:
9048   case AArch64::BI_InterlockedOr16_rel:
9049   case AArch64::BI_InterlockedOr_rel:
9050   case AArch64::BI_InterlockedOr64_rel:
9051     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E);
9052   case AArch64::BI_InterlockedOr8_nf:
9053   case AArch64::BI_InterlockedOr16_nf:
9054   case AArch64::BI_InterlockedOr_nf:
9055   case AArch64::BI_InterlockedOr64_nf:
9056     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E);
9057   case AArch64::BI_InterlockedXor8_acq:
9058   case AArch64::BI_InterlockedXor16_acq:
9059   case AArch64::BI_InterlockedXor_acq:
9060   case AArch64::BI_InterlockedXor64_acq:
9061     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E);
9062   case AArch64::BI_InterlockedXor8_rel:
9063   case AArch64::BI_InterlockedXor16_rel:
9064   case AArch64::BI_InterlockedXor_rel:
9065   case AArch64::BI_InterlockedXor64_rel:
9066     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E);
9067   case AArch64::BI_InterlockedXor8_nf:
9068   case AArch64::BI_InterlockedXor16_nf:
9069   case AArch64::BI_InterlockedXor_nf:
9070   case AArch64::BI_InterlockedXor64_nf:
9071     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E);
9072   case AArch64::BI_InterlockedAnd8_acq:
9073   case AArch64::BI_InterlockedAnd16_acq:
9074   case AArch64::BI_InterlockedAnd_acq:
9075   case AArch64::BI_InterlockedAnd64_acq:
9076     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E);
9077   case AArch64::BI_InterlockedAnd8_rel:
9078   case AArch64::BI_InterlockedAnd16_rel:
9079   case AArch64::BI_InterlockedAnd_rel:
9080   case AArch64::BI_InterlockedAnd64_rel:
9081     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E);
9082   case AArch64::BI_InterlockedAnd8_nf:
9083   case AArch64::BI_InterlockedAnd16_nf:
9084   case AArch64::BI_InterlockedAnd_nf:
9085   case AArch64::BI_InterlockedAnd64_nf:
9086     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E);
9087   case AArch64::BI_InterlockedIncrement16_acq:
9088   case AArch64::BI_InterlockedIncrement_acq:
9089   case AArch64::BI_InterlockedIncrement64_acq:
9090     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E);
9091   case AArch64::BI_InterlockedIncrement16_rel:
9092   case AArch64::BI_InterlockedIncrement_rel:
9093   case AArch64::BI_InterlockedIncrement64_rel:
9094     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E);
9095   case AArch64::BI_InterlockedIncrement16_nf:
9096   case AArch64::BI_InterlockedIncrement_nf:
9097   case AArch64::BI_InterlockedIncrement64_nf:
9098     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E);
9099   case AArch64::BI_InterlockedDecrement16_acq:
9100   case AArch64::BI_InterlockedDecrement_acq:
9101   case AArch64::BI_InterlockedDecrement64_acq:
9102     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E);
9103   case AArch64::BI_InterlockedDecrement16_rel:
9104   case AArch64::BI_InterlockedDecrement_rel:
9105   case AArch64::BI_InterlockedDecrement64_rel:
9106     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E);
9107   case AArch64::BI_InterlockedDecrement16_nf:
9108   case AArch64::BI_InterlockedDecrement_nf:
9109   case AArch64::BI_InterlockedDecrement64_nf:
9110     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E);
9111 
9112   case AArch64::BI_InterlockedAdd: {
9113     Value *Arg0 = EmitScalarExpr(E->getArg(0));
9114     Value *Arg1 = EmitScalarExpr(E->getArg(1));
9115     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
9116       AtomicRMWInst::Add, Arg0, Arg1,
9117       llvm::AtomicOrdering::SequentiallyConsistent);
9118     return Builder.CreateAdd(RMWI, Arg1);
9119   }
9120   }
9121 }
9122 
9123 llvm::Value *CodeGenFunction::
9124 BuildVector(ArrayRef<llvm::Value*> Ops) {
9125   assert((Ops.size() & (Ops.size() - 1)) == 0 &&
9126          "Not a power-of-two sized vector!");
9127   bool AllConstants = true;
9128   for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
9129     AllConstants &= isa<Constant>(Ops[i]);
9130 
9131   // If this is a constant vector, create a ConstantVector.
9132   if (AllConstants) {
9133     SmallVector<llvm::Constant*, 16> CstOps;
9134     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
9135       CstOps.push_back(cast<Constant>(Ops[i]));
9136     return llvm::ConstantVector::get(CstOps);
9137   }
9138 
9139   // Otherwise, insertelement the values to build the vector.
9140   Value *Result =
9141     llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
9142 
9143   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
9144     Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
9145 
9146   return Result;
9147 }
9148 
9149 // Convert the mask from an integer type to a vector of i1.
9150 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
9151                               unsigned NumElts) {
9152 
9153   llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
9154                          cast<IntegerType>(Mask->getType())->getBitWidth());
9155   Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
9156 
9157   // If we have less than 8 elements, then the starting mask was an i8 and
9158   // we need to extract down to the right number of elements.
9159   if (NumElts < 8) {
9160     uint32_t Indices[4];
9161     for (unsigned i = 0; i != NumElts; ++i)
9162       Indices[i] = i;
9163     MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
9164                                              makeArrayRef(Indices, NumElts),
9165                                              "extract");
9166   }
9167   return MaskVec;
9168 }
9169 
9170 static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
9171                                  ArrayRef<Value *> Ops,
9172                                  unsigned Align) {
9173   // Cast the pointer to right type.
9174   Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
9175                                llvm::PointerType::getUnqual(Ops[1]->getType()));
9176 
9177   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
9178                                    Ops[1]->getType()->getVectorNumElements());
9179 
9180   return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Align, MaskVec);
9181 }
9182 
9183 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
9184                                 ArrayRef<Value *> Ops, unsigned Align) {
9185   // Cast the pointer to right type.
9186   Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
9187                                llvm::PointerType::getUnqual(Ops[1]->getType()));
9188 
9189   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
9190                                    Ops[1]->getType()->getVectorNumElements());
9191 
9192   return CGF.Builder.CreateMaskedLoad(Ptr, Align, MaskVec, Ops[1]);
9193 }
9194 
9195 static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
9196                                 ArrayRef<Value *> Ops) {
9197   llvm::Type *ResultTy = Ops[1]->getType();
9198   llvm::Type *PtrTy = ResultTy->getVectorElementType();
9199 
9200   // Cast the pointer to element type.
9201   Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
9202                                          llvm::PointerType::getUnqual(PtrTy));
9203 
9204   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
9205                                    ResultTy->getVectorNumElements());
9206 
9207   llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
9208                                            ResultTy);
9209   return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
9210 }
9211 
9212 static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
9213                                     ArrayRef<Value *> Ops,
9214                                     bool IsCompress) {
9215   llvm::Type *ResultTy = Ops[1]->getType();
9216 
9217   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
9218                                    ResultTy->getVectorNumElements());
9219 
9220   Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
9221                                  : Intrinsic::x86_avx512_mask_expand;
9222   llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
9223   return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
9224 }
9225 
9226 static Value *EmitX86CompressStore(CodeGenFunction &CGF,
9227                                    ArrayRef<Value *> Ops) {
9228   llvm::Type *ResultTy = Ops[1]->getType();
9229   llvm::Type *PtrTy = ResultTy->getVectorElementType();
9230 
9231   // Cast the pointer to element type.
9232   Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
9233                                          llvm::PointerType::getUnqual(PtrTy));
9234 
9235   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
9236                                    ResultTy->getVectorNumElements());
9237 
9238   llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
9239                                            ResultTy);
9240   return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
9241 }
9242 
9243 static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
9244                               ArrayRef<Value *> Ops,
9245                               bool InvertLHS = false) {
9246   unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
9247   Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
9248   Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
9249 
9250   if (InvertLHS)
9251     LHS = CGF.Builder.CreateNot(LHS);
9252 
9253   return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
9254                                    Ops[0]->getType());
9255 }
9256 
9257 static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
9258                                  Value *Amt, bool IsRight) {
9259   llvm::Type *Ty = Op0->getType();
9260 
9261   // Amount may be scalar immediate, in which case create a splat vector.
9262   // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
9263   // we only care about the lowest log2 bits anyway.
9264   if (Amt->getType() != Ty) {
9265     unsigned NumElts = Ty->getVectorNumElements();
9266     Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
9267     Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
9268   }
9269 
9270   unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
9271   Function *F = CGF.CGM.getIntrinsic(IID, Ty);
9272   return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
9273 }
9274 
9275 static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
9276                            bool IsSigned) {
9277   Value *Op0 = Ops[0];
9278   Value *Op1 = Ops[1];
9279   llvm::Type *Ty = Op0->getType();
9280   uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
9281 
9282   CmpInst::Predicate Pred;
9283   switch (Imm) {
9284   case 0x0:
9285     Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
9286     break;
9287   case 0x1:
9288     Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
9289     break;
9290   case 0x2:
9291     Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
9292     break;
9293   case 0x3:
9294     Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
9295     break;
9296   case 0x4:
9297     Pred = ICmpInst::ICMP_EQ;
9298     break;
9299   case 0x5:
9300     Pred = ICmpInst::ICMP_NE;
9301     break;
9302   case 0x6:
9303     return llvm::Constant::getNullValue(Ty); // FALSE
9304   case 0x7:
9305     return llvm::Constant::getAllOnesValue(Ty); // TRUE
9306   default:
9307     llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
9308   }
9309 
9310   Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
9311   Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
9312   return Res;
9313 }
9314 
9315 static Value *EmitX86Select(CodeGenFunction &CGF,
9316                             Value *Mask, Value *Op0, Value *Op1) {
9317 
9318   // If the mask is all ones just return first argument.
9319   if (const auto *C = dyn_cast<Constant>(Mask))
9320     if (C->isAllOnesValue())
9321       return Op0;
9322 
9323   Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());
9324 
9325   return CGF.Builder.CreateSelect(Mask, Op0, Op1);
9326 }
9327 
9328 static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
9329                                   Value *Mask, Value *Op0, Value *Op1) {
9330   // If the mask is all ones just return first argument.
9331   if (const auto *C = dyn_cast<Constant>(Mask))
9332     if (C->isAllOnesValue())
9333       return Op0;
9334 
9335   llvm::VectorType *MaskTy =
9336     llvm::VectorType::get(CGF.Builder.getInt1Ty(),
9337                           Mask->getType()->getIntegerBitWidth());
9338   Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
9339   Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
9340   return CGF.Builder.CreateSelect(Mask, Op0, Op1);
9341 }
9342 
9343 static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
9344                                          unsigned NumElts, Value *MaskIn) {
9345   if (MaskIn) {
9346     const auto *C = dyn_cast<Constant>(MaskIn);
9347     if (!C || !C->isAllOnesValue())
9348       Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
9349   }
9350 
9351   if (NumElts < 8) {
9352     uint32_t Indices[8];
9353     for (unsigned i = 0; i != NumElts; ++i)
9354       Indices[i] = i;
9355     for (unsigned i = NumElts; i != 8; ++i)
9356       Indices[i] = i % NumElts + NumElts;
9357     Cmp = CGF.Builder.CreateShuffleVector(
9358         Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
9359   }
9360 
9361   return CGF.Builder.CreateBitCast(Cmp,
9362                                    IntegerType::get(CGF.getLLVMContext(),
9363                                                     std::max(NumElts, 8U)));
9364 }
9365 
9366 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
9367                                    bool Signed, ArrayRef<Value *> Ops) {
9368   assert((Ops.size() == 2 || Ops.size() == 4) &&
9369          "Unexpected number of arguments");
9370   unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
9371   Value *Cmp;
9372 
9373   if (CC == 3) {
9374     Cmp = Constant::getNullValue(
9375                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
9376   } else if (CC == 7) {
9377     Cmp = Constant::getAllOnesValue(
9378                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
9379   } else {
9380     ICmpInst::Predicate Pred;
9381     switch (CC) {
9382     default: llvm_unreachable("Unknown condition code");
9383     case 0: Pred = ICmpInst::ICMP_EQ;  break;
9384     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
9385     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
9386     case 4: Pred = ICmpInst::ICMP_NE;  break;
9387     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
9388     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
9389     }
9390     Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
9391   }
9392 
9393   Value *MaskIn = nullptr;
9394   if (Ops.size() == 4)
9395     MaskIn = Ops[3];
9396 
9397   return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
9398 }
9399 
9400 static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
9401   Value *Zero = Constant::getNullValue(In->getType());
9402   return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
9403 }
9404 
9405 static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF,
9406                                     ArrayRef<Value *> Ops, bool IsSigned) {
9407   unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
9408   llvm::Type *Ty = Ops[1]->getType();
9409 
9410   Value *Res;
9411   if (Rnd != 4) {
9412     Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
9413                                  : Intrinsic::x86_avx512_uitofp_round;
9414     Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
9415     Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
9416   } else {
9417     Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
9418                    : CGF.Builder.CreateUIToFP(Ops[0], Ty);
9419   }
9420 
9421   return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
9422 }
9423 
9424 static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) {
9425 
9426   llvm::Type *Ty = Ops[0]->getType();
9427   Value *Zero = llvm::Constant::getNullValue(Ty);
9428   Value *Sub = CGF.Builder.CreateSub(Zero, Ops[0]);
9429   Value *Cmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Zero);
9430   Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Sub);
9431   return Res;
9432 }
9433 
9434 static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
9435                             ArrayRef<Value *> Ops) {
9436   Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
9437   Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
9438 
9439   assert(Ops.size() == 2);
9440   return Res;
9441 }
9442 
// Lowers X86 FMA intrinsics to IR.
//
// Operand layout as used below: Ops[0], Ops[1] and Ops[2] are the three FMA
// inputs (A, B, C); Ops[3] is the mask for the masked builtins; Ops.back() is
// read as the rounding-mode immediate for the 512-bit builtins listed in the
// first switch.
//
// BuiltinID selects both the optional target intrinsic and the masking
// behaviour. IsAddSub requests the alternating add/sub form on the generic
// llvm.fma path.
static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
                             unsigned BuiltinID, bool IsAddSub) {

  // Pick the rounding-capable target intrinsic, if this builtin has one. The
  // fmsub/fmsubadd "_mask3" builtins reuse the fmadd/fmaddsub intrinsic with a
  // negated C, which is recorded in Subtract. Builtins not listed here keep
  // IID == not_intrinsic and always take the generic path below.
  bool Subtract = false;
  Intrinsic::ID IID = Intrinsic::not_intrinsic;
  switch (BuiltinID) {
  default: break;
  case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
    Subtract = true;
    LLVM_FALLTHROUGH;
  case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
  case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
  case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
    IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
  case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
    Subtract = true;
    LLVM_FALLTHROUGH;
  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
  case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
    IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
  case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
    Subtract = true;
    LLVM_FALLTHROUGH;
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
    IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
    break;
  case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
    Subtract = true;
    LLVM_FALLTHROUGH;
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
    IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
    break;
  }

  Value *A = Ops[0];
  Value *B = Ops[1];
  Value *C = Ops[2];

  // Note that only the local C is negated; Ops[2] itself is left untouched and
  // is what the "_mask3" builtins pass through further down.
  if (Subtract)
    C = CGF.Builder.CreateFNeg(C);

  Value *Res;

  // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding).
  // The rounding operand is only inspected when a target intrinsic was
  // selected above (the && short-circuits otherwise).
  if (IID != Intrinsic::not_intrinsic &&
      cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4) {
    Function *Intr = CGF.CGM.getIntrinsic(IID);
    Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
  } else {
    // Generic path: llvm.fma on the operand type.
    llvm::Type *Ty = A->getType();
    Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
    Res = CGF.Builder.CreateCall(FMA, {A, B, C} );

    if (IsAddSub) {
      // Negate even elts in C using a mask.
      // A second fma is emitted with C negated, and the shuffle then takes
      // even lanes from that result (indices < NumElts select from the first
      // shuffle operand, FMSub) and odd lanes from the plain fma result
      // (indices >= NumElts select from the second operand, Res).
      unsigned NumElts = Ty->getVectorNumElements();
      SmallVector<uint32_t, 16> Indices(NumElts);
      for (unsigned i = 0; i != NumElts; ++i)
        Indices[i] = i + (i % 2) * NumElts;

      Value *NegC = CGF.Builder.CreateFNeg(C);
      Value *FMSub = CGF.Builder.CreateCall(FMA, {A, B, NegC} );
      Res = CGF.Builder.CreateShuffleVector(FMSub, Res, Indices);
    }
  }

  // Handle any required masking.
  // "_mask" passes through the first operand, "_maskz" passes through zero,
  // and "_mask3" passes through the original (un-negated) third operand.
  // Builtins not listed here are returned unmasked.
  Value *MaskFalseVal = nullptr;
  switch (BuiltinID) {
  case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
    MaskFalseVal = Ops[0];
    break;
  case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
  case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
    MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
    break;
  case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
  case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
  case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
  case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
  case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
    MaskFalseVal = Ops[2];
    break;
  }

  if (MaskFalseVal)
    return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);

  return Res;
}
9547 
9548 static Value *
9549 EmitScalarFMAExpr(CodeGenFunction &CGF, MutableArrayRef<Value *> Ops,
9550                   Value *Upper, bool ZeroMask = false, unsigned PTIdx = 0,
9551                   bool NegAcc = false) {
9552   unsigned Rnd = 4;
9553   if (Ops.size() > 4)
9554     Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
9555 
9556   if (NegAcc)
9557     Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
9558 
9559   Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
9560   Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
9561   Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
9562   Value *Res;
9563   if (Rnd != 4) {
9564     Intrinsic::ID IID = Ops[0]->getType()->getPrimitiveSizeInBits() == 32 ?
9565                         Intrinsic::x86_avx512_vfmadd_f32 :
9566                         Intrinsic::x86_avx512_vfmadd_f64;
9567     Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
9568                                  {Ops[0], Ops[1], Ops[2], Ops[4]});
9569   } else {
9570     Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
9571     Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
9572   }
9573   // If we have more than 3 arguments, we need to do masking.
9574   if (Ops.size() > 3) {
9575     Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
9576                                : Ops[PTIdx];
9577 
9578     // If we negated the accumulator and the its the PassThru value we need to
9579     // bypass the negate. Conveniently Upper should be the same thing in this
9580     // case.
9581     if (NegAcc && PTIdx == 2)
9582       PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
9583 
9584     Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
9585   }
9586   return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
9587 }
9588 
9589 static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
9590                            ArrayRef<Value *> Ops) {
9591   llvm::Type *Ty = Ops[0]->getType();
9592   // Arguments have a vXi32 type so cast to vXi64.
9593   Ty = llvm::VectorType::get(CGF.Int64Ty,
9594                              Ty->getPrimitiveSizeInBits() / 64);
9595   Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
9596   Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
9597 
9598   if (IsSigned) {
9599     // Shift left then arithmetic shift right.
9600     Constant *ShiftAmt = ConstantInt::get(Ty, 32);
9601     LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
9602     LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
9603     RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
9604     RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
9605   } else {
9606     // Clear the upper bits.
9607     Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
9608     LHS = CGF.Builder.CreateAnd(LHS, Mask);
9609     RHS = CGF.Builder.CreateAnd(RHS, Mask);
9610   }
9611 
9612   return CGF.Builder.CreateMul(LHS, RHS);
9613 }
9614 
9615 // Emit a masked pternlog intrinsic. This only exists because the header has to
9616 // use a macro and we aren't able to pass the input argument to a pternlog
9617 // builtin and a select builtin without evaluating it twice.
9618 static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
9619                              ArrayRef<Value *> Ops) {
9620   llvm::Type *Ty = Ops[0]->getType();
9621 
9622   unsigned VecWidth = Ty->getPrimitiveSizeInBits();
9623   unsigned EltWidth = Ty->getScalarSizeInBits();
9624   Intrinsic::ID IID;
9625   if (VecWidth == 128 && EltWidth == 32)
9626     IID = Intrinsic::x86_avx512_pternlog_d_128;
9627   else if (VecWidth == 256 && EltWidth == 32)
9628     IID = Intrinsic::x86_avx512_pternlog_d_256;
9629   else if (VecWidth == 512 && EltWidth == 32)
9630     IID = Intrinsic::x86_avx512_pternlog_d_512;
9631   else if (VecWidth == 128 && EltWidth == 64)
9632     IID = Intrinsic::x86_avx512_pternlog_q_128;
9633   else if (VecWidth == 256 && EltWidth == 64)
9634     IID = Intrinsic::x86_avx512_pternlog_q_256;
9635   else if (VecWidth == 512 && EltWidth == 64)
9636     IID = Intrinsic::x86_avx512_pternlog_q_512;
9637   else
9638     llvm_unreachable("Unexpected intrinsic");
9639 
9640   Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
9641                                           Ops.drop_back());
9642   Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
9643   return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
9644 }
9645 
9646 static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
9647                               llvm::Type *DstTy) {
9648   unsigned NumberOfElements = DstTy->getVectorNumElements();
9649   Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
9650   return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
9651 }
9652 
9653 // Emit addition or subtraction with signed/unsigned saturation.
9654 static Value *EmitX86AddSubSatExpr(CodeGenFunction &CGF,
9655                                    ArrayRef<Value *> Ops, bool IsSigned,
9656                                    bool IsAddition) {
9657   Intrinsic::ID IID =
9658       IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
9659                : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
9660   llvm::Function *F = CGF.CGM.getIntrinsic(IID, Ops[0]->getType());
9661   return CGF.Builder.CreateCall(F, {Ops[0], Ops[1]});
9662 }
9663 
9664 Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
9665   const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
9666   StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
9667   return EmitX86CpuIs(CPUStr);
9668 }
9669 
/// Emit IR that compares one field of the runtime's `__cpu_model` global
/// against the value associated with \p CPUStr.
///
/// \p CPUStr is looked up in the tables from X86TargetParser.def: vendor
/// names map to field 0, CPU type names (and their aliases) to field 1, and
/// CPU subtype names to field 2. An unknown name yields {0, 0}, which trips
/// the assertion below in asserts-enabled builds.
///
/// \returns an i1 that is true when the loaded field equals the expected
///          value.
Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {

  llvm::Type *Int32Ty = Builder.getInt32Ty();

  // Matching the struct layout from the compiler-rt/libgcc structure that is
  // filled in:
  // unsigned int __cpu_vendor;
  // unsigned int __cpu_type;
  // unsigned int __cpu_subtype;
  // unsigned int __cpu_features[1];
  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
                                          llvm::ArrayType::get(Int32Ty, 1));

  // Grab the global __cpu_model.
  llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
  cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);

  // Calculate the index needed to access the correct field based on the
  // range. Also adjust the expected value.
  // The X-macros below expand the .def file into one .Case/.Cases per known
  // name; the first pair member is the struct field index, the second the
  // enumerator value to compare against.
  unsigned Index;
  unsigned Value;
  std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
#define X86_VENDOR(ENUM, STRING)                                               \
  .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
#define X86_CPU_TYPE_COMPAT_WITH_ALIAS(ARCHNAME, ENUM, STR, ALIAS)             \
  .Cases(STR, ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
#define X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR)                               \
  .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
#define X86_CPU_SUBTYPE_COMPAT(ARCHNAME, ENUM, STR)                            \
  .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
#include "llvm/Support/X86TargetParser.def"
                               .Default({0, 0});
  assert(Value != 0 && "Invalid CPUStr passed to CpuIs");

  // Grab the appropriate field from __cpu_model.
  // The local variable named Value shadows llvm::Value here, hence the
  // explicit llvm:: qualification on the declarations below.
  llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
                         ConstantInt::get(Int32Ty, Index)};
  llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs);
  CpuValue = Builder.CreateAlignedLoad(CpuValue, CharUnits::fromQuantity(4));

  // Check the value of the field against the requested value.
  return Builder.CreateICmpEQ(CpuValue,
                                  llvm::ConstantInt::get(Int32Ty, Value));
}
9714 
9715 Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
9716   const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
9717   StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
9718   return EmitX86CpuSupports(FeatureStr);
9719 }
9720 
9721 uint64_t
9722 CodeGenFunction::GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs) {
9723   // Processor features and mapping to processor feature value.
9724   uint64_t FeaturesMask = 0;
9725   for (const StringRef &FeatureStr : FeatureStrs) {
9726     unsigned Feature =
9727         StringSwitch<unsigned>(FeatureStr)
9728 #define X86_FEATURE_COMPAT(VAL, ENUM, STR) .Case(STR, VAL)
9729 #include "llvm/Support/X86TargetParser.def"
9730         ;
9731     FeaturesMask |= (1ULL << Feature);
9732   }
9733   return FeaturesMask;
9734 }
9735 
9736 Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
9737   return EmitX86CpuSupports(GetX86CpuSupportsMask(FeatureStrs));
9738 }
9739 
9740 llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) {
9741   uint32_t Features1 = Lo_32(FeaturesMask);
9742   uint32_t Features2 = Hi_32(FeaturesMask);
9743 
9744   Value *Result = Builder.getTrue();
9745 
9746   if (Features1 != 0) {
9747     // Matching the struct layout from the compiler-rt/libgcc structure that is
9748     // filled in:
9749     // unsigned int __cpu_vendor;
9750     // unsigned int __cpu_type;
9751     // unsigned int __cpu_subtype;
9752     // unsigned int __cpu_features[1];
9753     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
9754                                             llvm::ArrayType::get(Int32Ty, 1));
9755 
9756     // Grab the global __cpu_model.
9757     llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
9758     cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
9759 
9760     // Grab the first (0th) element from the field __cpu_features off of the
9761     // global in the struct STy.
9762     Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
9763                      Builder.getInt32(0)};
9764     Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
9765     Value *Features =
9766         Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4));
9767 
9768     // Check the value of the bit corresponding to the feature requested.
9769     Value *Mask = Builder.getInt32(Features1);
9770     Value *Bitset = Builder.CreateAnd(Features, Mask);
9771     Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
9772     Result = Builder.CreateAnd(Result, Cmp);
9773   }
9774 
9775   if (Features2 != 0) {
9776     llvm::Constant *CpuFeatures2 = CGM.CreateRuntimeVariable(Int32Ty,
9777                                                              "__cpu_features2");
9778     cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
9779 
9780     Value *Features =
9781         Builder.CreateAlignedLoad(CpuFeatures2, CharUnits::fromQuantity(4));
9782 
9783     // Check the value of the bit corresponding to the feature requested.
9784     Value *Mask = Builder.getInt32(Features2);
9785     Value *Bitset = Builder.CreateAnd(Features, Mask);
9786     Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
9787     Result = Builder.CreateAnd(Result, Cmp);
9788   }
9789 
9790   return Result;
9791 }
9792 
9793 Value *CodeGenFunction::EmitX86CpuInit() {
9794   llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
9795                                                     /*Variadic*/ false);
9796   llvm::FunctionCallee Func =
9797       CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
9798   cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
9799   cast<llvm::GlobalValue>(Func.getCallee())
9800       ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
9801   return Builder.CreateCall(Func);
9802 }
9803 
9804 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
9805                                            const CallExpr *E) {
9806   if (BuiltinID == X86::BI__builtin_cpu_is)
9807     return EmitX86CpuIs(E);
9808   if (BuiltinID == X86::BI__builtin_cpu_supports)
9809     return EmitX86CpuSupports(E);
9810   if (BuiltinID == X86::BI__builtin_cpu_init)
9811     return EmitX86CpuInit();
9812 
9813   SmallVector<Value*, 4> Ops;
9814 
9815   // Find out if any arguments are required to be integer constant expressions.
9816   unsigned ICEArguments = 0;
9817   ASTContext::GetBuiltinTypeError Error;
9818   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
9819   assert(Error == ASTContext::GE_None && "Should not codegen an error");
9820 
9821   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
9822     // If this is a normal argument, just emit it as a scalar.
9823     if ((ICEArguments & (1 << i)) == 0) {
9824       Ops.push_back(EmitScalarExpr(E->getArg(i)));
9825       continue;
9826     }
9827 
9828     // If this is required to be a constant, constant fold it so that we know
9829     // that the generated intrinsic gets a ConstantInt.
9830     llvm::APSInt Result;
9831     bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
9832     assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
9833     Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
9834   }
9835 
9836   // These exist so that the builtin that takes an immediate can be bounds
9837   // checked by clang to avoid passing bad immediates to the backend. Since
9838   // AVX has a larger immediate than SSE we would need separate builtins to
9839   // do the different bounds checking. Rather than create a clang specific
9840   // SSE only builtin, this implements eight separate builtins to match gcc
9841   // implementation.
9842   auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
9843     Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
9844     llvm::Function *F = CGM.getIntrinsic(ID);
9845     return Builder.CreateCall(F, Ops);
9846   };
9847 
9848   // For the vector forms of FP comparisons, translate the builtins directly to
9849   // IR.
9850   // TODO: The builtins could be removed if the SSE header files used vector
9851   // extension comparisons directly (vector ordered/unordered may need
9852   // additional support via __builtin_isnan()).
9853   auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
9854     Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
9855     llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
9856     llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
9857     Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
9858     return Builder.CreateBitCast(Sext, FPVecTy);
9859   };
9860 
9861   switch (BuiltinID) {
9862   default: return nullptr;
9863   case X86::BI_mm_prefetch: {
9864     Value *Address = Ops[0];
9865     ConstantInt *C = cast<ConstantInt>(Ops[1]);
9866     Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
9867     Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
9868     Value *Data = ConstantInt::get(Int32Ty, 1);
9869     Function *F = CGM.getIntrinsic(Intrinsic::prefetch);
9870     return Builder.CreateCall(F, {Address, RW, Locality, Data});
9871   }
9872   case X86::BI_mm_clflush: {
9873     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
9874                               Ops[0]);
9875   }
9876   case X86::BI_mm_lfence: {
9877     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
9878   }
9879   case X86::BI_mm_mfence: {
9880     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
9881   }
9882   case X86::BI_mm_sfence: {
9883     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
9884   }
9885   case X86::BI_mm_pause: {
9886     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
9887   }
9888   case X86::BI__rdtsc: {
9889     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
9890   }
9891   case X86::BI__builtin_ia32_rdtscp: {
9892     Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
9893     Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
9894                                       Ops[0]);
9895     return Builder.CreateExtractValue(Call, 0);
9896   }
9897   case X86::BI__builtin_ia32_lzcnt_u16:
9898   case X86::BI__builtin_ia32_lzcnt_u32:
9899   case X86::BI__builtin_ia32_lzcnt_u64: {
9900     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
9901     return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
9902   }
9903   case X86::BI__builtin_ia32_tzcnt_u16:
9904   case X86::BI__builtin_ia32_tzcnt_u32:
9905   case X86::BI__builtin_ia32_tzcnt_u64: {
9906     Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
9907     return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
9908   }
9909   case X86::BI__builtin_ia32_undef128:
9910   case X86::BI__builtin_ia32_undef256:
9911   case X86::BI__builtin_ia32_undef512:
9912     // The x86 definition of "undef" is not the same as the LLVM definition
9913     // (PR32176). We leave optimizing away an unnecessary zero constant to the
9914     // IR optimizer and backend.
9915     // TODO: If we had a "freeze" IR instruction to generate a fixed undef
9916     // value, we should use that here instead of a zero.
9917     return llvm::Constant::getNullValue(ConvertType(E->getType()));
9918   case X86::BI__builtin_ia32_vec_init_v8qi:
9919   case X86::BI__builtin_ia32_vec_init_v4hi:
9920   case X86::BI__builtin_ia32_vec_init_v2si:
9921     return Builder.CreateBitCast(BuildVector(Ops),
9922                                  llvm::Type::getX86_MMXTy(getLLVMContext()));
9923   case X86::BI__builtin_ia32_vec_ext_v2si:
9924   case X86::BI__builtin_ia32_vec_ext_v16qi:
9925   case X86::BI__builtin_ia32_vec_ext_v8hi:
9926   case X86::BI__builtin_ia32_vec_ext_v4si:
9927   case X86::BI__builtin_ia32_vec_ext_v4sf:
9928   case X86::BI__builtin_ia32_vec_ext_v2di:
9929   case X86::BI__builtin_ia32_vec_ext_v32qi:
9930   case X86::BI__builtin_ia32_vec_ext_v16hi:
9931   case X86::BI__builtin_ia32_vec_ext_v8si:
9932   case X86::BI__builtin_ia32_vec_ext_v4di: {
9933     unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
9934     uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
9935     Index &= NumElts - 1;
9936     // These builtins exist so we can ensure the index is an ICE and in range.
9937     // Otherwise we could just do this in the header file.
9938     return Builder.CreateExtractElement(Ops[0], Index);
9939   }
9940   case X86::BI__builtin_ia32_vec_set_v16qi:
9941   case X86::BI__builtin_ia32_vec_set_v8hi:
9942   case X86::BI__builtin_ia32_vec_set_v4si:
9943   case X86::BI__builtin_ia32_vec_set_v2di:
9944   case X86::BI__builtin_ia32_vec_set_v32qi:
9945   case X86::BI__builtin_ia32_vec_set_v16hi:
9946   case X86::BI__builtin_ia32_vec_set_v8si:
9947   case X86::BI__builtin_ia32_vec_set_v4di: {
9948     unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
9949     unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
9950     Index &= NumElts - 1;
9951     // These builtins exist so we can ensure the index is an ICE and in range.
9952     // Otherwise we could just do this in the header file.
9953     return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
9954   }
9955   case X86::BI_mm_setcsr:
9956   case X86::BI__builtin_ia32_ldmxcsr: {
9957     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
9958     Builder.CreateStore(Ops[0], Tmp);
9959     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
9960                           Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
9961   }
9962   case X86::BI_mm_getcsr:
9963   case X86::BI__builtin_ia32_stmxcsr: {
9964     Address Tmp = CreateMemTemp(E->getType());
9965     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
9966                        Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
9967     return Builder.CreateLoad(Tmp, "stmxcsr");
9968   }
9969   case X86::BI__builtin_ia32_xsave:
9970   case X86::BI__builtin_ia32_xsave64:
9971   case X86::BI__builtin_ia32_xrstor:
9972   case X86::BI__builtin_ia32_xrstor64:
9973   case X86::BI__builtin_ia32_xsaveopt:
9974   case X86::BI__builtin_ia32_xsaveopt64:
9975   case X86::BI__builtin_ia32_xrstors:
9976   case X86::BI__builtin_ia32_xrstors64:
9977   case X86::BI__builtin_ia32_xsavec:
9978   case X86::BI__builtin_ia32_xsavec64:
9979   case X86::BI__builtin_ia32_xsaves:
9980   case X86::BI__builtin_ia32_xsaves64:
9981   case X86::BI__builtin_ia32_xsetbv:
9982   case X86::BI_xsetbv: {
9983     Intrinsic::ID ID;
9984 #define INTRINSIC_X86_XSAVE_ID(NAME) \
9985     case X86::BI__builtin_ia32_##NAME: \
9986       ID = Intrinsic::x86_##NAME; \
9987       break
9988     switch (BuiltinID) {
9989     default: llvm_unreachable("Unsupported intrinsic!");
9990     INTRINSIC_X86_XSAVE_ID(xsave);
9991     INTRINSIC_X86_XSAVE_ID(xsave64);
9992     INTRINSIC_X86_XSAVE_ID(xrstor);
9993     INTRINSIC_X86_XSAVE_ID(xrstor64);
9994     INTRINSIC_X86_XSAVE_ID(xsaveopt);
9995     INTRINSIC_X86_XSAVE_ID(xsaveopt64);
9996     INTRINSIC_X86_XSAVE_ID(xrstors);
9997     INTRINSIC_X86_XSAVE_ID(xrstors64);
9998     INTRINSIC_X86_XSAVE_ID(xsavec);
9999     INTRINSIC_X86_XSAVE_ID(xsavec64);
10000     INTRINSIC_X86_XSAVE_ID(xsaves);
10001     INTRINSIC_X86_XSAVE_ID(xsaves64);
10002     INTRINSIC_X86_XSAVE_ID(xsetbv);
10003     case X86::BI_xsetbv:
10004       ID = Intrinsic::x86_xsetbv;
10005       break;
10006     }
10007 #undef INTRINSIC_X86_XSAVE_ID
10008     Value *Mhi = Builder.CreateTrunc(
10009       Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
10010     Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
10011     Ops[1] = Mhi;
10012     Ops.push_back(Mlo);
10013     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
10014   }
10015   case X86::BI__builtin_ia32_xgetbv:
10016   case X86::BI_xgetbv:
10017     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
10018   case X86::BI__builtin_ia32_storedqudi128_mask:
10019   case X86::BI__builtin_ia32_storedqusi128_mask:
10020   case X86::BI__builtin_ia32_storedquhi128_mask:
10021   case X86::BI__builtin_ia32_storedquqi128_mask:
10022   case X86::BI__builtin_ia32_storeupd128_mask:
10023   case X86::BI__builtin_ia32_storeups128_mask:
10024   case X86::BI__builtin_ia32_storedqudi256_mask:
10025   case X86::BI__builtin_ia32_storedqusi256_mask:
10026   case X86::BI__builtin_ia32_storedquhi256_mask:
10027   case X86::BI__builtin_ia32_storedquqi256_mask:
10028   case X86::BI__builtin_ia32_storeupd256_mask:
10029   case X86::BI__builtin_ia32_storeups256_mask:
10030   case X86::BI__builtin_ia32_storedqudi512_mask:
10031   case X86::BI__builtin_ia32_storedqusi512_mask:
10032   case X86::BI__builtin_ia32_storedquhi512_mask:
10033   case X86::BI__builtin_ia32_storedquqi512_mask:
10034   case X86::BI__builtin_ia32_storeupd512_mask:
10035   case X86::BI__builtin_ia32_storeups512_mask:
10036     return EmitX86MaskedStore(*this, Ops, 1);
10037 
10038   case X86::BI__builtin_ia32_storess128_mask:
10039   case X86::BI__builtin_ia32_storesd128_mask: {
10040     return EmitX86MaskedStore(*this, Ops, 1);
10041   }
10042   case X86::BI__builtin_ia32_vpopcntb_128:
10043   case X86::BI__builtin_ia32_vpopcntd_128:
10044   case X86::BI__builtin_ia32_vpopcntq_128:
10045   case X86::BI__builtin_ia32_vpopcntw_128:
10046   case X86::BI__builtin_ia32_vpopcntb_256:
10047   case X86::BI__builtin_ia32_vpopcntd_256:
10048   case X86::BI__builtin_ia32_vpopcntq_256:
10049   case X86::BI__builtin_ia32_vpopcntw_256:
10050   case X86::BI__builtin_ia32_vpopcntb_512:
10051   case X86::BI__builtin_ia32_vpopcntd_512:
10052   case X86::BI__builtin_ia32_vpopcntq_512:
10053   case X86::BI__builtin_ia32_vpopcntw_512: {
10054     llvm::Type *ResultType = ConvertType(E->getType());
10055     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
10056     return Builder.CreateCall(F, Ops);
10057   }
  // Mask-to-vector conversions (cvtmask2{b,w,d,q} at 128/256/512 bits): the
  // mask operand and the builtin's converted result type are handed to
  // EmitX86SExtMask.
  // NOTE(review): judging by the helper's name this sign-extends each mask bit
  // into a full vector element -- confirm against its definition.
  case X86::BI__builtin_ia32_cvtmask2b128:
  case X86::BI__builtin_ia32_cvtmask2b256:
  case X86::BI__builtin_ia32_cvtmask2b512:
  case X86::BI__builtin_ia32_cvtmask2w128:
  case X86::BI__builtin_ia32_cvtmask2w256:
  case X86::BI__builtin_ia32_cvtmask2w512:
  case X86::BI__builtin_ia32_cvtmask2d128:
  case X86::BI__builtin_ia32_cvtmask2d256:
  case X86::BI__builtin_ia32_cvtmask2d512:
  case X86::BI__builtin_ia32_cvtmask2q128:
  case X86::BI__builtin_ia32_cvtmask2q256:
  case X86::BI__builtin_ia32_cvtmask2q512:
    return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));

  // Vector-to-mask conversions (cvt{b,w,d,q}2mask at 128/256/512 bits): the
  // single vector operand is handed to EmitX86ConvertToMask.
  case X86::BI__builtin_ia32_cvtb2mask128:
  case X86::BI__builtin_ia32_cvtb2mask256:
  case X86::BI__builtin_ia32_cvtb2mask512:
  case X86::BI__builtin_ia32_cvtw2mask128:
  case X86::BI__builtin_ia32_cvtw2mask256:
  case X86::BI__builtin_ia32_cvtw2mask512:
  case X86::BI__builtin_ia32_cvtd2mask128:
  case X86::BI__builtin_ia32_cvtd2mask256:
  case X86::BI__builtin_ia32_cvtd2mask512:
  case X86::BI__builtin_ia32_cvtq2mask128:
  case X86::BI__builtin_ia32_cvtq2mask256:
  case X86::BI__builtin_ia32_cvtq2mask512:
    return EmitX86ConvertToMask(*this, Ops[0]);
10085 
  // 512-bit integer-to-floating-point conversion builtins (the *_mask forms).
  // The cvtdq/cvtqq group and the cvtudq/cvtuqq group share one helper and
  // differ only in the IsSigned flag passed to it (true for the former, false
  // for the latter).
  case X86::BI__builtin_ia32_cvtdq2ps512_mask:
  case X86::BI__builtin_ia32_cvtqq2ps512_mask:
  case X86::BI__builtin_ia32_cvtqq2pd512_mask:
    return EmitX86ConvertIntToFp(*this, Ops, /*IsSigned*/true);
  case X86::BI__builtin_ia32_cvtudq2ps512_mask:
  case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
  case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
    return EmitX86ConvertIntToFp(*this, Ops, /*IsSigned*/false);
10094 
  // Scalar fused multiply-add builtins (ss/sd). All of them go through
  // EmitScalarFMAExpr; the groups below differ only in the third argument and
  // in the optional trailing arguments.
  // NOTE(review): the meaning of the third argument and of the unnamed
  // trailing `2` is defined by EmitScalarFMAExpr, which is not visible here;
  // the per-group notes only restate what is passed.
  case X86::BI__builtin_ia32_vfmaddss3:
  case X86::BI__builtin_ia32_vfmaddsd3:
  case X86::BI__builtin_ia32_vfmaddss3_mask:
  case X86::BI__builtin_ia32_vfmaddsd3_mask:
    // Third argument: operand 0.
    return EmitScalarFMAExpr(*this, Ops, Ops[0]);
  case X86::BI__builtin_ia32_vfmaddss:
  case X86::BI__builtin_ia32_vfmaddsd:
    // Third argument: an all-zero constant of operand 0's type.
    return EmitScalarFMAExpr(*this, Ops,
                             Constant::getNullValue(Ops[0]->getType()));
  case X86::BI__builtin_ia32_vfmaddss3_maskz:
  case X86::BI__builtin_ia32_vfmaddsd3_maskz:
    // Third argument: operand 0, with ZeroMask set.
    return EmitScalarFMAExpr(*this, Ops, Ops[0], /*ZeroMask*/true);
  case X86::BI__builtin_ia32_vfmaddss3_mask3:
  case X86::BI__builtin_ia32_vfmaddsd3_mask3:
    // Third argument: operand 2, ZeroMask clear, trailing argument 2.
    return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2);
  case X86::BI__builtin_ia32_vfmsubss3_mask3:
  case X86::BI__builtin_ia32_vfmsubsd3_mask3:
    // Same arguments as the vfmadd*_mask3 group, plus NegAcc set.
    return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2,
                             /*NegAcc*/true);
  // Packed fused multiply-add builtins (including the vfmsub*_mask3 forms):
  // emitted by EmitX86FMAExpr, which also receives BuiltinID, with
  // IsAddSub=false.
  case X86::BI__builtin_ia32_vfmaddps:
  case X86::BI__builtin_ia32_vfmaddpd:
  case X86::BI__builtin_ia32_vfmaddps256:
  case X86::BI__builtin_ia32_vfmaddpd256:
  case X86::BI__builtin_ia32_vfmaddps512_mask:
  case X86::BI__builtin_ia32_vfmaddps512_maskz:
  case X86::BI__builtin_ia32_vfmaddps512_mask3:
  case X86::BI__builtin_ia32_vfmsubps512_mask3:
  case X86::BI__builtin_ia32_vfmaddpd512_mask:
  case X86::BI__builtin_ia32_vfmaddpd512_maskz:
  case X86::BI__builtin_ia32_vfmaddpd512_mask3:
  case X86::BI__builtin_ia32_vfmsubpd512_mask3:
    return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/false);
  // Packed vfmaddsub / vfmsubadd builtins: same helper, with IsAddSub=true.
  case X86::BI__builtin_ia32_vfmaddsubps:
  case X86::BI__builtin_ia32_vfmaddsubpd:
  case X86::BI__builtin_ia32_vfmaddsubps256:
  case X86::BI__builtin_ia32_vfmaddsubpd256:
  case X86::BI__builtin_ia32_vfmaddsubps512_mask:
  case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
  case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
  case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
  case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
  case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
  case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
  case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
    return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/true);
10140 
10141   case X86::BI__builtin_ia32_movdqa32store128_mask:
10142   case X86::BI__builtin_ia32_movdqa64store128_mask:
10143   case X86::BI__builtin_ia32_storeaps128_mask:
10144   case X86::BI__builtin_ia32_storeapd128_mask:
10145   case X86::BI__builtin_ia32_movdqa32store256_mask:
10146   case X86::BI__builtin_ia32_movdqa64store256_mask:
10147   case X86::BI__builtin_ia32_storeaps256_mask:
10148   case X86::BI__builtin_ia32_storeapd256_mask:
10149   case X86::BI__builtin_ia32_movdqa32store512_mask:
10150   case X86::BI__builtin_ia32_movdqa64store512_mask:
10151   case X86::BI__builtin_ia32_storeaps512_mask:
10152   case X86::BI__builtin_ia32_storeapd512_mask: {
10153     unsigned Align =
10154       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
10155     return EmitX86MaskedStore(*this, Ops, Align);
10156   }
  // Unaligned masked vector loads (loadups/loadupd/loaddqu{qi,hi,si,di} at
  // 128, 256 and 512 bits): forwarded to the shared masked-load helper with a
  // literal 1 as its last argument.
  // NOTE(review): that argument is presumably an alignment in bytes -- the
  // aligned-load cases of this switch pass a value derived from
  // getTypeAlignInChars() in the same position. Confirm against
  // EmitX86MaskedLoad.
  case X86::BI__builtin_ia32_loadups128_mask:
  case X86::BI__builtin_ia32_loadups256_mask:
  case X86::BI__builtin_ia32_loadups512_mask:
  case X86::BI__builtin_ia32_loadupd128_mask:
  case X86::BI__builtin_ia32_loadupd256_mask:
  case X86::BI__builtin_ia32_loadupd512_mask:
  case X86::BI__builtin_ia32_loaddquqi128_mask:
  case X86::BI__builtin_ia32_loaddquqi256_mask:
  case X86::BI__builtin_ia32_loaddquqi512_mask:
  case X86::BI__builtin_ia32_loaddquhi128_mask:
  case X86::BI__builtin_ia32_loaddquhi256_mask:
  case X86::BI__builtin_ia32_loaddquhi512_mask:
  case X86::BI__builtin_ia32_loaddqusi128_mask:
  case X86::BI__builtin_ia32_loaddqusi256_mask:
  case X86::BI__builtin_ia32_loaddqusi512_mask:
  case X86::BI__builtin_ia32_loaddqudi128_mask:
  case X86::BI__builtin_ia32_loaddqudi256_mask:
  case X86::BI__builtin_ia32_loaddqudi512_mask:
    return EmitX86MaskedLoad(*this, Ops, 1);

  // Masked scalar loads (loadss/loadsd): same helper and same literal 1 as
  // the unaligned vector loads above.
  case X86::BI__builtin_ia32_loadss128_mask:
  case X86::BI__builtin_ia32_loadsd128_mask:
    return EmitX86MaskedLoad(*this, Ops, 1);
10180 
10181   case X86::BI__builtin_ia32_loadaps128_mask:
10182   case X86::BI__builtin_ia32_loadaps256_mask:
10183   case X86::BI__builtin_ia32_loadaps512_mask:
10184   case X86::BI__builtin_ia32_loadapd128_mask:
10185   case X86::BI__builtin_ia32_loadapd256_mask:
10186   case X86::BI__builtin_ia32_loadapd512_mask:
10187   case X86::BI__builtin_ia32_movdqa32load128_mask:
10188   case X86::BI__builtin_ia32_movdqa32load256_mask:
10189   case X86::BI__builtin_ia32_movdqa32load512_mask:
10190   case X86::BI__builtin_ia32_movdqa64load128_mask:
10191   case X86::BI__builtin_ia32_movdqa64load256_mask:
10192   case X86::BI__builtin_ia32_movdqa64load512_mask: {
10193     unsigned Align =
10194       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
10195     return EmitX86MaskedLoad(*this, Ops, Align);
10196   }
10197 
  // Masked expand-load builtins for every element kind (df/sf/di/si/hi/qi)
  // and vector width (128/256/512): all forwarded unchanged to
  // EmitX86ExpandLoad.
  case X86::BI__builtin_ia32_expandloaddf128_mask:
  case X86::BI__builtin_ia32_expandloaddf256_mask:
  case X86::BI__builtin_ia32_expandloaddf512_mask:
  case X86::BI__builtin_ia32_expandloadsf128_mask:
  case X86::BI__builtin_ia32_expandloadsf256_mask:
  case X86::BI__builtin_ia32_expandloadsf512_mask:
  case X86::BI__builtin_ia32_expandloaddi128_mask:
  case X86::BI__builtin_ia32_expandloaddi256_mask:
  case X86::BI__builtin_ia32_expandloaddi512_mask:
  case X86::BI__builtin_ia32_expandloadsi128_mask:
  case X86::BI__builtin_ia32_expandloadsi256_mask:
  case X86::BI__builtin_ia32_expandloadsi512_mask:
  case X86::BI__builtin_ia32_expandloadhi128_mask:
  case X86::BI__builtin_ia32_expandloadhi256_mask:
  case X86::BI__builtin_ia32_expandloadhi512_mask:
  case X86::BI__builtin_ia32_expandloadqi128_mask:
  case X86::BI__builtin_ia32_expandloadqi256_mask:
  case X86::BI__builtin_ia32_expandloadqi512_mask:
    return EmitX86ExpandLoad(*this, Ops);

  // Masked compress-store builtins for the same element kinds and widths:
  // all forwarded unchanged to EmitX86CompressStore.
  case X86::BI__builtin_ia32_compressstoredf128_mask:
  case X86::BI__builtin_ia32_compressstoredf256_mask:
  case X86::BI__builtin_ia32_compressstoredf512_mask:
  case X86::BI__builtin_ia32_compressstoresf128_mask:
  case X86::BI__builtin_ia32_compressstoresf256_mask:
  case X86::BI__builtin_ia32_compressstoresf512_mask:
  case X86::BI__builtin_ia32_compressstoredi128_mask:
  case X86::BI__builtin_ia32_compressstoredi256_mask:
  case X86::BI__builtin_ia32_compressstoredi512_mask:
  case X86::BI__builtin_ia32_compressstoresi128_mask:
  case X86::BI__builtin_ia32_compressstoresi256_mask:
  case X86::BI__builtin_ia32_compressstoresi512_mask:
  case X86::BI__builtin_ia32_compressstorehi128_mask:
  case X86::BI__builtin_ia32_compressstorehi256_mask:
  case X86::BI__builtin_ia32_compressstorehi512_mask:
  case X86::BI__builtin_ia32_compressstoreqi128_mask:
  case X86::BI__builtin_ia32_compressstoreqi256_mask:
  case X86::BI__builtin_ia32_compressstoreqi512_mask:
    return EmitX86CompressStore(*this, Ops);
10237 
  // Register-form masked expand builtins for every element kind
  // (df/sf/di/si/hi/qi) and vector width (128/256/512). Expand and compress
  // share EmitX86CompressExpand; this group passes IsCompress=false.
  case X86::BI__builtin_ia32_expanddf128_mask:
  case X86::BI__builtin_ia32_expanddf256_mask:
  case X86::BI__builtin_ia32_expanddf512_mask:
  case X86::BI__builtin_ia32_expandsf128_mask:
  case X86::BI__builtin_ia32_expandsf256_mask:
  case X86::BI__builtin_ia32_expandsf512_mask:
  case X86::BI__builtin_ia32_expanddi128_mask:
  case X86::BI__builtin_ia32_expanddi256_mask:
  case X86::BI__builtin_ia32_expanddi512_mask:
  case X86::BI__builtin_ia32_expandsi128_mask:
  case X86::BI__builtin_ia32_expandsi256_mask:
  case X86::BI__builtin_ia32_expandsi512_mask:
  case X86::BI__builtin_ia32_expandhi128_mask:
  case X86::BI__builtin_ia32_expandhi256_mask:
  case X86::BI__builtin_ia32_expandhi512_mask:
  case X86::BI__builtin_ia32_expandqi128_mask:
  case X86::BI__builtin_ia32_expandqi256_mask:
  case X86::BI__builtin_ia32_expandqi512_mask:
    return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);

  // Register-form masked compress builtins for the same element kinds and
  // widths: same helper, with IsCompress=true.
  case X86::BI__builtin_ia32_compressdf128_mask:
  case X86::BI__builtin_ia32_compressdf256_mask:
  case X86::BI__builtin_ia32_compressdf512_mask:
  case X86::BI__builtin_ia32_compresssf128_mask:
  case X86::BI__builtin_ia32_compresssf256_mask:
  case X86::BI__builtin_ia32_compresssf512_mask:
  case X86::BI__builtin_ia32_compressdi128_mask:
  case X86::BI__builtin_ia32_compressdi256_mask:
  case X86::BI__builtin_ia32_compressdi512_mask:
  case X86::BI__builtin_ia32_compresssi128_mask:
  case X86::BI__builtin_ia32_compresssi256_mask:
  case X86::BI__builtin_ia32_compresssi512_mask:
  case X86::BI__builtin_ia32_compresshi128_mask:
  case X86::BI__builtin_ia32_compresshi256_mask:
  case X86::BI__builtin_ia32_compresshi512_mask:
  case X86::BI__builtin_ia32_compressqi128_mask:
  case X86::BI__builtin_ia32_compressqi256_mask:
  case X86::BI__builtin_ia32_compressqi512_mask:
    return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
10277 
10278   case X86::BI__builtin_ia32_gather3div2df:
10279   case X86::BI__builtin_ia32_gather3div2di:
10280   case X86::BI__builtin_ia32_gather3div4df:
10281   case X86::BI__builtin_ia32_gather3div4di:
10282   case X86::BI__builtin_ia32_gather3div4sf:
10283   case X86::BI__builtin_ia32_gather3div4si:
10284   case X86::BI__builtin_ia32_gather3div8sf:
10285   case X86::BI__builtin_ia32_gather3div8si:
10286   case X86::BI__builtin_ia32_gather3siv2df:
10287   case X86::BI__builtin_ia32_gather3siv2di:
10288   case X86::BI__builtin_ia32_gather3siv4df:
10289   case X86::BI__builtin_ia32_gather3siv4di:
10290   case X86::BI__builtin_ia32_gather3siv4sf:
10291   case X86::BI__builtin_ia32_gather3siv4si:
10292   case X86::BI__builtin_ia32_gather3siv8sf:
10293   case X86::BI__builtin_ia32_gather3siv8si:
10294   case X86::BI__builtin_ia32_gathersiv8df:
10295   case X86::BI__builtin_ia32_gathersiv16sf:
10296   case X86::BI__builtin_ia32_gatherdiv8df:
10297   case X86::BI__builtin_ia32_gatherdiv16sf:
10298   case X86::BI__builtin_ia32_gathersiv8di:
10299   case X86::BI__builtin_ia32_gathersiv16si:
10300   case X86::BI__builtin_ia32_gatherdiv8di:
10301   case X86::BI__builtin_ia32_gatherdiv16si: {
10302     Intrinsic::ID IID;
10303     switch (BuiltinID) {
10304     default: llvm_unreachable("Unexpected builtin");
10305     case X86::BI__builtin_ia32_gather3div2df:
10306       IID = Intrinsic::x86_avx512_mask_gather3div2_df;
10307       break;
10308     case X86::BI__builtin_ia32_gather3div2di:
10309       IID = Intrinsic::x86_avx512_mask_gather3div2_di;
10310       break;
10311     case X86::BI__builtin_ia32_gather3div4df:
10312       IID = Intrinsic::x86_avx512_mask_gather3div4_df;
10313       break;
10314     case X86::BI__builtin_ia32_gather3div4di:
10315       IID = Intrinsic::x86_avx512_mask_gather3div4_di;
10316       break;
10317     case X86::BI__builtin_ia32_gather3div4sf:
10318       IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
10319       break;
10320     case X86::BI__builtin_ia32_gather3div4si:
10321       IID = Intrinsic::x86_avx512_mask_gather3div4_si;
10322       break;
10323     case X86::BI__builtin_ia32_gather3div8sf:
10324       IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
10325       break;
10326     case X86::BI__builtin_ia32_gather3div8si:
10327       IID = Intrinsic::x86_avx512_mask_gather3div8_si;
10328       break;
10329     case X86::BI__builtin_ia32_gather3siv2df:
10330       IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
10331       break;
10332     case X86::BI__builtin_ia32_gather3siv2di:
10333       IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
10334       break;
10335     case X86::BI__builtin_ia32_gather3siv4df:
10336       IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
10337       break;
10338     case X86::BI__builtin_ia32_gather3siv4di:
10339       IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
10340       break;
10341     case X86::BI__builtin_ia32_gather3siv4sf:
10342       IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
10343       break;
10344     case X86::BI__builtin_ia32_gather3siv4si:
10345       IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
10346       break;
10347     case X86::BI__builtin_ia32_gather3siv8sf:
10348       IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
10349       break;
10350     case X86::BI__builtin_ia32_gather3siv8si:
10351       IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
10352       break;
10353     case X86::BI__builtin_ia32_gathersiv8df:
10354       IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
10355       break;
10356     case X86::BI__builtin_ia32_gathersiv16sf:
10357       IID = Intrinsic::x86_avx512_mask_gather_dps_512;
10358       break;
10359     case X86::BI__builtin_ia32_gatherdiv8df:
10360       IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
10361       break;
10362     case X86::BI__builtin_ia32_gatherdiv16sf:
10363       IID = Intrinsic::x86_avx512_mask_gather_qps_512;
10364       break;
10365     case X86::BI__builtin_ia32_gathersiv8di:
10366       IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
10367       break;
10368     case X86::BI__builtin_ia32_gathersiv16si:
10369       IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
10370       break;
10371     case X86::BI__builtin_ia32_gatherdiv8di:
10372       IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
10373       break;
10374     case X86::BI__builtin_ia32_gatherdiv16si:
10375       IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
10376       break;
10377     }
10378 
10379     unsigned MinElts = std::min(Ops[0]->getType()->getVectorNumElements(),
10380                                 Ops[2]->getType()->getVectorNumElements());
10381     Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
10382     Function *Intr = CGM.getIntrinsic(IID);
10383     return Builder.CreateCall(Intr, Ops);
10384   }
10385 
10386   case X86::BI__builtin_ia32_scattersiv8df:
10387   case X86::BI__builtin_ia32_scattersiv16sf:
10388   case X86::BI__builtin_ia32_scatterdiv8df:
10389   case X86::BI__builtin_ia32_scatterdiv16sf:
10390   case X86::BI__builtin_ia32_scattersiv8di:
10391   case X86::BI__builtin_ia32_scattersiv16si:
10392   case X86::BI__builtin_ia32_scatterdiv8di:
10393   case X86::BI__builtin_ia32_scatterdiv16si:
10394   case X86::BI__builtin_ia32_scatterdiv2df:
10395   case X86::BI__builtin_ia32_scatterdiv2di:
10396   case X86::BI__builtin_ia32_scatterdiv4df:
10397   case X86::BI__builtin_ia32_scatterdiv4di:
10398   case X86::BI__builtin_ia32_scatterdiv4sf:
10399   case X86::BI__builtin_ia32_scatterdiv4si:
10400   case X86::BI__builtin_ia32_scatterdiv8sf:
10401   case X86::BI__builtin_ia32_scatterdiv8si:
10402   case X86::BI__builtin_ia32_scattersiv2df:
10403   case X86::BI__builtin_ia32_scattersiv2di:
10404   case X86::BI__builtin_ia32_scattersiv4df:
10405   case X86::BI__builtin_ia32_scattersiv4di:
10406   case X86::BI__builtin_ia32_scattersiv4sf:
10407   case X86::BI__builtin_ia32_scattersiv4si:
10408   case X86::BI__builtin_ia32_scattersiv8sf:
10409   case X86::BI__builtin_ia32_scattersiv8si: {
10410     Intrinsic::ID IID;
10411     switch (BuiltinID) {
10412     default: llvm_unreachable("Unexpected builtin");
10413     case X86::BI__builtin_ia32_scattersiv8df:
10414       IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
10415       break;
10416     case X86::BI__builtin_ia32_scattersiv16sf:
10417       IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
10418       break;
10419     case X86::BI__builtin_ia32_scatterdiv8df:
10420       IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
10421       break;
10422     case X86::BI__builtin_ia32_scatterdiv16sf:
10423       IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
10424       break;
10425     case X86::BI__builtin_ia32_scattersiv8di:
10426       IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
10427       break;
10428     case X86::BI__builtin_ia32_scattersiv16si:
10429       IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
10430       break;
10431     case X86::BI__builtin_ia32_scatterdiv8di:
10432       IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
10433       break;
10434     case X86::BI__builtin_ia32_scatterdiv16si:
10435       IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
10436       break;
10437     case X86::BI__builtin_ia32_scatterdiv2df:
10438       IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
10439       break;
10440     case X86::BI__builtin_ia32_scatterdiv2di:
10441       IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
10442       break;
10443     case X86::BI__builtin_ia32_scatterdiv4df:
10444       IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
10445       break;
10446     case X86::BI__builtin_ia32_scatterdiv4di:
10447       IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
10448       break;
10449     case X86::BI__builtin_ia32_scatterdiv4sf:
10450       IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
10451       break;
10452     case X86::BI__builtin_ia32_scatterdiv4si:
10453       IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
10454       break;
10455     case X86::BI__builtin_ia32_scatterdiv8sf:
10456       IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
10457       break;
10458     case X86::BI__builtin_ia32_scatterdiv8si:
10459       IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
10460       break;
10461     case X86::BI__builtin_ia32_scattersiv2df:
10462       IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
10463       break;
10464     case X86::BI__builtin_ia32_scattersiv2di:
10465       IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
10466       break;
10467     case X86::BI__builtin_ia32_scattersiv4df:
10468       IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
10469       break;
10470     case X86::BI__builtin_ia32_scattersiv4di:
10471       IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
10472       break;
10473     case X86::BI__builtin_ia32_scattersiv4sf:
10474       IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
10475       break;
10476     case X86::BI__builtin_ia32_scattersiv4si:
10477       IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
10478       break;
10479     case X86::BI__builtin_ia32_scattersiv8sf:
10480       IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
10481       break;
10482     case X86::BI__builtin_ia32_scattersiv8si:
10483       IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
10484       break;
10485     }
10486 
10487     unsigned MinElts = std::min(Ops[2]->getType()->getVectorNumElements(),
10488                                 Ops[3]->getType()->getVectorNumElements());
10489     Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
10490     Function *Intr = CGM.getIntrinsic(IID);
10491     return Builder.CreateCall(Intr, Ops);
10492   }
10493 
10494   case X86::BI__builtin_ia32_storehps:
10495   case X86::BI__builtin_ia32_storelps: {
10496     llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
10497     llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
10498 
10499     // cast val v2i64
10500     Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
10501 
10502     // extract (0, 1)
10503     unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
10504     Ops[1] = Builder.CreateExtractElement(Ops[1], Index, "extract");
10505 
10506     // cast pointer to i64 & store
10507     Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
10508     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
10509   }
10510   case X86::BI__builtin_ia32_vextractf128_pd256:
10511   case X86::BI__builtin_ia32_vextractf128_ps256:
10512   case X86::BI__builtin_ia32_vextractf128_si256:
10513   case X86::BI__builtin_ia32_extract128i256:
10514   case X86::BI__builtin_ia32_extractf64x4_mask:
10515   case X86::BI__builtin_ia32_extractf32x4_mask:
10516   case X86::BI__builtin_ia32_extracti64x4_mask:
10517   case X86::BI__builtin_ia32_extracti32x4_mask:
10518   case X86::BI__builtin_ia32_extractf32x8_mask:
10519   case X86::BI__builtin_ia32_extracti32x8_mask:
10520   case X86::BI__builtin_ia32_extractf32x4_256_mask:
10521   case X86::BI__builtin_ia32_extracti32x4_256_mask:
10522   case X86::BI__builtin_ia32_extractf64x2_256_mask:
10523   case X86::BI__builtin_ia32_extracti64x2_256_mask:
10524   case X86::BI__builtin_ia32_extractf64x2_512_mask:
10525   case X86::BI__builtin_ia32_extracti64x2_512_mask: {
10526     llvm::Type *DstTy = ConvertType(E->getType());
10527     unsigned NumElts = DstTy->getVectorNumElements();
10528     unsigned SrcNumElts = Ops[0]->getType()->getVectorNumElements();
10529     unsigned SubVectors = SrcNumElts / NumElts;
10530     unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
10531     assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
10532     Index &= SubVectors - 1; // Remove any extra bits.
10533     Index *= NumElts;
10534 
10535     uint32_t Indices[16];
10536     for (unsigned i = 0; i != NumElts; ++i)
10537       Indices[i] = i + Index;
10538 
10539     Value *Res = Builder.CreateShuffleVector(Ops[0],
10540                                              UndefValue::get(Ops[0]->getType()),
10541                                              makeArrayRef(Indices, NumElts),
10542                                              "extract");
10543 
10544     if (Ops.size() == 4)
10545       Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
10546 
10547     return Res;
10548   }
10549   case X86::BI__builtin_ia32_vinsertf128_pd256:
10550   case X86::BI__builtin_ia32_vinsertf128_ps256:
10551   case X86::BI__builtin_ia32_vinsertf128_si256:
10552   case X86::BI__builtin_ia32_insert128i256:
10553   case X86::BI__builtin_ia32_insertf64x4:
10554   case X86::BI__builtin_ia32_insertf32x4:
10555   case X86::BI__builtin_ia32_inserti64x4:
10556   case X86::BI__builtin_ia32_inserti32x4:
10557   case X86::BI__builtin_ia32_insertf32x8:
10558   case X86::BI__builtin_ia32_inserti32x8:
10559   case X86::BI__builtin_ia32_insertf32x4_256:
10560   case X86::BI__builtin_ia32_inserti32x4_256:
10561   case X86::BI__builtin_ia32_insertf64x2_256:
10562   case X86::BI__builtin_ia32_inserti64x2_256:
10563   case X86::BI__builtin_ia32_insertf64x2_512:
10564   case X86::BI__builtin_ia32_inserti64x2_512: {
10565     unsigned DstNumElts = Ops[0]->getType()->getVectorNumElements();
10566     unsigned SrcNumElts = Ops[1]->getType()->getVectorNumElements();
10567     unsigned SubVectors = DstNumElts / SrcNumElts;
10568     unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
10569     assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
10570     Index &= SubVectors - 1; // Remove any extra bits.
10571     Index *= SrcNumElts;
10572 
10573     uint32_t Indices[16];
10574     for (unsigned i = 0; i != DstNumElts; ++i)
10575       Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
10576 
10577     Value *Op1 = Builder.CreateShuffleVector(Ops[1],
10578                                              UndefValue::get(Ops[1]->getType()),
10579                                              makeArrayRef(Indices, DstNumElts),
10580                                              "widen");
10581 
10582     for (unsigned i = 0; i != DstNumElts; ++i) {
10583       if (i >= Index && i < (Index + SrcNumElts))
10584         Indices[i] = (i - Index) + DstNumElts;
10585       else
10586         Indices[i] = i;
10587     }
10588 
10589     return Builder.CreateShuffleVector(Ops[0], Op1,
10590                                        makeArrayRef(Indices, DstNumElts),
10591                                        "insert");
10592   }
10593   case X86::BI__builtin_ia32_pmovqd512_mask:
10594   case X86::BI__builtin_ia32_pmovwb512_mask: {
10595     Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
10596     return EmitX86Select(*this, Ops[2], Res, Ops[1]);
10597   }
10598   case X86::BI__builtin_ia32_pmovdb512_mask:
10599   case X86::BI__builtin_ia32_pmovdw512_mask:
10600   case X86::BI__builtin_ia32_pmovqw512_mask: {
10601     if (const auto *C = dyn_cast<Constant>(Ops[2]))
10602       if (C->isAllOnesValue())
10603         return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
10604 
10605     Intrinsic::ID IID;
10606     switch (BuiltinID) {
10607     default: llvm_unreachable("Unsupported intrinsic!");
10608     case X86::BI__builtin_ia32_pmovdb512_mask:
10609       IID = Intrinsic::x86_avx512_mask_pmov_db_512;
10610       break;
10611     case X86::BI__builtin_ia32_pmovdw512_mask:
10612       IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
10613       break;
10614     case X86::BI__builtin_ia32_pmovqw512_mask:
10615       IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
10616       break;
10617     }
10618 
10619     Function *Intr = CGM.getIntrinsic(IID);
10620     return Builder.CreateCall(Intr, Ops);
10621   }
10622   case X86::BI__builtin_ia32_pblendw128:
10623   case X86::BI__builtin_ia32_blendpd:
10624   case X86::BI__builtin_ia32_blendps:
10625   case X86::BI__builtin_ia32_blendpd256:
10626   case X86::BI__builtin_ia32_blendps256:
10627   case X86::BI__builtin_ia32_pblendw256:
10628   case X86::BI__builtin_ia32_pblendd128:
10629   case X86::BI__builtin_ia32_pblendd256: {
10630     unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
10631     unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
10632 
10633     uint32_t Indices[16];
10634     // If there are more than 8 elements, the immediate is used twice so make
10635     // sure we handle that.
10636     for (unsigned i = 0; i != NumElts; ++i)
10637       Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
10638 
10639     return Builder.CreateShuffleVector(Ops[0], Ops[1],
10640                                        makeArrayRef(Indices, NumElts),
10641                                        "blend");
10642   }
10643   case X86::BI__builtin_ia32_pshuflw:
10644   case X86::BI__builtin_ia32_pshuflw256:
10645   case X86::BI__builtin_ia32_pshuflw512: {
10646     uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
10647     llvm::Type *Ty = Ops[0]->getType();
10648     unsigned NumElts = Ty->getVectorNumElements();
10649 
10650     // Splat the 8-bits of immediate 4 times to help the loop wrap around.
10651     Imm = (Imm & 0xff) * 0x01010101;
10652 
10653     uint32_t Indices[32];
10654     for (unsigned l = 0; l != NumElts; l += 8) {
10655       for (unsigned i = 0; i != 4; ++i) {
10656         Indices[l + i] = l + (Imm & 3);
10657         Imm >>= 2;
10658       }
10659       for (unsigned i = 4; i != 8; ++i)
10660         Indices[l + i] = l + i;
10661     }
10662 
10663     return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
10664                                        makeArrayRef(Indices, NumElts),
10665                                        "pshuflw");
10666   }
10667   case X86::BI__builtin_ia32_pshufhw:
10668   case X86::BI__builtin_ia32_pshufhw256:
10669   case X86::BI__builtin_ia32_pshufhw512: {
10670     uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
10671     llvm::Type *Ty = Ops[0]->getType();
10672     unsigned NumElts = Ty->getVectorNumElements();
10673 
10674     // Splat the 8-bits of immediate 4 times to help the loop wrap around.
10675     Imm = (Imm & 0xff) * 0x01010101;
10676 
10677     uint32_t Indices[32];
10678     for (unsigned l = 0; l != NumElts; l += 8) {
10679       for (unsigned i = 0; i != 4; ++i)
10680         Indices[l + i] = l + i;
10681       for (unsigned i = 4; i != 8; ++i) {
10682         Indices[l + i] = l + 4 + (Imm & 3);
10683         Imm >>= 2;
10684       }
10685     }
10686 
10687     return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
10688                                        makeArrayRef(Indices, NumElts),
10689                                        "pshufhw");
10690   }
10691   case X86::BI__builtin_ia32_pshufd:
10692   case X86::BI__builtin_ia32_pshufd256:
10693   case X86::BI__builtin_ia32_pshufd512:
10694   case X86::BI__builtin_ia32_vpermilpd:
10695   case X86::BI__builtin_ia32_vpermilps:
10696   case X86::BI__builtin_ia32_vpermilpd256:
10697   case X86::BI__builtin_ia32_vpermilps256:
10698   case X86::BI__builtin_ia32_vpermilpd512:
10699   case X86::BI__builtin_ia32_vpermilps512: {
10700     uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
10701     llvm::Type *Ty = Ops[0]->getType();
10702     unsigned NumElts = Ty->getVectorNumElements();
10703     unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
10704     unsigned NumLaneElts = NumElts / NumLanes;
10705 
10706     // Splat the 8-bits of immediate 4 times to help the loop wrap around.
10707     Imm = (Imm & 0xff) * 0x01010101;
10708 
10709     uint32_t Indices[16];
10710     for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
10711       for (unsigned i = 0; i != NumLaneElts; ++i) {
10712         Indices[i + l] = (Imm % NumLaneElts) + l;
10713         Imm /= NumLaneElts;
10714       }
10715     }
10716 
10717     return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
10718                                        makeArrayRef(Indices, NumElts),
10719                                        "permil");
10720   }
10721   case X86::BI__builtin_ia32_shufpd:
10722   case X86::BI__builtin_ia32_shufpd256:
10723   case X86::BI__builtin_ia32_shufpd512:
10724   case X86::BI__builtin_ia32_shufps:
10725   case X86::BI__builtin_ia32_shufps256:
10726   case X86::BI__builtin_ia32_shufps512: {
10727     uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
10728     llvm::Type *Ty = Ops[0]->getType();
10729     unsigned NumElts = Ty->getVectorNumElements();
10730     unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
10731     unsigned NumLaneElts = NumElts / NumLanes;
10732 
10733     // Splat the 8-bits of immediate 4 times to help the loop wrap around.
10734     Imm = (Imm & 0xff) * 0x01010101;
10735 
10736     uint32_t Indices[16];
10737     for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
10738       for (unsigned i = 0; i != NumLaneElts; ++i) {
10739         unsigned Index = Imm % NumLaneElts;
10740         Imm /= NumLaneElts;
10741         if (i >= (NumLaneElts / 2))
10742           Index += NumElts;
10743         Indices[l + i] = l + Index;
10744       }
10745     }
10746 
10747     return Builder.CreateShuffleVector(Ops[0], Ops[1],
10748                                        makeArrayRef(Indices, NumElts),
10749                                        "shufp");
10750   }
10751   case X86::BI__builtin_ia32_permdi256:
10752   case X86::BI__builtin_ia32_permdf256:
10753   case X86::BI__builtin_ia32_permdi512:
10754   case X86::BI__builtin_ia32_permdf512: {
10755     unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
10756     llvm::Type *Ty = Ops[0]->getType();
10757     unsigned NumElts = Ty->getVectorNumElements();
10758 
10759     // These intrinsics operate on 256-bit lanes of four 64-bit elements.
10760     uint32_t Indices[8];
10761     for (unsigned l = 0; l != NumElts; l += 4)
10762       for (unsigned i = 0; i != 4; ++i)
10763         Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
10764 
10765     return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
10766                                        makeArrayRef(Indices, NumElts),
10767                                        "perm");
10768   }
10769   case X86::BI__builtin_ia32_palignr128:
10770   case X86::BI__builtin_ia32_palignr256:
10771   case X86::BI__builtin_ia32_palignr512: {
10772     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
10773 
10774     unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
10775     assert(NumElts % 16 == 0);
10776 
10777     // If palignr is shifting the pair of vectors more than the size of two
10778     // lanes, emit zero.
10779     if (ShiftVal >= 32)
10780       return llvm::Constant::getNullValue(ConvertType(E->getType()));
10781 
10782     // If palignr is shifting the pair of input vectors more than one lane,
10783     // but less than two lanes, convert to shifting in zeroes.
10784     if (ShiftVal > 16) {
10785       ShiftVal -= 16;
10786       Ops[1] = Ops[0];
10787       Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
10788     }
10789 
10790     uint32_t Indices[64];
10791     // 256-bit palignr operates on 128-bit lanes so we need to handle that
10792     for (unsigned l = 0; l != NumElts; l += 16) {
10793       for (unsigned i = 0; i != 16; ++i) {
10794         unsigned Idx = ShiftVal + i;
10795         if (Idx >= 16)
10796           Idx += NumElts - 16; // End of lane, switch operand.
10797         Indices[l + i] = Idx + l;
10798       }
10799     }
10800 
10801     return Builder.CreateShuffleVector(Ops[1], Ops[0],
10802                                        makeArrayRef(Indices, NumElts),
10803                                        "palignr");
10804   }
10805   case X86::BI__builtin_ia32_alignd128:
10806   case X86::BI__builtin_ia32_alignd256:
10807   case X86::BI__builtin_ia32_alignd512:
10808   case X86::BI__builtin_ia32_alignq128:
10809   case X86::BI__builtin_ia32_alignq256:
10810   case X86::BI__builtin_ia32_alignq512: {
10811     unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
10812     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
10813 
10814     // Mask the shift amount to width of two vectors.
10815     ShiftVal &= (2 * NumElts) - 1;
10816 
10817     uint32_t Indices[16];
10818     for (unsigned i = 0; i != NumElts; ++i)
10819       Indices[i] = i + ShiftVal;
10820 
10821     return Builder.CreateShuffleVector(Ops[1], Ops[0],
10822                                        makeArrayRef(Indices, NumElts),
10823                                        "valign");
10824   }
10825   case X86::BI__builtin_ia32_shuf_f32x4_256:
10826   case X86::BI__builtin_ia32_shuf_f64x2_256:
10827   case X86::BI__builtin_ia32_shuf_i32x4_256:
10828   case X86::BI__builtin_ia32_shuf_i64x2_256:
10829   case X86::BI__builtin_ia32_shuf_f32x4:
10830   case X86::BI__builtin_ia32_shuf_f64x2:
10831   case X86::BI__builtin_ia32_shuf_i32x4:
10832   case X86::BI__builtin_ia32_shuf_i64x2: {
10833     unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
10834     llvm::Type *Ty = Ops[0]->getType();
10835     unsigned NumElts = Ty->getVectorNumElements();
10836     unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
10837     unsigned NumLaneElts = NumElts / NumLanes;
10838 
10839     uint32_t Indices[16];
10840     for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
10841       unsigned Index = (Imm % NumLanes) * NumLaneElts;
10842       Imm /= NumLanes; // Discard the bits we just used.
10843       if (l >= (NumElts / 2))
10844         Index += NumElts; // Switch to other source.
10845       for (unsigned i = 0; i != NumLaneElts; ++i) {
10846         Indices[l + i] = Index + i;
10847       }
10848     }
10849 
10850     return Builder.CreateShuffleVector(Ops[0], Ops[1],
10851                                        makeArrayRef(Indices, NumElts),
10852                                        "shuf");
10853   }
10854 
10855   case X86::BI__builtin_ia32_vperm2f128_pd256:
10856   case X86::BI__builtin_ia32_vperm2f128_ps256:
10857   case X86::BI__builtin_ia32_vperm2f128_si256:
10858   case X86::BI__builtin_ia32_permti256: {
10859     unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
10860     unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
10861 
10862     // This takes a very simple approach since there are two lanes and a
10863     // shuffle can have 2 inputs. So we reserve the first input for the first
10864     // lane and the second input for the second lane. This may result in
10865     // duplicate sources, but this can be dealt with in the backend.
10866 
10867     Value *OutOps[2];
10868     uint32_t Indices[8];
10869     for (unsigned l = 0; l != 2; ++l) {
10870       // Determine the source for this lane.
10871       if (Imm & (1 << ((l * 4) + 3)))
10872         OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
10873       else if (Imm & (1 << ((l * 4) + 1)))
10874         OutOps[l] = Ops[1];
10875       else
10876         OutOps[l] = Ops[0];
10877 
10878       for (unsigned i = 0; i != NumElts/2; ++i) {
10879         // Start with ith element of the source for this lane.
10880         unsigned Idx = (l * NumElts) + i;
10881         // If bit 0 of the immediate half is set, switch to the high half of
10882         // the source.
10883         if (Imm & (1 << (l * 4)))
10884           Idx += NumElts/2;
10885         Indices[(l * (NumElts/2)) + i] = Idx;
10886       }
10887     }
10888 
10889     return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
10890                                        makeArrayRef(Indices, NumElts),
10891                                        "vperm");
10892   }
10893 
10894   case X86::BI__builtin_ia32_pslldqi128_byteshift:
10895   case X86::BI__builtin_ia32_pslldqi256_byteshift:
10896   case X86::BI__builtin_ia32_pslldqi512_byteshift: {
10897     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
10898     llvm::Type *ResultType = Ops[0]->getType();
10899     // Builtin type is vXi64 so multiply by 8 to get bytes.
10900     unsigned NumElts = ResultType->getVectorNumElements() * 8;
10901 
10902     // If pslldq is shifting the vector more than 15 bytes, emit zero.
10903     if (ShiftVal >= 16)
10904       return llvm::Constant::getNullValue(ResultType);
10905 
10906     uint32_t Indices[64];
10907     // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
10908     for (unsigned l = 0; l != NumElts; l += 16) {
10909       for (unsigned i = 0; i != 16; ++i) {
10910         unsigned Idx = NumElts + i - ShiftVal;
10911         if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
10912         Indices[l + i] = Idx + l;
10913       }
10914     }
10915 
10916     llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, NumElts);
10917     Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
10918     Value *Zero = llvm::Constant::getNullValue(VecTy);
10919     Value *SV = Builder.CreateShuffleVector(Zero, Cast,
10920                                             makeArrayRef(Indices, NumElts),
10921                                             "pslldq");
10922     return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
10923   }
10924   case X86::BI__builtin_ia32_psrldqi128_byteshift:
10925   case X86::BI__builtin_ia32_psrldqi256_byteshift:
10926   case X86::BI__builtin_ia32_psrldqi512_byteshift: {
10927     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
10928     llvm::Type *ResultType = Ops[0]->getType();
10929     // Builtin type is vXi64 so multiply by 8 to get bytes.
10930     unsigned NumElts = ResultType->getVectorNumElements() * 8;
10931 
10932     // If psrldq is shifting the vector more than 15 bytes, emit zero.
10933     if (ShiftVal >= 16)
10934       return llvm::Constant::getNullValue(ResultType);
10935 
10936     uint32_t Indices[64];
10937     // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
10938     for (unsigned l = 0; l != NumElts; l += 16) {
10939       for (unsigned i = 0; i != 16; ++i) {
10940         unsigned Idx = i + ShiftVal;
10941         if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
10942         Indices[l + i] = Idx + l;
10943       }
10944     }
10945 
10946     llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, NumElts);
10947     Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
10948     Value *Zero = llvm::Constant::getNullValue(VecTy);
10949     Value *SV = Builder.CreateShuffleVector(Cast, Zero,
10950                                             makeArrayRef(Indices, NumElts),
10951                                             "psrldq");
10952     return Builder.CreateBitCast(SV, ResultType, "cast");
10953   }
10954   case X86::BI__builtin_ia32_kshiftliqi:
10955   case X86::BI__builtin_ia32_kshiftlihi:
10956   case X86::BI__builtin_ia32_kshiftlisi:
10957   case X86::BI__builtin_ia32_kshiftlidi: {
10958     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
10959     unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
10960 
10961     if (ShiftVal >= NumElts)
10962       return llvm::Constant::getNullValue(Ops[0]->getType());
10963 
10964     Value *In = getMaskVecValue(*this, Ops[0], NumElts);
10965 
10966     uint32_t Indices[64];
10967     for (unsigned i = 0; i != NumElts; ++i)
10968       Indices[i] = NumElts + i - ShiftVal;
10969 
10970     Value *Zero = llvm::Constant::getNullValue(In->getType());
10971     Value *SV = Builder.CreateShuffleVector(Zero, In,
10972                                             makeArrayRef(Indices, NumElts),
10973                                             "kshiftl");
10974     return Builder.CreateBitCast(SV, Ops[0]->getType());
10975   }
10976   case X86::BI__builtin_ia32_kshiftriqi:
10977   case X86::BI__builtin_ia32_kshiftrihi:
10978   case X86::BI__builtin_ia32_kshiftrisi:
10979   case X86::BI__builtin_ia32_kshiftridi: {
10980     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
10981     unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
10982 
10983     if (ShiftVal >= NumElts)
10984       return llvm::Constant::getNullValue(Ops[0]->getType());
10985 
10986     Value *In = getMaskVecValue(*this, Ops[0], NumElts);
10987 
10988     uint32_t Indices[64];
10989     for (unsigned i = 0; i != NumElts; ++i)
10990       Indices[i] = i + ShiftVal;
10991 
10992     Value *Zero = llvm::Constant::getNullValue(In->getType());
10993     Value *SV = Builder.CreateShuffleVector(In, Zero,
10994                                             makeArrayRef(Indices, NumElts),
10995                                             "kshiftr");
10996     return Builder.CreateBitCast(SV, Ops[0]->getType());
10997   }
10998   case X86::BI__builtin_ia32_movnti:
10999   case X86::BI__builtin_ia32_movnti64:
11000   case X86::BI__builtin_ia32_movntsd:
11001   case X86::BI__builtin_ia32_movntss: {
11002     llvm::MDNode *Node = llvm::MDNode::get(
11003         getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
11004 
11005     Value *Ptr = Ops[0];
11006     Value *Src = Ops[1];
11007 
11008     // Extract the 0'th element of the source vector.
11009     if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
11010         BuiltinID == X86::BI__builtin_ia32_movntss)
11011       Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
11012 
11013     // Convert the type of the pointer to a pointer to the stored type.
11014     Value *BC = Builder.CreateBitCast(
11015         Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");
11016 
11017     // Unaligned nontemporal store of the scalar value.
11018     StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
11019     SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
11020     SI->setAlignment(1);
11021     return SI;
11022   }
  // Rotate is a special case of funnel shift - 1st 2 args are the same.
  // All of the builtins below share one lowering: Ops[0] is passed as both
  // funnel-shift inputs and Ops[1] is forwarded as the third operand.
  // NOTE(review): the trailing `false` presumably selects the left-shift form
  // (the pror* cases that follow pass `true`) - confirm against
  // EmitX86FunnelShift.
  case X86::BI__builtin_ia32_vprotb:
  case X86::BI__builtin_ia32_vprotw:
  case X86::BI__builtin_ia32_vprotd:
  case X86::BI__builtin_ia32_vprotq:
  case X86::BI__builtin_ia32_vprotbi:
  case X86::BI__builtin_ia32_vprotwi:
  case X86::BI__builtin_ia32_vprotdi:
  case X86::BI__builtin_ia32_vprotqi:
  case X86::BI__builtin_ia32_prold128:
  case X86::BI__builtin_ia32_prold256:
  case X86::BI__builtin_ia32_prold512:
  case X86::BI__builtin_ia32_prolq128:
  case X86::BI__builtin_ia32_prolq256:
  case X86::BI__builtin_ia32_prolq512:
  case X86::BI__builtin_ia32_prolvd128:
  case X86::BI__builtin_ia32_prolvd256:
  case X86::BI__builtin_ia32_prolvd512:
  case X86::BI__builtin_ia32_prolvq128:
  case X86::BI__builtin_ia32_prolvq256:
  case X86::BI__builtin_ia32_prolvq512:
    return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
  // pror* rotates: emitted as a funnel shift with Ops[0] passed as both
  // inputs and Ops[1] as the third operand.
  // NOTE(review): the trailing `true` presumably selects the right-shift form
  // (the prol*/vprot* cases pass `false`) - confirm against
  // EmitX86FunnelShift.
  case X86::BI__builtin_ia32_prord128:
  case X86::BI__builtin_ia32_prord256:
  case X86::BI__builtin_ia32_prord512:
  case X86::BI__builtin_ia32_prorq128:
  case X86::BI__builtin_ia32_prorq256:
  case X86::BI__builtin_ia32_prorq512:
  case X86::BI__builtin_ia32_prorvd128:
  case X86::BI__builtin_ia32_prorvd256:
  case X86::BI__builtin_ia32_prorvd512:
  case X86::BI__builtin_ia32_prorvq128:
  case X86::BI__builtin_ia32_prorvq256:
  case X86::BI__builtin_ia32_prorvq512:
    return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
  // Vector select builtins for every element type and vector width: all are
  // forwarded to EmitX86Select with the three operands in order.
  // NOTE(review): presumably Ops[0] is the mask and Ops[1]/Ops[2] are the two
  // values chosen between - confirm against EmitX86Select.
  case X86::BI__builtin_ia32_selectb_128:
  case X86::BI__builtin_ia32_selectb_256:
  case X86::BI__builtin_ia32_selectb_512:
  case X86::BI__builtin_ia32_selectw_128:
  case X86::BI__builtin_ia32_selectw_256:
  case X86::BI__builtin_ia32_selectw_512:
  case X86::BI__builtin_ia32_selectd_128:
  case X86::BI__builtin_ia32_selectd_256:
  case X86::BI__builtin_ia32_selectd_512:
  case X86::BI__builtin_ia32_selectq_128:
  case X86::BI__builtin_ia32_selectq_256:
  case X86::BI__builtin_ia32_selectq_512:
  case X86::BI__builtin_ia32_selectps_128:
  case X86::BI__builtin_ia32_selectps_256:
  case X86::BI__builtin_ia32_selectps_512:
  case X86::BI__builtin_ia32_selectpd_128:
  case X86::BI__builtin_ia32_selectpd_256:
  case X86::BI__builtin_ia32_selectpd_512:
    return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
11077   case X86::BI__builtin_ia32_selectss_128:
11078   case X86::BI__builtin_ia32_selectsd_128: {
11079     Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
11080     Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
11081     A = EmitX86ScalarSelect(*this, Ops[0], A, B);
11082     return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
11083   }
11084   case X86::BI__builtin_ia32_cmpb128_mask:
11085   case X86::BI__builtin_ia32_cmpb256_mask:
11086   case X86::BI__builtin_ia32_cmpb512_mask:
11087   case X86::BI__builtin_ia32_cmpw128_mask:
11088   case X86::BI__builtin_ia32_cmpw256_mask:
11089   case X86::BI__builtin_ia32_cmpw512_mask:
11090   case X86::BI__builtin_ia32_cmpd128_mask:
11091   case X86::BI__builtin_ia32_cmpd256_mask:
11092   case X86::BI__builtin_ia32_cmpd512_mask:
11093   case X86::BI__builtin_ia32_cmpq128_mask:
11094   case X86::BI__builtin_ia32_cmpq256_mask:
11095   case X86::BI__builtin_ia32_cmpq512_mask: {
11096     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
11097     return EmitX86MaskedCompare(*this, CC, true, Ops);
11098   }
11099   case X86::BI__builtin_ia32_ucmpb128_mask:
11100   case X86::BI__builtin_ia32_ucmpb256_mask:
11101   case X86::BI__builtin_ia32_ucmpb512_mask:
11102   case X86::BI__builtin_ia32_ucmpw128_mask:
11103   case X86::BI__builtin_ia32_ucmpw256_mask:
11104   case X86::BI__builtin_ia32_ucmpw512_mask:
11105   case X86::BI__builtin_ia32_ucmpd128_mask:
11106   case X86::BI__builtin_ia32_ucmpd256_mask:
11107   case X86::BI__builtin_ia32_ucmpd512_mask:
11108   case X86::BI__builtin_ia32_ucmpq128_mask:
11109   case X86::BI__builtin_ia32_ucmpq256_mask:
11110   case X86::BI__builtin_ia32_ucmpq512_mask: {
11111     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
11112     return EmitX86MaskedCompare(*this, CC, false, Ops);
11113   }
  // vpcom{b,w,d,q}: forwarded to EmitX86vpcom with the flag set to true (the
  // vpcomu* builtins pass false).
  case X86::BI__builtin_ia32_vpcomb:
  case X86::BI__builtin_ia32_vpcomw:
  case X86::BI__builtin_ia32_vpcomd:
  case X86::BI__builtin_ia32_vpcomq:
    return EmitX86vpcom(*this, Ops, true);
  // vpcomu{b,w,d,q}: forwarded to EmitX86vpcom with the flag set to false
  // (the vpcom* builtins without the 'u' pass true).
  case X86::BI__builtin_ia32_vpcomub:
  case X86::BI__builtin_ia32_vpcomuw:
  case X86::BI__builtin_ia32_vpcomud:
  case X86::BI__builtin_ia32_vpcomuq:
    return EmitX86vpcom(*this, Ops, false);
11124 
11125   case X86::BI__builtin_ia32_kortestcqi:
11126   case X86::BI__builtin_ia32_kortestchi:
11127   case X86::BI__builtin_ia32_kortestcsi:
11128   case X86::BI__builtin_ia32_kortestcdi: {
11129     Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
11130     Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
11131     Value *Cmp = Builder.CreateICmpEQ(Or, C);
11132     return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
11133   }
11134   case X86::BI__builtin_ia32_kortestzqi:
11135   case X86::BI__builtin_ia32_kortestzhi:
11136   case X86::BI__builtin_ia32_kortestzsi:
11137   case X86::BI__builtin_ia32_kortestzdi: {
11138     Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
11139     Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
11140     Value *Cmp = Builder.CreateICmpEQ(Or, C);
11141     return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
11142   }
11143 
11144   case X86::BI__builtin_ia32_ktestcqi:
11145   case X86::BI__builtin_ia32_ktestzqi:
11146   case X86::BI__builtin_ia32_ktestchi:
11147   case X86::BI__builtin_ia32_ktestzhi:
11148   case X86::BI__builtin_ia32_ktestcsi:
11149   case X86::BI__builtin_ia32_ktestzsi:
11150   case X86::BI__builtin_ia32_ktestcdi:
11151   case X86::BI__builtin_ia32_ktestzdi: {
11152     Intrinsic::ID IID;
11153     switch (BuiltinID) {
11154     default: llvm_unreachable("Unsupported intrinsic!");
11155     case X86::BI__builtin_ia32_ktestcqi:
11156       IID = Intrinsic::x86_avx512_ktestc_b;
11157       break;
11158     case X86::BI__builtin_ia32_ktestzqi:
11159       IID = Intrinsic::x86_avx512_ktestz_b;
11160       break;
11161     case X86::BI__builtin_ia32_ktestchi:
11162       IID = Intrinsic::x86_avx512_ktestc_w;
11163       break;
11164     case X86::BI__builtin_ia32_ktestzhi:
11165       IID = Intrinsic::x86_avx512_ktestz_w;
11166       break;
11167     case X86::BI__builtin_ia32_ktestcsi:
11168       IID = Intrinsic::x86_avx512_ktestc_d;
11169       break;
11170     case X86::BI__builtin_ia32_ktestzsi:
11171       IID = Intrinsic::x86_avx512_ktestz_d;
11172       break;
11173     case X86::BI__builtin_ia32_ktestcdi:
11174       IID = Intrinsic::x86_avx512_ktestc_q;
11175       break;
11176     case X86::BI__builtin_ia32_ktestzdi:
11177       IID = Intrinsic::x86_avx512_ktestz_q;
11178       break;
11179     }
11180 
11181     unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
11182     Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
11183     Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
11184     Function *Intr = CGM.getIntrinsic(IID);
11185     return Builder.CreateCall(Intr, {LHS, RHS});
11186   }
11187 
11188   case X86::BI__builtin_ia32_kaddqi:
11189   case X86::BI__builtin_ia32_kaddhi:
11190   case X86::BI__builtin_ia32_kaddsi:
11191   case X86::BI__builtin_ia32_kadddi: {
11192     Intrinsic::ID IID;
11193     switch (BuiltinID) {
11194     default: llvm_unreachable("Unsupported intrinsic!");
11195     case X86::BI__builtin_ia32_kaddqi:
11196       IID = Intrinsic::x86_avx512_kadd_b;
11197       break;
11198     case X86::BI__builtin_ia32_kaddhi:
11199       IID = Intrinsic::x86_avx512_kadd_w;
11200       break;
11201     case X86::BI__builtin_ia32_kaddsi:
11202       IID = Intrinsic::x86_avx512_kadd_d;
11203       break;
11204     case X86::BI__builtin_ia32_kadddi:
11205       IID = Intrinsic::x86_avx512_kadd_q;
11206       break;
11207     }
11208 
11209     unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
11210     Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
11211     Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
11212     Function *Intr = CGM.getIntrinsic(IID);
11213     Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
11214     return Builder.CreateBitCast(Res, Ops[0]->getType());
11215   }
  // Mask-register AND for every mask width, emitted via EmitX86MaskLogic
  // with the And opcode.
  case X86::BI__builtin_ia32_kandqi:
  case X86::BI__builtin_ia32_kandhi:
  case X86::BI__builtin_ia32_kandsi:
  case X86::BI__builtin_ia32_kanddi:
    return EmitX86MaskLogic(*this, Instruction::And, Ops);
  // Mask-register AND-NOT: same And opcode as kand, plus a trailing `true`.
  // NOTE(review): the flag presumably asks EmitX86MaskLogic to invert an
  // operand before the And - confirm which one against its definition.
  case X86::BI__builtin_ia32_kandnqi:
  case X86::BI__builtin_ia32_kandnhi:
  case X86::BI__builtin_ia32_kandnsi:
  case X86::BI__builtin_ia32_kandndi:
    return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
  // Mask-register OR for every mask width, emitted via EmitX86MaskLogic with
  // the Or opcode.
  case X86::BI__builtin_ia32_korqi:
  case X86::BI__builtin_ia32_korhi:
  case X86::BI__builtin_ia32_korsi:
  case X86::BI__builtin_ia32_kordi:
    return EmitX86MaskLogic(*this, Instruction::Or, Ops);
  // Mask-register XNOR: same Xor opcode as kxor, plus a trailing `true`.
  // NOTE(review): the flag presumably asks EmitX86MaskLogic to invert an
  // operand before the Xor - confirm against its definition.
  case X86::BI__builtin_ia32_kxnorqi:
  case X86::BI__builtin_ia32_kxnorhi:
  case X86::BI__builtin_ia32_kxnorsi:
  case X86::BI__builtin_ia32_kxnordi:
    return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
  // Mask-register XOR for every mask width, emitted via EmitX86MaskLogic
  // with the Xor opcode.
  case X86::BI__builtin_ia32_kxorqi:
  case X86::BI__builtin_ia32_kxorhi:
  case X86::BI__builtin_ia32_kxorsi:
  case X86::BI__builtin_ia32_kxordi:
    return EmitX86MaskLogic(*this, Instruction::Xor,  Ops);
11241   case X86::BI__builtin_ia32_knotqi:
11242   case X86::BI__builtin_ia32_knothi:
11243   case X86::BI__builtin_ia32_knotsi:
11244   case X86::BI__builtin_ia32_knotdi: {
11245     unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
11246     Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
11247     return Builder.CreateBitCast(Builder.CreateNot(Res),
11248                                  Ops[0]->getType());
11249   }
11250   case X86::BI__builtin_ia32_kmovb:
11251   case X86::BI__builtin_ia32_kmovw:
11252   case X86::BI__builtin_ia32_kmovd:
11253   case X86::BI__builtin_ia32_kmovq: {
11254     // Bitcast to vXi1 type and then back to integer. This gets the mask
11255     // register type into the IR, but might be optimized out depending on
11256     // what's around it.
11257     unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
11258     Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
11259     return Builder.CreateBitCast(Res, Ops[0]->getType());
11260   }
11261 
11262   case X86::BI__builtin_ia32_kunpckdi:
11263   case X86::BI__builtin_ia32_kunpcksi:
11264   case X86::BI__builtin_ia32_kunpckhi: {
11265     unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
11266     Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
11267     Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
11268     uint32_t Indices[64];
11269     for (unsigned i = 0; i != NumElts; ++i)
11270       Indices[i] = i;
11271 
11272     // First extract half of each vector. This gives better codegen than
11273     // doing it in a single shuffle.
11274     LHS = Builder.CreateShuffleVector(LHS, LHS,
11275                                       makeArrayRef(Indices, NumElts / 2));
11276     RHS = Builder.CreateShuffleVector(RHS, RHS,
11277                                       makeArrayRef(Indices, NumElts / 2));
11278     // Concat the vectors.
11279     // NOTE: Operands are swapped to match the intrinsic definition.
11280     Value *Res = Builder.CreateShuffleVector(RHS, LHS,
11281                                              makeArrayRef(Indices, NumElts));
11282     return Builder.CreateBitCast(Res, Ops[0]->getType());
11283   }
11284 
11285   case X86::BI__builtin_ia32_vplzcntd_128:
11286   case X86::BI__builtin_ia32_vplzcntd_256:
11287   case X86::BI__builtin_ia32_vplzcntd_512:
11288   case X86::BI__builtin_ia32_vplzcntq_128:
11289   case X86::BI__builtin_ia32_vplzcntq_256:
11290   case X86::BI__builtin_ia32_vplzcntq_512: {
11291     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
11292     return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
11293   }
11294   case X86::BI__builtin_ia32_sqrtss:
11295   case X86::BI__builtin_ia32_sqrtsd: {
11296     Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
11297     Function *F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
11298     A = Builder.CreateCall(F, {A});
11299     return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
11300   }
11301   case X86::BI__builtin_ia32_sqrtsd_round_mask:
11302   case X86::BI__builtin_ia32_sqrtss_round_mask: {
11303     unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
11304     // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
11305     // otherwise keep the intrinsic.
11306     if (CC != 4) {
11307       Intrinsic::ID IID = BuiltinID == X86::BI__builtin_ia32_sqrtsd_round_mask ?
11308                           Intrinsic::x86_avx512_mask_sqrt_sd :
11309                           Intrinsic::x86_avx512_mask_sqrt_ss;
11310       return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
11311     }
11312     Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
11313     Function *F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
11314     A = Builder.CreateCall(F, A);
11315     Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
11316     A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
11317     return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
11318   }
11319   case X86::BI__builtin_ia32_sqrtpd256:
11320   case X86::BI__builtin_ia32_sqrtpd:
11321   case X86::BI__builtin_ia32_sqrtps256:
11322   case X86::BI__builtin_ia32_sqrtps:
11323   case X86::BI__builtin_ia32_sqrtps512:
11324   case X86::BI__builtin_ia32_sqrtpd512: {
11325     if (Ops.size() == 2) {
11326       unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
11327       // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
11328       // otherwise keep the intrinsic.
11329       if (CC != 4) {
11330         Intrinsic::ID IID = BuiltinID == X86::BI__builtin_ia32_sqrtps512 ?
11331                             Intrinsic::x86_avx512_sqrt_ps_512 :
11332                             Intrinsic::x86_avx512_sqrt_pd_512;
11333         return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
11334       }
11335     }
11336     Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
11337     return Builder.CreateCall(F, Ops[0]);
11338   }
  // pabs{b,w,d,q} at every vector width: all are forwarded to EmitX86Abs.
  case X86::BI__builtin_ia32_pabsb128:
  case X86::BI__builtin_ia32_pabsw128:
  case X86::BI__builtin_ia32_pabsd128:
  case X86::BI__builtin_ia32_pabsb256:
  case X86::BI__builtin_ia32_pabsw256:
  case X86::BI__builtin_ia32_pabsd256:
  case X86::BI__builtin_ia32_pabsq128:
  case X86::BI__builtin_ia32_pabsq256:
  case X86::BI__builtin_ia32_pabsb512:
  case X86::BI__builtin_ia32_pabsw512:
  case X86::BI__builtin_ia32_pabsd512:
  case X86::BI__builtin_ia32_pabsq512:
    return EmitX86Abs(*this, Ops);
11352 
  // pmaxs*: EmitX86MinMax is given the signed greater-than predicate.
  case X86::BI__builtin_ia32_pmaxsb128:
  case X86::BI__builtin_ia32_pmaxsw128:
  case X86::BI__builtin_ia32_pmaxsd128:
  case X86::BI__builtin_ia32_pmaxsq128:
  case X86::BI__builtin_ia32_pmaxsb256:
  case X86::BI__builtin_ia32_pmaxsw256:
  case X86::BI__builtin_ia32_pmaxsd256:
  case X86::BI__builtin_ia32_pmaxsq256:
  case X86::BI__builtin_ia32_pmaxsb512:
  case X86::BI__builtin_ia32_pmaxsw512:
  case X86::BI__builtin_ia32_pmaxsd512:
  case X86::BI__builtin_ia32_pmaxsq512:
    return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops);
  // pmaxu*: EmitX86MinMax is given the unsigned greater-than predicate.
  case X86::BI__builtin_ia32_pmaxub128:
  case X86::BI__builtin_ia32_pmaxuw128:
  case X86::BI__builtin_ia32_pmaxud128:
  case X86::BI__builtin_ia32_pmaxuq128:
  case X86::BI__builtin_ia32_pmaxub256:
  case X86::BI__builtin_ia32_pmaxuw256:
  case X86::BI__builtin_ia32_pmaxud256:
  case X86::BI__builtin_ia32_pmaxuq256:
  case X86::BI__builtin_ia32_pmaxub512:
  case X86::BI__builtin_ia32_pmaxuw512:
  case X86::BI__builtin_ia32_pmaxud512:
  case X86::BI__builtin_ia32_pmaxuq512:
    return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops);
  // pmins*: EmitX86MinMax is given the signed less-than predicate.
  case X86::BI__builtin_ia32_pminsb128:
  case X86::BI__builtin_ia32_pminsw128:
  case X86::BI__builtin_ia32_pminsd128:
  case X86::BI__builtin_ia32_pminsq128:
  case X86::BI__builtin_ia32_pminsb256:
  case X86::BI__builtin_ia32_pminsw256:
  case X86::BI__builtin_ia32_pminsd256:
  case X86::BI__builtin_ia32_pminsq256:
  case X86::BI__builtin_ia32_pminsb512:
  case X86::BI__builtin_ia32_pminsw512:
  case X86::BI__builtin_ia32_pminsd512:
  case X86::BI__builtin_ia32_pminsq512:
    return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops);
  // pminu*: EmitX86MinMax is given the unsigned less-than predicate.
  case X86::BI__builtin_ia32_pminub128:
  case X86::BI__builtin_ia32_pminuw128:
  case X86::BI__builtin_ia32_pminud128:
  case X86::BI__builtin_ia32_pminuq128:
  case X86::BI__builtin_ia32_pminub256:
  case X86::BI__builtin_ia32_pminuw256:
  case X86::BI__builtin_ia32_pminud256:
  case X86::BI__builtin_ia32_pminuq256:
  case X86::BI__builtin_ia32_pminub512:
  case X86::BI__builtin_ia32_pminuw512:
  case X86::BI__builtin_ia32_pminud512:
  case X86::BI__builtin_ia32_pminuq512:
    return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops);
11405 
  // pmuludq at every vector width: the unsigned flavour of EmitX86Muldq.
  case X86::BI__builtin_ia32_pmuludq128:
  case X86::BI__builtin_ia32_pmuludq256:
  case X86::BI__builtin_ia32_pmuludq512:
    return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
11410 
  // pmuldq at every vector width: the signed flavour of EmitX86Muldq.
  case X86::BI__builtin_ia32_pmuldq128:
  case X86::BI__builtin_ia32_pmuldq256:
  case X86::BI__builtin_ia32_pmuldq512:
    return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
11415 
  // pternlog*_mask builtins: emitted by EmitX86Ternlog with ZeroMask off
  // (the *_maskz builtins turn it on).
  case X86::BI__builtin_ia32_pternlogd512_mask:
  case X86::BI__builtin_ia32_pternlogq512_mask:
  case X86::BI__builtin_ia32_pternlogd128_mask:
  case X86::BI__builtin_ia32_pternlogd256_mask:
  case X86::BI__builtin_ia32_pternlogq128_mask:
  case X86::BI__builtin_ia32_pternlogq256_mask:
    return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
11423 
  // pternlog*_maskz builtins: emitted by EmitX86Ternlog with ZeroMask on
  // (the *_mask builtins leave it off).
  case X86::BI__builtin_ia32_pternlogd512_maskz:
  case X86::BI__builtin_ia32_pternlogq512_maskz:
  case X86::BI__builtin_ia32_pternlogd128_maskz:
  case X86::BI__builtin_ia32_pternlogd256_maskz:
  case X86::BI__builtin_ia32_pternlogq128_maskz:
  case X86::BI__builtin_ia32_pternlogq256_maskz:
    return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
11431 
  // vpshld{d,q,w}: a funnel shift with the operands passed in their original
  // order.
  // NOTE(review): the trailing `false` presumably selects the left-shift form
  // (the vpshrd* cases pass `true`) - confirm against EmitX86FunnelShift.
  case X86::BI__builtin_ia32_vpshldd128:
  case X86::BI__builtin_ia32_vpshldd256:
  case X86::BI__builtin_ia32_vpshldd512:
  case X86::BI__builtin_ia32_vpshldq128:
  case X86::BI__builtin_ia32_vpshldq256:
  case X86::BI__builtin_ia32_vpshldq512:
  case X86::BI__builtin_ia32_vpshldw128:
  case X86::BI__builtin_ia32_vpshldw256:
  case X86::BI__builtin_ia32_vpshldw512:
    return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
11442 
11443   case X86::BI__builtin_ia32_vpshrdd128:
11444   case X86::BI__builtin_ia32_vpshrdd256:
11445   case X86::BI__builtin_ia32_vpshrdd512:
11446   case X86::BI__builtin_ia32_vpshrdq128:
11447   case X86::BI__builtin_ia32_vpshrdq256:
11448   case X86::BI__builtin_ia32_vpshrdq512:
11449   case X86::BI__builtin_ia32_vpshrdw128:
11450   case X86::BI__builtin_ia32_vpshrdw256:
11451   case X86::BI__builtin_ia32_vpshrdw512:
11452     // Ops 0 and 1 are swapped.
11453     return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
11454 
11455   case X86::BI__builtin_ia32_vpshldvd128:
11456   case X86::BI__builtin_ia32_vpshldvd256:
11457   case X86::BI__builtin_ia32_vpshldvd512:
11458   case X86::BI__builtin_ia32_vpshldvq128:
11459   case X86::BI__builtin_ia32_vpshldvq256:
11460   case X86::BI__builtin_ia32_vpshldvq512:
11461   case X86::BI__builtin_ia32_vpshldvw128:
11462   case X86::BI__builtin_ia32_vpshldvw256:
11463   case X86::BI__builtin_ia32_vpshldvw512:
11464     return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
11465 
11466   case X86::BI__builtin_ia32_vpshrdvd128:
11467   case X86::BI__builtin_ia32_vpshrdvd256:
11468   case X86::BI__builtin_ia32_vpshrdvd512:
11469   case X86::BI__builtin_ia32_vpshrdvq128:
11470   case X86::BI__builtin_ia32_vpshrdvq256:
11471   case X86::BI__builtin_ia32_vpshrdvq512:
11472   case X86::BI__builtin_ia32_vpshrdvw128:
11473   case X86::BI__builtin_ia32_vpshrdvw256:
11474   case X86::BI__builtin_ia32_vpshrdvw512:
11475     // Ops 0 and 1 are swapped.
11476     return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
11477 
11478   // 3DNow!
11479   case X86::BI__builtin_ia32_pswapdsf:
11480   case X86::BI__builtin_ia32_pswapdsi: {
11481     llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
11482     Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
11483     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
11484     return Builder.CreateCall(F, Ops, "pswapd");
11485   }
11486   case X86::BI__builtin_ia32_rdrand16_step:
11487   case X86::BI__builtin_ia32_rdrand32_step:
11488   case X86::BI__builtin_ia32_rdrand64_step:
11489   case X86::BI__builtin_ia32_rdseed16_step:
11490   case X86::BI__builtin_ia32_rdseed32_step:
11491   case X86::BI__builtin_ia32_rdseed64_step: {
11492     Intrinsic::ID ID;
11493     switch (BuiltinID) {
11494     default: llvm_unreachable("Unsupported intrinsic!");
11495     case X86::BI__builtin_ia32_rdrand16_step:
11496       ID = Intrinsic::x86_rdrand_16;
11497       break;
11498     case X86::BI__builtin_ia32_rdrand32_step:
11499       ID = Intrinsic::x86_rdrand_32;
11500       break;
11501     case X86::BI__builtin_ia32_rdrand64_step:
11502       ID = Intrinsic::x86_rdrand_64;
11503       break;
11504     case X86::BI__builtin_ia32_rdseed16_step:
11505       ID = Intrinsic::x86_rdseed_16;
11506       break;
11507     case X86::BI__builtin_ia32_rdseed32_step:
11508       ID = Intrinsic::x86_rdseed_32;
11509       break;
11510     case X86::BI__builtin_ia32_rdseed64_step:
11511       ID = Intrinsic::x86_rdseed_64;
11512       break;
11513     }
11514 
11515     Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
11516     Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
11517                                       Ops[0]);
11518     return Builder.CreateExtractValue(Call, 1);
11519   }
11520   case X86::BI__builtin_ia32_addcarryx_u32:
11521   case X86::BI__builtin_ia32_addcarryx_u64:
11522   case X86::BI__builtin_ia32_subborrow_u32:
11523   case X86::BI__builtin_ia32_subborrow_u64: {
11524     Intrinsic::ID IID;
11525     switch (BuiltinID) {
11526     default: llvm_unreachable("Unsupported intrinsic!");
11527     case X86::BI__builtin_ia32_addcarryx_u32:
11528       IID = Intrinsic::x86_addcarry_32;
11529       break;
11530     case X86::BI__builtin_ia32_addcarryx_u64:
11531       IID = Intrinsic::x86_addcarry_64;
11532       break;
11533     case X86::BI__builtin_ia32_subborrow_u32:
11534       IID = Intrinsic::x86_subborrow_32;
11535       break;
11536     case X86::BI__builtin_ia32_subborrow_u64:
11537       IID = Intrinsic::x86_subborrow_64;
11538       break;
11539     }
11540 
11541     Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
11542                                      { Ops[0], Ops[1], Ops[2] });
11543     Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
11544                                       Ops[3]);
11545     return Builder.CreateExtractValue(Call, 0);
11546   }
11547 
11548   case X86::BI__builtin_ia32_fpclassps128_mask:
11549   case X86::BI__builtin_ia32_fpclassps256_mask:
11550   case X86::BI__builtin_ia32_fpclassps512_mask:
11551   case X86::BI__builtin_ia32_fpclasspd128_mask:
11552   case X86::BI__builtin_ia32_fpclasspd256_mask:
11553   case X86::BI__builtin_ia32_fpclasspd512_mask: {
11554     unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
11555     Value *MaskIn = Ops[2];
11556     Ops.erase(&Ops[2]);
11557 
11558     Intrinsic::ID ID;
11559     switch (BuiltinID) {
11560     default: llvm_unreachable("Unsupported intrinsic!");
11561     case X86::BI__builtin_ia32_fpclassps128_mask:
11562       ID = Intrinsic::x86_avx512_fpclass_ps_128;
11563       break;
11564     case X86::BI__builtin_ia32_fpclassps256_mask:
11565       ID = Intrinsic::x86_avx512_fpclass_ps_256;
11566       break;
11567     case X86::BI__builtin_ia32_fpclassps512_mask:
11568       ID = Intrinsic::x86_avx512_fpclass_ps_512;
11569       break;
11570     case X86::BI__builtin_ia32_fpclasspd128_mask:
11571       ID = Intrinsic::x86_avx512_fpclass_pd_128;
11572       break;
11573     case X86::BI__builtin_ia32_fpclasspd256_mask:
11574       ID = Intrinsic::x86_avx512_fpclass_pd_256;
11575       break;
11576     case X86::BI__builtin_ia32_fpclasspd512_mask:
11577       ID = Intrinsic::x86_avx512_fpclass_pd_512;
11578       break;
11579     }
11580 
11581     Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
11582     return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
11583   }
11584 
11585   case X86::BI__builtin_ia32_vpmultishiftqb128:
11586   case X86::BI__builtin_ia32_vpmultishiftqb256:
11587   case X86::BI__builtin_ia32_vpmultishiftqb512: {
11588     Intrinsic::ID ID;
11589     switch (BuiltinID) {
11590     default: llvm_unreachable("Unsupported intrinsic!");
11591     case X86::BI__builtin_ia32_vpmultishiftqb128:
11592       ID = Intrinsic::x86_avx512_pmultishift_qb_128;
11593       break;
11594     case X86::BI__builtin_ia32_vpmultishiftqb256:
11595       ID = Intrinsic::x86_avx512_pmultishift_qb_256;
11596       break;
11597     case X86::BI__builtin_ia32_vpmultishiftqb512:
11598       ID = Intrinsic::x86_avx512_pmultishift_qb_512;
11599       break;
11600     }
11601 
11602     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
11603   }
11604 
11605   case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
11606   case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
11607   case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
11608     unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
11609     Value *MaskIn = Ops[2];
11610     Ops.erase(&Ops[2]);
11611 
11612     Intrinsic::ID ID;
11613     switch (BuiltinID) {
11614     default: llvm_unreachable("Unsupported intrinsic!");
11615     case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
11616       ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
11617       break;
11618     case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
11619       ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
11620       break;
11621     case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
11622       ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
11623       break;
11624     }
11625 
11626     Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
11627     return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
11628   }
11629 
11630   // packed comparison intrinsics
11631   case X86::BI__builtin_ia32_cmpeqps:
11632   case X86::BI__builtin_ia32_cmpeqpd:
11633     return getVectorFCmpIR(CmpInst::FCMP_OEQ);
11634   case X86::BI__builtin_ia32_cmpltps:
11635   case X86::BI__builtin_ia32_cmpltpd:
11636     return getVectorFCmpIR(CmpInst::FCMP_OLT);
11637   case X86::BI__builtin_ia32_cmpleps:
11638   case X86::BI__builtin_ia32_cmplepd:
11639     return getVectorFCmpIR(CmpInst::FCMP_OLE);
11640   case X86::BI__builtin_ia32_cmpunordps:
11641   case X86::BI__builtin_ia32_cmpunordpd:
11642     return getVectorFCmpIR(CmpInst::FCMP_UNO);
11643   case X86::BI__builtin_ia32_cmpneqps:
11644   case X86::BI__builtin_ia32_cmpneqpd:
11645     return getVectorFCmpIR(CmpInst::FCMP_UNE);
11646   case X86::BI__builtin_ia32_cmpnltps:
11647   case X86::BI__builtin_ia32_cmpnltpd:
11648     return getVectorFCmpIR(CmpInst::FCMP_UGE);
11649   case X86::BI__builtin_ia32_cmpnleps:
11650   case X86::BI__builtin_ia32_cmpnlepd:
11651     return getVectorFCmpIR(CmpInst::FCMP_UGT);
11652   case X86::BI__builtin_ia32_cmpordps:
11653   case X86::BI__builtin_ia32_cmpordpd:
11654     return getVectorFCmpIR(CmpInst::FCMP_ORD);
11655   case X86::BI__builtin_ia32_cmpps:
11656   case X86::BI__builtin_ia32_cmpps256:
11657   case X86::BI__builtin_ia32_cmppd:
11658   case X86::BI__builtin_ia32_cmppd256:
11659   case X86::BI__builtin_ia32_cmpps128_mask:
11660   case X86::BI__builtin_ia32_cmpps256_mask:
11661   case X86::BI__builtin_ia32_cmpps512_mask:
11662   case X86::BI__builtin_ia32_cmppd128_mask:
11663   case X86::BI__builtin_ia32_cmppd256_mask:
11664   case X86::BI__builtin_ia32_cmppd512_mask: {
11665     // Lowering vector comparisons to fcmp instructions, while
11666     // ignoring signalling behaviour requested
11667     // ignoring rounding mode requested
    // This is only possible as long as FENV_ACCESS is not implemented.
11669     // See also: https://reviews.llvm.org/D45616
11670 
    // The third argument is the comparison condition, an integer in the
    // range [0, 31].
11673     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
11674 
11675     // Lowering to IR fcmp instruction.
11676     // Ignoring requested signaling behaviour,
11677     // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
11678     FCmpInst::Predicate Pred;
11679     switch (CC) {
11680     case 0x00: Pred = FCmpInst::FCMP_OEQ;   break;
11681     case 0x01: Pred = FCmpInst::FCMP_OLT;   break;
11682     case 0x02: Pred = FCmpInst::FCMP_OLE;   break;
11683     case 0x03: Pred = FCmpInst::FCMP_UNO;   break;
11684     case 0x04: Pred = FCmpInst::FCMP_UNE;   break;
11685     case 0x05: Pred = FCmpInst::FCMP_UGE;   break;
11686     case 0x06: Pred = FCmpInst::FCMP_UGT;   break;
11687     case 0x07: Pred = FCmpInst::FCMP_ORD;   break;
11688     case 0x08: Pred = FCmpInst::FCMP_UEQ;   break;
11689     case 0x09: Pred = FCmpInst::FCMP_ULT;   break;
11690     case 0x0a: Pred = FCmpInst::FCMP_ULE;   break;
11691     case 0x0b: Pred = FCmpInst::FCMP_FALSE; break;
11692     case 0x0c: Pred = FCmpInst::FCMP_ONE;   break;
11693     case 0x0d: Pred = FCmpInst::FCMP_OGE;   break;
11694     case 0x0e: Pred = FCmpInst::FCMP_OGT;   break;
11695     case 0x0f: Pred = FCmpInst::FCMP_TRUE;  break;
11696     case 0x10: Pred = FCmpInst::FCMP_OEQ;   break;
11697     case 0x11: Pred = FCmpInst::FCMP_OLT;   break;
11698     case 0x12: Pred = FCmpInst::FCMP_OLE;   break;
11699     case 0x13: Pred = FCmpInst::FCMP_UNO;   break;
11700     case 0x14: Pred = FCmpInst::FCMP_UNE;   break;
11701     case 0x15: Pred = FCmpInst::FCMP_UGE;   break;
11702     case 0x16: Pred = FCmpInst::FCMP_UGT;   break;
11703     case 0x17: Pred = FCmpInst::FCMP_ORD;   break;
11704     case 0x18: Pred = FCmpInst::FCMP_UEQ;   break;
11705     case 0x19: Pred = FCmpInst::FCMP_ULT;   break;
11706     case 0x1a: Pred = FCmpInst::FCMP_ULE;   break;
11707     case 0x1b: Pred = FCmpInst::FCMP_FALSE; break;
11708     case 0x1c: Pred = FCmpInst::FCMP_ONE;   break;
11709     case 0x1d: Pred = FCmpInst::FCMP_OGE;   break;
11710     case 0x1e: Pred = FCmpInst::FCMP_OGT;   break;
11711     case 0x1f: Pred = FCmpInst::FCMP_TRUE;  break;
11712     default: llvm_unreachable("Unhandled CC");
11713     }
11714 
11715     // Builtins without the _mask suffix return a vector of integers
11716     // of the same width as the input vectors
11717     switch (BuiltinID) {
11718     case X86::BI__builtin_ia32_cmpps512_mask:
11719     case X86::BI__builtin_ia32_cmppd512_mask:
11720     case X86::BI__builtin_ia32_cmpps128_mask:
11721     case X86::BI__builtin_ia32_cmpps256_mask:
11722     case X86::BI__builtin_ia32_cmppd128_mask:
11723     case X86::BI__builtin_ia32_cmppd256_mask: {
11724       unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
11725       Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
11726       return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
11727     }
11728     default:
11729       return getVectorFCmpIR(Pred);
11730     }
11731   }
11732 
11733   // SSE scalar comparison intrinsics
11734   case X86::BI__builtin_ia32_cmpeqss:
11735     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
11736   case X86::BI__builtin_ia32_cmpltss:
11737     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
11738   case X86::BI__builtin_ia32_cmpless:
11739     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
11740   case X86::BI__builtin_ia32_cmpunordss:
11741     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
11742   case X86::BI__builtin_ia32_cmpneqss:
11743     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
11744   case X86::BI__builtin_ia32_cmpnltss:
11745     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
11746   case X86::BI__builtin_ia32_cmpnless:
11747     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
11748   case X86::BI__builtin_ia32_cmpordss:
11749     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
11750   case X86::BI__builtin_ia32_cmpeqsd:
11751     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
11752   case X86::BI__builtin_ia32_cmpltsd:
11753     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
11754   case X86::BI__builtin_ia32_cmplesd:
11755     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
11756   case X86::BI__builtin_ia32_cmpunordsd:
11757     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
11758   case X86::BI__builtin_ia32_cmpneqsd:
11759     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
11760   case X86::BI__builtin_ia32_cmpnltsd:
11761     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
11762   case X86::BI__builtin_ia32_cmpnlesd:
11763     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
11764   case X86::BI__builtin_ia32_cmpordsd:
11765     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
11766 
11767   case X86::BI__emul:
11768   case X86::BI__emulu: {
11769     llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
11770     bool isSigned = (BuiltinID == X86::BI__emul);
11771     Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
11772     Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
11773     return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
11774   }
11775   case X86::BI__mulh:
11776   case X86::BI__umulh:
11777   case X86::BI_mul128:
11778   case X86::BI_umul128: {
11779     llvm::Type *ResType = ConvertType(E->getType());
11780     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11781 
11782     bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
11783     Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
11784     Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
11785 
11786     Value *MulResult, *HigherBits;
11787     if (IsSigned) {
11788       MulResult = Builder.CreateNSWMul(LHS, RHS);
11789       HigherBits = Builder.CreateAShr(MulResult, 64);
11790     } else {
11791       MulResult = Builder.CreateNUWMul(LHS, RHS);
11792       HigherBits = Builder.CreateLShr(MulResult, 64);
11793     }
11794     HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
11795 
11796     if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
11797       return HigherBits;
11798 
11799     Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
11800     Builder.CreateStore(HigherBits, HighBitsAddress);
11801     return Builder.CreateIntCast(MulResult, ResType, IsSigned);
11802   }
11803 
11804   case X86::BI__faststorefence: {
11805     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
11806                                llvm::SyncScope::System);
11807   }
11808   case X86::BI__shiftleft128:
11809   case X86::BI__shiftright128: {
11810     // FIXME: Once fshl/fshr no longer add an unneeded and and cmov, do this:
11811     // llvm::Function *F = CGM.getIntrinsic(
11812     //   BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
11813     //   Int64Ty);
11814     // Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
11815     // return Builder.CreateCall(F, Ops);
11816     llvm::Type *Int128Ty = Builder.getInt128Ty();
11817     Value *HighPart128 =
11818         Builder.CreateShl(Builder.CreateZExt(Ops[1], Int128Ty), 64);
11819     Value *LowPart128 = Builder.CreateZExt(Ops[0], Int128Ty);
11820     Value *Val = Builder.CreateOr(HighPart128, LowPart128);
11821     Value *Amt = Builder.CreateAnd(Builder.CreateZExt(Ops[2], Int128Ty),
11822                                    llvm::ConstantInt::get(Int128Ty, 0x3f));
11823     Value *Res;
11824     if (BuiltinID == X86::BI__shiftleft128)
11825       Res = Builder.CreateLShr(Builder.CreateShl(Val, Amt), 64);
11826     else
11827       Res = Builder.CreateLShr(Val, Amt);
11828     return Builder.CreateTrunc(Res, Int64Ty);
11829   }
11830   case X86::BI_ReadWriteBarrier:
11831   case X86::BI_ReadBarrier:
11832   case X86::BI_WriteBarrier: {
11833     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
11834                                llvm::SyncScope::SingleThread);
11835   }
11836   case X86::BI_BitScanForward:
11837   case X86::BI_BitScanForward64:
11838     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
11839   case X86::BI_BitScanReverse:
11840   case X86::BI_BitScanReverse64:
11841     return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
11842 
11843   case X86::BI_InterlockedAnd64:
11844     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
11845   case X86::BI_InterlockedExchange64:
11846     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
11847   case X86::BI_InterlockedExchangeAdd64:
11848     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
11849   case X86::BI_InterlockedExchangeSub64:
11850     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
11851   case X86::BI_InterlockedOr64:
11852     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
11853   case X86::BI_InterlockedXor64:
11854     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
11855   case X86::BI_InterlockedDecrement64:
11856     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
11857   case X86::BI_InterlockedIncrement64:
11858     return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
11859   case X86::BI_InterlockedCompareExchange128: {
11860     // InterlockedCompareExchange128 doesn't directly refer to 128bit ints,
11861     // instead it takes pointers to 64bit ints for Destination and
11862     // ComparandResult, and exchange is taken as two 64bit ints (high & low).
11863     // The previous value is written to ComparandResult, and success is
11864     // returned.
11865 
11866     llvm::Type *Int128Ty = Builder.getInt128Ty();
11867     llvm::Type *Int128PtrTy = Int128Ty->getPointerTo();
11868 
11869     Value *Destination =
11870         Builder.CreateBitCast(Ops[0], Int128PtrTy);
11871     Value *ExchangeHigh128 = Builder.CreateZExt(Ops[1], Int128Ty);
11872     Value *ExchangeLow128 = Builder.CreateZExt(Ops[2], Int128Ty);
11873     Address ComparandResult(Builder.CreateBitCast(Ops[3], Int128PtrTy),
11874                             getContext().toCharUnitsFromBits(128));
11875 
11876     Value *Exchange = Builder.CreateOr(
11877         Builder.CreateShl(ExchangeHigh128, 64, "", false, false),
11878         ExchangeLow128);
11879 
11880     Value *Comparand = Builder.CreateLoad(ComparandResult);
11881 
11882     AtomicCmpXchgInst *CXI =
11883         Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
11884                                     AtomicOrdering::SequentiallyConsistent,
11885                                     AtomicOrdering::SequentiallyConsistent);
11886     CXI->setVolatile(true);
11887 
11888     // Write the result back to the inout pointer.
11889     Builder.CreateStore(Builder.CreateExtractValue(CXI, 0), ComparandResult);
11890 
11891     // Get the success boolean and zero extend it to i8.
11892     Value *Success = Builder.CreateExtractValue(CXI, 1);
11893     return Builder.CreateZExt(Success, ConvertType(E->getType()));
11894   }
11895 
11896   case X86::BI_AddressOfReturnAddress: {
11897     Function *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
11898     return Builder.CreateCall(F);
11899   }
11900   case X86::BI__stosb: {
11901     // We treat __stosb as a volatile memset - it may not generate "rep stosb"
11902     // instruction, but it will create a memset that won't be optimized away.
11903     return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true);
11904   }
11905   case X86::BI__ud2:
11906     // llvm.trap makes a ud2a instruction on x86.
11907     return EmitTrapCall(Intrinsic::trap);
11908   case X86::BI__int2c: {
11909     // This syscall signals a driver assertion failure in x86 NT kernels.
11910     llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
11911     llvm::InlineAsm *IA =
11912         llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true);
11913     llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
11914         getLLVMContext(), llvm::AttributeList::FunctionIndex,
11915         llvm::Attribute::NoReturn);
11916     llvm::CallInst *CI = Builder.CreateCall(IA);
11917     CI->setAttributes(NoReturnAttr);
11918     return CI;
11919   }
11920   case X86::BI__readfsbyte:
11921   case X86::BI__readfsword:
11922   case X86::BI__readfsdword:
11923   case X86::BI__readfsqword: {
11924     llvm::Type *IntTy = ConvertType(E->getType());
11925     Value *Ptr =
11926         Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 257));
11927     LoadInst *Load = Builder.CreateAlignedLoad(
11928         IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
11929     Load->setVolatile(true);
11930     return Load;
11931   }
11932   case X86::BI__readgsbyte:
11933   case X86::BI__readgsword:
11934   case X86::BI__readgsdword:
11935   case X86::BI__readgsqword: {
11936     llvm::Type *IntTy = ConvertType(E->getType());
11937     Value *Ptr =
11938         Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 256));
11939     LoadInst *Load = Builder.CreateAlignedLoad(
11940         IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
11941     Load->setVolatile(true);
11942     return Load;
11943   }
11944   case X86::BI__builtin_ia32_paddsb512:
11945   case X86::BI__builtin_ia32_paddsw512:
11946   case X86::BI__builtin_ia32_paddsb256:
11947   case X86::BI__builtin_ia32_paddsw256:
11948   case X86::BI__builtin_ia32_paddsb128:
11949   case X86::BI__builtin_ia32_paddsw128:
11950     return EmitX86AddSubSatExpr(*this, Ops, true, true);
11951   case X86::BI__builtin_ia32_paddusb512:
11952   case X86::BI__builtin_ia32_paddusw512:
11953   case X86::BI__builtin_ia32_paddusb256:
11954   case X86::BI__builtin_ia32_paddusw256:
11955   case X86::BI__builtin_ia32_paddusb128:
11956   case X86::BI__builtin_ia32_paddusw128:
11957     return EmitX86AddSubSatExpr(*this, Ops, false, true);
11958   case X86::BI__builtin_ia32_psubsb512:
11959   case X86::BI__builtin_ia32_psubsw512:
11960   case X86::BI__builtin_ia32_psubsb256:
11961   case X86::BI__builtin_ia32_psubsw256:
11962   case X86::BI__builtin_ia32_psubsb128:
11963   case X86::BI__builtin_ia32_psubsw128:
11964     return EmitX86AddSubSatExpr(*this, Ops, true, false);
11965   case X86::BI__builtin_ia32_psubusb512:
11966   case X86::BI__builtin_ia32_psubusw512:
11967   case X86::BI__builtin_ia32_psubusb256:
11968   case X86::BI__builtin_ia32_psubusw256:
11969   case X86::BI__builtin_ia32_psubusb128:
11970   case X86::BI__builtin_ia32_psubusw128:
11971     return EmitX86AddSubSatExpr(*this, Ops, false, false);
11972   }
11973 }
11974 
11975 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
11976                                            const CallExpr *E) {
11977   SmallVector<Value*, 4> Ops;
11978 
11979   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
11980     Ops.push_back(EmitScalarExpr(E->getArg(i)));
11981 
11982   Intrinsic::ID ID = Intrinsic::not_intrinsic;
11983 
11984   switch (BuiltinID) {
11985   default: return nullptr;
11986 
11987   // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
11988   // call __builtin_readcyclecounter.
11989   case PPC::BI__builtin_ppc_get_timebase:
11990     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
11991 
11992   // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
11993   case PPC::BI__builtin_altivec_lvx:
11994   case PPC::BI__builtin_altivec_lvxl:
11995   case PPC::BI__builtin_altivec_lvebx:
11996   case PPC::BI__builtin_altivec_lvehx:
11997   case PPC::BI__builtin_altivec_lvewx:
11998   case PPC::BI__builtin_altivec_lvsl:
11999   case PPC::BI__builtin_altivec_lvsr:
12000   case PPC::BI__builtin_vsx_lxvd2x:
12001   case PPC::BI__builtin_vsx_lxvw4x:
12002   case PPC::BI__builtin_vsx_lxvd2x_be:
12003   case PPC::BI__builtin_vsx_lxvw4x_be:
12004   case PPC::BI__builtin_vsx_lxvl:
12005   case PPC::BI__builtin_vsx_lxvll:
12006   {
12007     if(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
12008        BuiltinID == PPC::BI__builtin_vsx_lxvll){
12009       Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
12010     }else {
12011       Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
12012       Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
12013       Ops.pop_back();
12014     }
12015 
12016     switch (BuiltinID) {
12017     default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
12018     case PPC::BI__builtin_altivec_lvx:
12019       ID = Intrinsic::ppc_altivec_lvx;
12020       break;
12021     case PPC::BI__builtin_altivec_lvxl:
12022       ID = Intrinsic::ppc_altivec_lvxl;
12023       break;
12024     case PPC::BI__builtin_altivec_lvebx:
12025       ID = Intrinsic::ppc_altivec_lvebx;
12026       break;
12027     case PPC::BI__builtin_altivec_lvehx:
12028       ID = Intrinsic::ppc_altivec_lvehx;
12029       break;
12030     case PPC::BI__builtin_altivec_lvewx:
12031       ID = Intrinsic::ppc_altivec_lvewx;
12032       break;
12033     case PPC::BI__builtin_altivec_lvsl:
12034       ID = Intrinsic::ppc_altivec_lvsl;
12035       break;
12036     case PPC::BI__builtin_altivec_lvsr:
12037       ID = Intrinsic::ppc_altivec_lvsr;
12038       break;
12039     case PPC::BI__builtin_vsx_lxvd2x:
12040       ID = Intrinsic::ppc_vsx_lxvd2x;
12041       break;
12042     case PPC::BI__builtin_vsx_lxvw4x:
12043       ID = Intrinsic::ppc_vsx_lxvw4x;
12044       break;
12045     case PPC::BI__builtin_vsx_lxvd2x_be:
12046       ID = Intrinsic::ppc_vsx_lxvd2x_be;
12047       break;
12048     case PPC::BI__builtin_vsx_lxvw4x_be:
12049       ID = Intrinsic::ppc_vsx_lxvw4x_be;
12050       break;
12051     case PPC::BI__builtin_vsx_lxvl:
12052       ID = Intrinsic::ppc_vsx_lxvl;
12053       break;
12054     case PPC::BI__builtin_vsx_lxvll:
12055       ID = Intrinsic::ppc_vsx_lxvll;
12056       break;
12057     }
12058     llvm::Function *F = CGM.getIntrinsic(ID);
12059     return Builder.CreateCall(F, Ops, "");
12060   }
12061 
12062   // vec_st, vec_xst_be
12063   case PPC::BI__builtin_altivec_stvx:
12064   case PPC::BI__builtin_altivec_stvxl:
12065   case PPC::BI__builtin_altivec_stvebx:
12066   case PPC::BI__builtin_altivec_stvehx:
12067   case PPC::BI__builtin_altivec_stvewx:
12068   case PPC::BI__builtin_vsx_stxvd2x:
12069   case PPC::BI__builtin_vsx_stxvw4x:
12070   case PPC::BI__builtin_vsx_stxvd2x_be:
12071   case PPC::BI__builtin_vsx_stxvw4x_be:
12072   case PPC::BI__builtin_vsx_stxvl:
12073   case PPC::BI__builtin_vsx_stxvll:
12074   {
12075     if(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
12076       BuiltinID == PPC::BI__builtin_vsx_stxvll ){
12077       Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
12078     }else {
12079       Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
12080       Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
12081       Ops.pop_back();
12082     }
12083 
12084     switch (BuiltinID) {
12085     default: llvm_unreachable("Unsupported st intrinsic!");
12086     case PPC::BI__builtin_altivec_stvx:
12087       ID = Intrinsic::ppc_altivec_stvx;
12088       break;
12089     case PPC::BI__builtin_altivec_stvxl:
12090       ID = Intrinsic::ppc_altivec_stvxl;
12091       break;
12092     case PPC::BI__builtin_altivec_stvebx:
12093       ID = Intrinsic::ppc_altivec_stvebx;
12094       break;
12095     case PPC::BI__builtin_altivec_stvehx:
12096       ID = Intrinsic::ppc_altivec_stvehx;
12097       break;
12098     case PPC::BI__builtin_altivec_stvewx:
12099       ID = Intrinsic::ppc_altivec_stvewx;
12100       break;
12101     case PPC::BI__builtin_vsx_stxvd2x:
12102       ID = Intrinsic::ppc_vsx_stxvd2x;
12103       break;
12104     case PPC::BI__builtin_vsx_stxvw4x:
12105       ID = Intrinsic::ppc_vsx_stxvw4x;
12106       break;
12107     case PPC::BI__builtin_vsx_stxvd2x_be:
12108       ID = Intrinsic::ppc_vsx_stxvd2x_be;
12109       break;
12110     case PPC::BI__builtin_vsx_stxvw4x_be:
12111       ID = Intrinsic::ppc_vsx_stxvw4x_be;
12112       break;
12113     case PPC::BI__builtin_vsx_stxvl:
12114       ID = Intrinsic::ppc_vsx_stxvl;
12115       break;
12116     case PPC::BI__builtin_vsx_stxvll:
12117       ID = Intrinsic::ppc_vsx_stxvll;
12118       break;
12119     }
12120     llvm::Function *F = CGM.getIntrinsic(ID);
12121     return Builder.CreateCall(F, Ops, "");
12122   }
12123   // Square root
12124   case PPC::BI__builtin_vsx_xvsqrtsp:
12125   case PPC::BI__builtin_vsx_xvsqrtdp: {
12126     llvm::Type *ResultType = ConvertType(E->getType());
12127     Value *X = EmitScalarExpr(E->getArg(0));
12128     ID = Intrinsic::sqrt;
12129     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
12130     return Builder.CreateCall(F, X);
12131   }
12132   // Count leading zeros
12133   case PPC::BI__builtin_altivec_vclzb:
12134   case PPC::BI__builtin_altivec_vclzh:
12135   case PPC::BI__builtin_altivec_vclzw:
12136   case PPC::BI__builtin_altivec_vclzd: {
12137     llvm::Type *ResultType = ConvertType(E->getType());
12138     Value *X = EmitScalarExpr(E->getArg(0));
12139     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
12140     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
12141     return Builder.CreateCall(F, {X, Undef});
12142   }
12143   case PPC::BI__builtin_altivec_vctzb:
12144   case PPC::BI__builtin_altivec_vctzh:
12145   case PPC::BI__builtin_altivec_vctzw:
12146   case PPC::BI__builtin_altivec_vctzd: {
12147     llvm::Type *ResultType = ConvertType(E->getType());
12148     Value *X = EmitScalarExpr(E->getArg(0));
12149     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
12150     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
12151     return Builder.CreateCall(F, {X, Undef});
12152   }
12153   case PPC::BI__builtin_altivec_vpopcntb:
12154   case PPC::BI__builtin_altivec_vpopcnth:
12155   case PPC::BI__builtin_altivec_vpopcntw:
12156   case PPC::BI__builtin_altivec_vpopcntd: {
12157     llvm::Type *ResultType = ConvertType(E->getType());
12158     Value *X = EmitScalarExpr(E->getArg(0));
12159     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
12160     return Builder.CreateCall(F, X);
12161   }
12162   // Copy sign
12163   case PPC::BI__builtin_vsx_xvcpsgnsp:
12164   case PPC::BI__builtin_vsx_xvcpsgndp: {
12165     llvm::Type *ResultType = ConvertType(E->getType());
12166     Value *X = EmitScalarExpr(E->getArg(0));
12167     Value *Y = EmitScalarExpr(E->getArg(1));
12168     ID = Intrinsic::copysign;
12169     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
12170     return Builder.CreateCall(F, {X, Y});
12171   }
12172   // Rounding/truncation
12173   case PPC::BI__builtin_vsx_xvrspip:
12174   case PPC::BI__builtin_vsx_xvrdpip:
12175   case PPC::BI__builtin_vsx_xvrdpim:
12176   case PPC::BI__builtin_vsx_xvrspim:
12177   case PPC::BI__builtin_vsx_xvrdpi:
12178   case PPC::BI__builtin_vsx_xvrspi:
12179   case PPC::BI__builtin_vsx_xvrdpic:
12180   case PPC::BI__builtin_vsx_xvrspic:
12181   case PPC::BI__builtin_vsx_xvrdpiz:
12182   case PPC::BI__builtin_vsx_xvrspiz: {
12183     llvm::Type *ResultType = ConvertType(E->getType());
12184     Value *X = EmitScalarExpr(E->getArg(0));
12185     if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
12186         BuiltinID == PPC::BI__builtin_vsx_xvrspim)
12187       ID = Intrinsic::floor;
12188     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
12189              BuiltinID == PPC::BI__builtin_vsx_xvrspi)
12190       ID = Intrinsic::round;
12191     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
12192              BuiltinID == PPC::BI__builtin_vsx_xvrspic)
12193       ID = Intrinsic::nearbyint;
12194     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
12195              BuiltinID == PPC::BI__builtin_vsx_xvrspip)
12196       ID = Intrinsic::ceil;
12197     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
12198              BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
12199       ID = Intrinsic::trunc;
12200     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
12201     return Builder.CreateCall(F, X);
12202   }
12203 
12204   // Absolute value
12205   case PPC::BI__builtin_vsx_xvabsdp:
12206   case PPC::BI__builtin_vsx_xvabssp: {
12207     llvm::Type *ResultType = ConvertType(E->getType());
12208     Value *X = EmitScalarExpr(E->getArg(0));
12209     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
12210     return Builder.CreateCall(F, X);
12211   }
12212 
12213   // FMA variations
12214   case PPC::BI__builtin_vsx_xvmaddadp:
12215   case PPC::BI__builtin_vsx_xvmaddasp:
12216   case PPC::BI__builtin_vsx_xvnmaddadp:
12217   case PPC::BI__builtin_vsx_xvnmaddasp:
12218   case PPC::BI__builtin_vsx_xvmsubadp:
12219   case PPC::BI__builtin_vsx_xvmsubasp:
12220   case PPC::BI__builtin_vsx_xvnmsubadp:
12221   case PPC::BI__builtin_vsx_xvnmsubasp: {
12222     llvm::Type *ResultType = ConvertType(E->getType());
12223     Value *X = EmitScalarExpr(E->getArg(0));
12224     Value *Y = EmitScalarExpr(E->getArg(1));
12225     Value *Z = EmitScalarExpr(E->getArg(2));
12226     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
12227     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
12228     switch (BuiltinID) {
12229       case PPC::BI__builtin_vsx_xvmaddadp:
12230       case PPC::BI__builtin_vsx_xvmaddasp:
12231         return Builder.CreateCall(F, {X, Y, Z});
12232       case PPC::BI__builtin_vsx_xvnmaddadp:
12233       case PPC::BI__builtin_vsx_xvnmaddasp:
12234         return Builder.CreateFSub(Zero,
12235                                   Builder.CreateCall(F, {X, Y, Z}), "sub");
12236       case PPC::BI__builtin_vsx_xvmsubadp:
12237       case PPC::BI__builtin_vsx_xvmsubasp:
12238         return Builder.CreateCall(F,
12239                                   {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
12240       case PPC::BI__builtin_vsx_xvnmsubadp:
12241       case PPC::BI__builtin_vsx_xvnmsubasp:
12242         Value *FsubRes =
12243           Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
12244         return Builder.CreateFSub(Zero, FsubRes, "sub");
12245     }
12246     llvm_unreachable("Unknown FMA operation");
12247     return nullptr; // Suppress no-return warning
12248   }
12249 
12250   case PPC::BI__builtin_vsx_insertword: {
12251     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
12252 
    // Third argument is a compile time constant int. It must be clamped to
    // the range [0, 12].
12255     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
12256     assert(ArgCI &&
12257            "Third arg to xxinsertw intrinsic must be constant integer");
12258     const int64_t MaxIndex = 12;
12259     int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
12260 
12261     // The builtin semantics don't exactly match the xxinsertw instructions
12262     // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
12263     // word from the first argument, and inserts it in the second argument. The
12264     // instruction extracts the word from its second input register and inserts
12265     // it into its first input register, so swap the first and second arguments.
12266     std::swap(Ops[0], Ops[1]);
12267 
12268     // Need to cast the second argument from a vector of unsigned int to a
12269     // vector of long long.
12270     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
12271 
12272     if (getTarget().isLittleEndian()) {
12273       // Create a shuffle mask of (1, 0)
12274       Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
12275                                    ConstantInt::get(Int32Ty, 0)
12276                                  };
12277       Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
12278 
12279       // Reverse the double words in the vector we will extract from.
12280       Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
12281       Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask);
12282 
12283       // Reverse the index.
12284       Index = MaxIndex - Index;
12285     }
12286 
12287     // Intrinsic expects the first arg to be a vector of int.
12288     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
12289     Ops[2] = ConstantInt::getSigned(Int32Ty, Index);
12290     return Builder.CreateCall(F, Ops);
12291   }
12292 
12293   case PPC::BI__builtin_vsx_extractuword: {
12294     llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
12295 
12296     // Intrinsic expects the first argument to be a vector of doublewords.
12297     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
12298 
12299     // The second argument is a compile time constant int that needs to
12300     // be clamped to the range [0, 12].
12301     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]);
12302     assert(ArgCI &&
12303            "Second Arg to xxextractuw intrinsic must be a constant integer!");
12304     const int64_t MaxIndex = 12;
12305     int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
12306 
12307     if (getTarget().isLittleEndian()) {
12308       // Reverse the index.
12309       Index = MaxIndex - Index;
12310       Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
12311 
12312       // Emit the call, then reverse the double words of the results vector.
12313       Value *Call = Builder.CreateCall(F, Ops);
12314 
12315       // Create a shuffle mask of (1, 0)
12316       Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
12317                                    ConstantInt::get(Int32Ty, 0)
12318                                  };
12319       Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
12320 
12321       Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask);
12322       return ShuffleCall;
12323     } else {
12324       Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
12325       return Builder.CreateCall(F, Ops);
12326     }
12327   }
12328 
12329   case PPC::BI__builtin_vsx_xxpermdi: {
12330     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
12331     assert(ArgCI && "Third arg must be constant integer!");
12332 
12333     unsigned Index = ArgCI->getZExtValue();
12334     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
12335     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
12336 
12337     // Account for endianness by treating this as just a shuffle. So we use the
12338     // same indices for both LE and BE in order to produce expected results in
12339     // both cases.
12340     unsigned ElemIdx0 = (Index & 2) >> 1;
12341     unsigned ElemIdx1 = 2 + (Index & 1);
12342 
12343     Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0),
12344                                 ConstantInt::get(Int32Ty, ElemIdx1)};
12345     Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
12346 
12347     Value *ShuffleCall =
12348         Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
12349     QualType BIRetType = E->getType();
12350     auto RetTy = ConvertType(BIRetType);
12351     return Builder.CreateBitCast(ShuffleCall, RetTy);
12352   }
12353 
12354   case PPC::BI__builtin_vsx_xxsldwi: {
12355     ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
12356     assert(ArgCI && "Third argument must be a compile time constant");
12357     unsigned Index = ArgCI->getZExtValue() & 0x3;
12358     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
12359     Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4));
12360 
12361     // Create a shuffle mask
12362     unsigned ElemIdx0;
12363     unsigned ElemIdx1;
12364     unsigned ElemIdx2;
12365     unsigned ElemIdx3;
12366     if (getTarget().isLittleEndian()) {
12367       // Little endian element N comes from element 8+N-Index of the
12368       // concatenated wide vector (of course, using modulo arithmetic on
12369       // the total number of elements).
12370       ElemIdx0 = (8 - Index) % 8;
12371       ElemIdx1 = (9 - Index) % 8;
12372       ElemIdx2 = (10 - Index) % 8;
12373       ElemIdx3 = (11 - Index) % 8;
12374     } else {
12375       // Big endian ElemIdx<N> = Index + N
12376       ElemIdx0 = Index;
12377       ElemIdx1 = Index + 1;
12378       ElemIdx2 = Index + 2;
12379       ElemIdx3 = Index + 3;
12380     }
12381 
12382     Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0),
12383                                 ConstantInt::get(Int32Ty, ElemIdx1),
12384                                 ConstantInt::get(Int32Ty, ElemIdx2),
12385                                 ConstantInt::get(Int32Ty, ElemIdx3)};
12386 
12387     Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
12388     Value *ShuffleCall =
12389         Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
12390     QualType BIRetType = E->getType();
12391     auto RetTy = ConvertType(BIRetType);
12392     return Builder.CreateBitCast(ShuffleCall, RetTy);
12393   }
12394 
12395   case PPC::BI__builtin_pack_vector_int128: {
12396     bool isLittleEndian = getTarget().isLittleEndian();
12397     Value *UndefValue =
12398         llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), 2));
12399     Value *Res = Builder.CreateInsertElement(
12400         UndefValue, Ops[0], (uint64_t)(isLittleEndian ? 1 : 0));
12401     Res = Builder.CreateInsertElement(Res, Ops[1],
12402                                       (uint64_t)(isLittleEndian ? 0 : 1));
12403     return Builder.CreateBitCast(Res, ConvertType(E->getType()));
12404   }
12405 
12406   case PPC::BI__builtin_unpack_vector_int128: {
12407     ConstantInt *Index = cast<ConstantInt>(Ops[1]);
12408     Value *Unpacked = Builder.CreateBitCast(
12409         Ops[0], llvm::VectorType::get(ConvertType(E->getType()), 2));
12410 
12411     if (getTarget().isLittleEndian())
12412       Index = ConstantInt::get(Index->getType(), 1 - Index->getZExtValue());
12413 
12414     return Builder.CreateExtractElement(Unpacked, Index);
12415   }
12416   }
12417 }
12418 
12419 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
12420                                               const CallExpr *E) {
12421   switch (BuiltinID) {
12422   case AMDGPU::BI__builtin_amdgcn_div_scale:
12423   case AMDGPU::BI__builtin_amdgcn_div_scalef: {
12424     // Translate from the intrinsics's struct return to the builtin's out
12425     // argument.
12426 
12427     Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
12428 
12429     llvm::Value *X = EmitScalarExpr(E->getArg(0));
12430     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
12431     llvm::Value *Z = EmitScalarExpr(E->getArg(2));
12432 
12433     llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
12434                                            X->getType());
12435 
12436     llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
12437 
12438     llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
12439     llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
12440 
12441     llvm::Type *RealFlagType
12442       = FlagOutPtr.getPointer()->getType()->getPointerElementType();
12443 
12444     llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
12445     Builder.CreateStore(FlagExt, FlagOutPtr);
12446     return Result;
12447   }
12448   case AMDGPU::BI__builtin_amdgcn_div_fmas:
12449   case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
12450     llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
12451     llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
12452     llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
12453     llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
12454 
12455     llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
12456                                       Src0->getType());
12457     llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
12458     return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
12459   }
12460 
12461   case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
12462     return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
12463   case AMDGPU::BI__builtin_amdgcn_mov_dpp:
12464   case AMDGPU::BI__builtin_amdgcn_update_dpp: {
12465     llvm::SmallVector<llvm::Value *, 6> Args;
12466     for (unsigned I = 0; I != E->getNumArgs(); ++I)
12467       Args.push_back(EmitScalarExpr(E->getArg(I)));
12468     assert(Args.size() == 5 || Args.size() == 6);
12469     if (Args.size() == 5)
12470       Args.insert(Args.begin(), llvm::UndefValue::get(Args[0]->getType()));
12471     Function *F =
12472         CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
12473     return Builder.CreateCall(F, Args);
12474   }
12475   case AMDGPU::BI__builtin_amdgcn_div_fixup:
12476   case AMDGPU::BI__builtin_amdgcn_div_fixupf:
12477   case AMDGPU::BI__builtin_amdgcn_div_fixuph:
12478     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
12479   case AMDGPU::BI__builtin_amdgcn_trig_preop:
12480   case AMDGPU::BI__builtin_amdgcn_trig_preopf:
12481     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
12482   case AMDGPU::BI__builtin_amdgcn_rcp:
12483   case AMDGPU::BI__builtin_amdgcn_rcpf:
12484   case AMDGPU::BI__builtin_amdgcn_rcph:
12485     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
12486   case AMDGPU::BI__builtin_amdgcn_rsq:
12487   case AMDGPU::BI__builtin_amdgcn_rsqf:
12488   case AMDGPU::BI__builtin_amdgcn_rsqh:
12489     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
12490   case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
12491   case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
12492     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
12493   case AMDGPU::BI__builtin_amdgcn_sinf:
12494   case AMDGPU::BI__builtin_amdgcn_sinh:
12495     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
12496   case AMDGPU::BI__builtin_amdgcn_cosf:
12497   case AMDGPU::BI__builtin_amdgcn_cosh:
12498     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
12499   case AMDGPU::BI__builtin_amdgcn_log_clampf:
12500     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
12501   case AMDGPU::BI__builtin_amdgcn_ldexp:
12502   case AMDGPU::BI__builtin_amdgcn_ldexpf:
12503   case AMDGPU::BI__builtin_amdgcn_ldexph:
12504     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
12505   case AMDGPU::BI__builtin_amdgcn_frexp_mant:
12506   case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
12507   case AMDGPU::BI__builtin_amdgcn_frexp_manth:
12508     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
12509   case AMDGPU::BI__builtin_amdgcn_frexp_exp:
12510   case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
12511     Value *Src0 = EmitScalarExpr(E->getArg(0));
12512     Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
12513                                 { Builder.getInt32Ty(), Src0->getType() });
12514     return Builder.CreateCall(F, Src0);
12515   }
12516   case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
12517     Value *Src0 = EmitScalarExpr(E->getArg(0));
12518     Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
12519                                 { Builder.getInt16Ty(), Src0->getType() });
12520     return Builder.CreateCall(F, Src0);
12521   }
12522   case AMDGPU::BI__builtin_amdgcn_fract:
12523   case AMDGPU::BI__builtin_amdgcn_fractf:
12524   case AMDGPU::BI__builtin_amdgcn_fracth:
12525     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
12526   case AMDGPU::BI__builtin_amdgcn_lerp:
12527     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
12528   case AMDGPU::BI__builtin_amdgcn_uicmp:
12529   case AMDGPU::BI__builtin_amdgcn_uicmpl:
12530   case AMDGPU::BI__builtin_amdgcn_sicmp:
12531   case AMDGPU::BI__builtin_amdgcn_sicmpl:
12532     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp);
12533   case AMDGPU::BI__builtin_amdgcn_fcmp:
12534   case AMDGPU::BI__builtin_amdgcn_fcmpf:
12535     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp);
12536   case AMDGPU::BI__builtin_amdgcn_class:
12537   case AMDGPU::BI__builtin_amdgcn_classf:
12538   case AMDGPU::BI__builtin_amdgcn_classh:
12539     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
12540   case AMDGPU::BI__builtin_amdgcn_fmed3f:
12541   case AMDGPU::BI__builtin_amdgcn_fmed3h:
12542     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
12543   case AMDGPU::BI__builtin_amdgcn_ds_append:
12544   case AMDGPU::BI__builtin_amdgcn_ds_consume: {
12545     Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
12546       Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
12547     Value *Src0 = EmitScalarExpr(E->getArg(0));
12548     Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
12549     return Builder.CreateCall(F, { Src0, Builder.getFalse() });
12550   }
12551   case AMDGPU::BI__builtin_amdgcn_read_exec: {
12552     CallInst *CI = cast<CallInst>(
12553       EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
12554     CI->setConvergent();
12555     return CI;
12556   }
12557   case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
12558   case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
12559     StringRef RegName = BuiltinID == AMDGPU::BI__builtin_amdgcn_read_exec_lo ?
12560       "exec_lo" : "exec_hi";
12561     CallInst *CI = cast<CallInst>(
12562       EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, true, RegName));
12563     CI->setConvergent();
12564     return CI;
12565   }
12566   // amdgcn workitem
12567   case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
12568     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
12569   case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
12570     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
12571   case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
12572     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
12573 
12574   // r600 intrinsics
12575   case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
12576   case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
12577     return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
12578   case AMDGPU::BI__builtin_r600_read_tidig_x:
12579     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
12580   case AMDGPU::BI__builtin_r600_read_tidig_y:
12581     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
12582   case AMDGPU::BI__builtin_r600_read_tidig_z:
12583     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
12584   default:
12585     return nullptr;
12586   }
12587 }
12588 
12589 /// Handle a SystemZ function in which the final argument is a pointer
12590 /// to an int that receives the post-instruction CC value.  At the LLVM level
12591 /// this is represented as a function that returns a {result, cc} pair.
12592 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
12593                                          unsigned IntrinsicID,
12594                                          const CallExpr *E) {
12595   unsigned NumArgs = E->getNumArgs() - 1;
12596   SmallVector<Value *, 8> Args(NumArgs);
12597   for (unsigned I = 0; I < NumArgs; ++I)
12598     Args[I] = CGF.EmitScalarExpr(E->getArg(I));
12599   Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
12600   Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
12601   Value *Call = CGF.Builder.CreateCall(F, Args);
12602   Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
12603   CGF.Builder.CreateStore(CC, CCPtr);
12604   return CGF.Builder.CreateExtractValue(Call, 0);
12605 }
12606 
12607 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
12608                                                const CallExpr *E) {
12609   switch (BuiltinID) {
12610   case SystemZ::BI__builtin_tbegin: {
12611     Value *TDB = EmitScalarExpr(E->getArg(0));
12612     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
12613     Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
12614     return Builder.CreateCall(F, {TDB, Control});
12615   }
12616   case SystemZ::BI__builtin_tbegin_nofloat: {
12617     Value *TDB = EmitScalarExpr(E->getArg(0));
12618     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
12619     Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
12620     return Builder.CreateCall(F, {TDB, Control});
12621   }
12622   case SystemZ::BI__builtin_tbeginc: {
12623     Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
12624     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
12625     Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
12626     return Builder.CreateCall(F, {TDB, Control});
12627   }
12628   case SystemZ::BI__builtin_tabort: {
12629     Value *Data = EmitScalarExpr(E->getArg(0));
12630     Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
12631     return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
12632   }
12633   case SystemZ::BI__builtin_non_tx_store: {
12634     Value *Address = EmitScalarExpr(E->getArg(0));
12635     Value *Data = EmitScalarExpr(E->getArg(1));
12636     Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
12637     return Builder.CreateCall(F, {Data, Address});
12638   }
12639 
12640   // Vector builtins.  Note that most vector builtins are mapped automatically
12641   // to target-specific LLVM intrinsics.  The ones handled specially here can
12642   // be represented via standard LLVM IR, which is preferable to enable common
12643   // LLVM optimizations.
12644 
12645   case SystemZ::BI__builtin_s390_vpopctb:
12646   case SystemZ::BI__builtin_s390_vpopcth:
12647   case SystemZ::BI__builtin_s390_vpopctf:
12648   case SystemZ::BI__builtin_s390_vpopctg: {
12649     llvm::Type *ResultType = ConvertType(E->getType());
12650     Value *X = EmitScalarExpr(E->getArg(0));
12651     Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
12652     return Builder.CreateCall(F, X);
12653   }
12654 
12655   case SystemZ::BI__builtin_s390_vclzb:
12656   case SystemZ::BI__builtin_s390_vclzh:
12657   case SystemZ::BI__builtin_s390_vclzf:
12658   case SystemZ::BI__builtin_s390_vclzg: {
12659     llvm::Type *ResultType = ConvertType(E->getType());
12660     Value *X = EmitScalarExpr(E->getArg(0));
12661     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
12662     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
12663     return Builder.CreateCall(F, {X, Undef});
12664   }
12665 
12666   case SystemZ::BI__builtin_s390_vctzb:
12667   case SystemZ::BI__builtin_s390_vctzh:
12668   case SystemZ::BI__builtin_s390_vctzf:
12669   case SystemZ::BI__builtin_s390_vctzg: {
12670     llvm::Type *ResultType = ConvertType(E->getType());
12671     Value *X = EmitScalarExpr(E->getArg(0));
12672     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
12673     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
12674     return Builder.CreateCall(F, {X, Undef});
12675   }
12676 
12677   case SystemZ::BI__builtin_s390_vfsqsb:
12678   case SystemZ::BI__builtin_s390_vfsqdb: {
12679     llvm::Type *ResultType = ConvertType(E->getType());
12680     Value *X = EmitScalarExpr(E->getArg(0));
12681     Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
12682     return Builder.CreateCall(F, X);
12683   }
12684   case SystemZ::BI__builtin_s390_vfmasb:
12685   case SystemZ::BI__builtin_s390_vfmadb: {
12686     llvm::Type *ResultType = ConvertType(E->getType());
12687     Value *X = EmitScalarExpr(E->getArg(0));
12688     Value *Y = EmitScalarExpr(E->getArg(1));
12689     Value *Z = EmitScalarExpr(E->getArg(2));
12690     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
12691     return Builder.CreateCall(F, {X, Y, Z});
12692   }
12693   case SystemZ::BI__builtin_s390_vfmssb:
12694   case SystemZ::BI__builtin_s390_vfmsdb: {
12695     llvm::Type *ResultType = ConvertType(E->getType());
12696     Value *X = EmitScalarExpr(E->getArg(0));
12697     Value *Y = EmitScalarExpr(E->getArg(1));
12698     Value *Z = EmitScalarExpr(E->getArg(2));
12699     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
12700     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
12701     return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
12702   }
12703   case SystemZ::BI__builtin_s390_vfnmasb:
12704   case SystemZ::BI__builtin_s390_vfnmadb: {
12705     llvm::Type *ResultType = ConvertType(E->getType());
12706     Value *X = EmitScalarExpr(E->getArg(0));
12707     Value *Y = EmitScalarExpr(E->getArg(1));
12708     Value *Z = EmitScalarExpr(E->getArg(2));
12709     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
12710     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
12711     return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, Z}), "sub");
12712   }
12713   case SystemZ::BI__builtin_s390_vfnmssb:
12714   case SystemZ::BI__builtin_s390_vfnmsdb: {
12715     llvm::Type *ResultType = ConvertType(E->getType());
12716     Value *X = EmitScalarExpr(E->getArg(0));
12717     Value *Y = EmitScalarExpr(E->getArg(1));
12718     Value *Z = EmitScalarExpr(E->getArg(2));
12719     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
12720     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
12721     Value *NegZ = Builder.CreateFSub(Zero, Z, "sub");
12722     return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, NegZ}));
12723   }
12724   case SystemZ::BI__builtin_s390_vflpsb:
12725   case SystemZ::BI__builtin_s390_vflpdb: {
12726     llvm::Type *ResultType = ConvertType(E->getType());
12727     Value *X = EmitScalarExpr(E->getArg(0));
12728     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
12729     return Builder.CreateCall(F, X);
12730   }
12731   case SystemZ::BI__builtin_s390_vflnsb:
12732   case SystemZ::BI__builtin_s390_vflndb: {
12733     llvm::Type *ResultType = ConvertType(E->getType());
12734     Value *X = EmitScalarExpr(E->getArg(0));
12735     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
12736     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
12737     return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
12738   }
12739   case SystemZ::BI__builtin_s390_vfisb:
12740   case SystemZ::BI__builtin_s390_vfidb: {
12741     llvm::Type *ResultType = ConvertType(E->getType());
12742     Value *X = EmitScalarExpr(E->getArg(0));
12743     // Constant-fold the M4 and M5 mask arguments.
12744     llvm::APSInt M4, M5;
12745     bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
12746     bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
12747     assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
12748     (void)IsConstM4; (void)IsConstM5;
12749     // Check whether this instance can be represented via a LLVM standard
12750     // intrinsic.  We only support some combinations of M4 and M5.
12751     Intrinsic::ID ID = Intrinsic::not_intrinsic;
12752     switch (M4.getZExtValue()) {
12753     default: break;
12754     case 0:  // IEEE-inexact exception allowed
12755       switch (M5.getZExtValue()) {
12756       default: break;
12757       case 0: ID = Intrinsic::rint; break;
12758       }
12759       break;
12760     case 4:  // IEEE-inexact exception suppressed
12761       switch (M5.getZExtValue()) {
12762       default: break;
12763       case 0: ID = Intrinsic::nearbyint; break;
12764       case 1: ID = Intrinsic::round; break;
12765       case 5: ID = Intrinsic::trunc; break;
12766       case 6: ID = Intrinsic::ceil; break;
12767       case 7: ID = Intrinsic::floor; break;
12768       }
12769       break;
12770     }
12771     if (ID != Intrinsic::not_intrinsic) {
12772       Function *F = CGM.getIntrinsic(ID, ResultType);
12773       return Builder.CreateCall(F, X);
12774     }
12775     switch (BuiltinID) {
12776       case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
12777       case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
12778       default: llvm_unreachable("Unknown BuiltinID");
12779     }
12780     Function *F = CGM.getIntrinsic(ID);
12781     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
12782     Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
12783     return Builder.CreateCall(F, {X, M4Value, M5Value});
12784   }
12785   case SystemZ::BI__builtin_s390_vfmaxsb:
12786   case SystemZ::BI__builtin_s390_vfmaxdb: {
12787     llvm::Type *ResultType = ConvertType(E->getType());
12788     Value *X = EmitScalarExpr(E->getArg(0));
12789     Value *Y = EmitScalarExpr(E->getArg(1));
12790     // Constant-fold the M4 mask argument.
12791     llvm::APSInt M4;
12792     bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
12793     assert(IsConstM4 && "Constant arg isn't actually constant?");
12794     (void)IsConstM4;
12795     // Check whether this instance can be represented via a LLVM standard
12796     // intrinsic.  We only support some values of M4.
12797     Intrinsic::ID ID = Intrinsic::not_intrinsic;
12798     switch (M4.getZExtValue()) {
12799     default: break;
12800     case 4: ID = Intrinsic::maxnum; break;
12801     }
12802     if (ID != Intrinsic::not_intrinsic) {
12803       Function *F = CGM.getIntrinsic(ID, ResultType);
12804       return Builder.CreateCall(F, {X, Y});
12805     }
12806     switch (BuiltinID) {
12807       case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
12808       case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
12809       default: llvm_unreachable("Unknown BuiltinID");
12810     }
12811     Function *F = CGM.getIntrinsic(ID);
12812     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
12813     return Builder.CreateCall(F, {X, Y, M4Value});
12814   }
12815   case SystemZ::BI__builtin_s390_vfminsb:
12816   case SystemZ::BI__builtin_s390_vfmindb: {
12817     llvm::Type *ResultType = ConvertType(E->getType());
12818     Value *X = EmitScalarExpr(E->getArg(0));
12819     Value *Y = EmitScalarExpr(E->getArg(1));
12820     // Constant-fold the M4 mask argument.
12821     llvm::APSInt M4;
12822     bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
12823     assert(IsConstM4 && "Constant arg isn't actually constant?");
12824     (void)IsConstM4;
12825     // Check whether this instance can be represented via an LLVM standard
12826     // intrinsic.  We only support some values of M4.
12827     Intrinsic::ID ID = Intrinsic::not_intrinsic;
12828     switch (M4.getZExtValue()) {
12829     default: break;
12830     case 4: ID = Intrinsic::minnum; break;
12831     }
12832     if (ID != Intrinsic::not_intrinsic) {
12833       Function *F = CGM.getIntrinsic(ID, ResultType);
12834       return Builder.CreateCall(F, {X, Y});
12835     }
12836     switch (BuiltinID) {
12837       case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
12838       case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
12839       default: llvm_unreachable("Unknown BuiltinID");
12840     }
12841     Function *F = CGM.getIntrinsic(ID);
12842     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
12843     return Builder.CreateCall(F, {X, Y, M4Value});
12844   }
12845 
12846   // Vector intrinsics that output the post-instruction CC value.
12847 
12848 #define INTRINSIC_WITH_CC(NAME) \
12849     case SystemZ::BI__builtin_##NAME: \
12850       return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
12851 
12852   INTRINSIC_WITH_CC(s390_vpkshs);
12853   INTRINSIC_WITH_CC(s390_vpksfs);
12854   INTRINSIC_WITH_CC(s390_vpksgs);
12855 
12856   INTRINSIC_WITH_CC(s390_vpklshs);
12857   INTRINSIC_WITH_CC(s390_vpklsfs);
12858   INTRINSIC_WITH_CC(s390_vpklsgs);
12859 
12860   INTRINSIC_WITH_CC(s390_vceqbs);
12861   INTRINSIC_WITH_CC(s390_vceqhs);
12862   INTRINSIC_WITH_CC(s390_vceqfs);
12863   INTRINSIC_WITH_CC(s390_vceqgs);
12864 
12865   INTRINSIC_WITH_CC(s390_vchbs);
12866   INTRINSIC_WITH_CC(s390_vchhs);
12867   INTRINSIC_WITH_CC(s390_vchfs);
12868   INTRINSIC_WITH_CC(s390_vchgs);
12869 
12870   INTRINSIC_WITH_CC(s390_vchlbs);
12871   INTRINSIC_WITH_CC(s390_vchlhs);
12872   INTRINSIC_WITH_CC(s390_vchlfs);
12873   INTRINSIC_WITH_CC(s390_vchlgs);
12874 
12875   INTRINSIC_WITH_CC(s390_vfaebs);
12876   INTRINSIC_WITH_CC(s390_vfaehs);
12877   INTRINSIC_WITH_CC(s390_vfaefs);
12878 
12879   INTRINSIC_WITH_CC(s390_vfaezbs);
12880   INTRINSIC_WITH_CC(s390_vfaezhs);
12881   INTRINSIC_WITH_CC(s390_vfaezfs);
12882 
12883   INTRINSIC_WITH_CC(s390_vfeebs);
12884   INTRINSIC_WITH_CC(s390_vfeehs);
12885   INTRINSIC_WITH_CC(s390_vfeefs);
12886 
12887   INTRINSIC_WITH_CC(s390_vfeezbs);
12888   INTRINSIC_WITH_CC(s390_vfeezhs);
12889   INTRINSIC_WITH_CC(s390_vfeezfs);
12890 
12891   INTRINSIC_WITH_CC(s390_vfenebs);
12892   INTRINSIC_WITH_CC(s390_vfenehs);
12893   INTRINSIC_WITH_CC(s390_vfenefs);
12894 
12895   INTRINSIC_WITH_CC(s390_vfenezbs);
12896   INTRINSIC_WITH_CC(s390_vfenezhs);
12897   INTRINSIC_WITH_CC(s390_vfenezfs);
12898 
12899   INTRINSIC_WITH_CC(s390_vistrbs);
12900   INTRINSIC_WITH_CC(s390_vistrhs);
12901   INTRINSIC_WITH_CC(s390_vistrfs);
12902 
12903   INTRINSIC_WITH_CC(s390_vstrcbs);
12904   INTRINSIC_WITH_CC(s390_vstrchs);
12905   INTRINSIC_WITH_CC(s390_vstrcfs);
12906 
12907   INTRINSIC_WITH_CC(s390_vstrczbs);
12908   INTRINSIC_WITH_CC(s390_vstrczhs);
12909   INTRINSIC_WITH_CC(s390_vstrczfs);
12910 
12911   INTRINSIC_WITH_CC(s390_vfcesbs);
12912   INTRINSIC_WITH_CC(s390_vfcedbs);
12913   INTRINSIC_WITH_CC(s390_vfchsbs);
12914   INTRINSIC_WITH_CC(s390_vfchdbs);
12915   INTRINSIC_WITH_CC(s390_vfchesbs);
12916   INTRINSIC_WITH_CC(s390_vfchedbs);
12917 
12918   INTRINSIC_WITH_CC(s390_vftcisb);
12919   INTRINSIC_WITH_CC(s390_vftcidb);
12920 
12921 #undef INTRINSIC_WITH_CC
12922 
12923   default:
12924     return nullptr;
12925   }
12926 }
12927 
/// Emit LLVM IR for a call to an NVPTX target builtin.
///
/// Dispatches on \p BuiltinID and emits either a helper-generated atomic
/// operation or a call to the corresponding NVVM intrinsic.
///
/// \param BuiltinID  The NVPTX::BI* identifier of the builtin being called.
/// \param E          The call expression; its arguments are emitted on demand
///                   by the individual cases.
/// \returns The value produced by the emitted code, or nullptr when
///          \p BuiltinID is not handled here, when a WMMA layout/satf
///          argument is not an integer constant expression, or when the MMA
///          layout value is outside the range 0..3.
12928 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
12929                                              const CallExpr *E) {
  // Helper: call the ldg intrinsic IntrinsicID, overloaded on the pointee and
  // pointer types of argument 0. The second operand is the natural alignment
  // of the pointee type of argument 0, passed as an i32 constant.
12930   auto MakeLdg = [&](unsigned IntrinsicID) {
12931     Value *Ptr = EmitScalarExpr(E->getArg(0));
12932     clang::CharUnits Align =
12933         getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
12934     return Builder.CreateCall(
12935         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
12936                                        Ptr->getType()}),
12937         {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
12938   };
  // Helper: call the two-operand intrinsic IntrinsicID, overloaded on the
  // pointee and pointer types of argument 0, with operands (arg 0, arg 1).
12939   auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
12940     Value *Ptr = EmitScalarExpr(E->getArg(0));
12941     return Builder.CreateCall(
12942         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
12943                                        Ptr->getType()}),
12944         {Ptr, EmitScalarExpr(E->getArg(1))});
12945   };
12946   switch (BuiltinID) {
  // The integer __nvvm_atom_*_gen_* builtins below forward to
  // MakeBinaryAtomicValue with the matching llvm::AtomicRMWInst operation.
  // Signed max/min use Max/Min; the unsigned variants use UMax/UMin.
12947   case NVPTX::BI__nvvm_atom_add_gen_i:
12948   case NVPTX::BI__nvvm_atom_add_gen_l:
12949   case NVPTX::BI__nvvm_atom_add_gen_ll:
12950     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
12951
12952   case NVPTX::BI__nvvm_atom_sub_gen_i:
12953   case NVPTX::BI__nvvm_atom_sub_gen_l:
12954   case NVPTX::BI__nvvm_atom_sub_gen_ll:
12955     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
12956
12957   case NVPTX::BI__nvvm_atom_and_gen_i:
12958   case NVPTX::BI__nvvm_atom_and_gen_l:
12959   case NVPTX::BI__nvvm_atom_and_gen_ll:
12960     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
12961
12962   case NVPTX::BI__nvvm_atom_or_gen_i:
12963   case NVPTX::BI__nvvm_atom_or_gen_l:
12964   case NVPTX::BI__nvvm_atom_or_gen_ll:
12965     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
12966
12967   case NVPTX::BI__nvvm_atom_xor_gen_i:
12968   case NVPTX::BI__nvvm_atom_xor_gen_l:
12969   case NVPTX::BI__nvvm_atom_xor_gen_ll:
12970     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
12971
12972   case NVPTX::BI__nvvm_atom_xchg_gen_i:
12973   case NVPTX::BI__nvvm_atom_xchg_gen_l:
12974   case NVPTX::BI__nvvm_atom_xchg_gen_ll:
12975     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
12976
12977   case NVPTX::BI__nvvm_atom_max_gen_i:
12978   case NVPTX::BI__nvvm_atom_max_gen_l:
12979   case NVPTX::BI__nvvm_atom_max_gen_ll:
12980     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
12981
12982   case NVPTX::BI__nvvm_atom_max_gen_ui:
12983   case NVPTX::BI__nvvm_atom_max_gen_ul:
12984   case NVPTX::BI__nvvm_atom_max_gen_ull:
12985     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
12986
12987   case NVPTX::BI__nvvm_atom_min_gen_i:
12988   case NVPTX::BI__nvvm_atom_min_gen_l:
12989   case NVPTX::BI__nvvm_atom_min_gen_ll:
12990     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
12991
12992   case NVPTX::BI__nvvm_atom_min_gen_ui:
12993   case NVPTX::BI__nvvm_atom_min_gen_ul:
12994   case NVPTX::BI__nvvm_atom_min_gen_ull:
12995     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
12996
12997   case NVPTX::BI__nvvm_atom_cas_gen_i:
12998   case NVPTX::BI__nvvm_atom_cas_gen_l:
12999   case NVPTX::BI__nvvm_atom_cas_gen_ll:
13000     // __nvvm_atom_cas_gen_* should return the old value rather than the
13001     // success flag.
13002     return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
13003
13004   case NVPTX::BI__nvvm_atom_add_gen_f: {
13005     Value *Ptr = EmitScalarExpr(E->getArg(0));
13006     Value *Val = EmitScalarExpr(E->getArg(1));
13007     // atomicrmw only deals with integer arguments so we need to use
13008     // LLVM's nvvm_atomic_load_add_f32 intrinsic for that.
13009     Function *FnALAF32 =
13010         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
13011     return Builder.CreateCall(FnALAF32, {Ptr, Val});
13012   }
13013
13014   case NVPTX::BI__nvvm_atom_add_gen_d: {
13015     Value *Ptr = EmitScalarExpr(E->getArg(0));
13016     Value *Val = EmitScalarExpr(E->getArg(1));
13017     // atomicrmw only deals with integer arguments, so we need to use
13018     // LLVM's nvvm_atomic_load_add_f64 intrinsic.
13019     Function *FnALAF64 =
13020         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f64, Ptr->getType());
13021     return Builder.CreateCall(FnALAF64, {Ptr, Val});
13022   }
13023
  // Increment/decrement atomics call dedicated NVVM intrinsics that are
  // overloaded on the pointer type only.
13024   case NVPTX::BI__nvvm_atom_inc_gen_ui: {
13025     Value *Ptr = EmitScalarExpr(E->getArg(0));
13026     Value *Val = EmitScalarExpr(E->getArg(1));
13027     Function *FnALI32 =
13028         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
13029     return Builder.CreateCall(FnALI32, {Ptr, Val});
13030   }
13031
13032   case NVPTX::BI__nvvm_atom_dec_gen_ui: {
13033     Value *Ptr = EmitScalarExpr(E->getArg(0));
13034     Value *Val = EmitScalarExpr(E->getArg(1));
13035     Function *FnALD32 =
13036         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
13037     return Builder.CreateCall(FnALD32, {Ptr, Val});
13038   }
13039
  // All integer __nvvm_ldg_* builtins (scalar and vector) share the
  // nvvm_ldg_global_i intrinsic; MakeLdg overloads it on the pointee type.
13040   case NVPTX::BI__nvvm_ldg_c:
13041   case NVPTX::BI__nvvm_ldg_c2:
13042   case NVPTX::BI__nvvm_ldg_c4:
13043   case NVPTX::BI__nvvm_ldg_s:
13044   case NVPTX::BI__nvvm_ldg_s2:
13045   case NVPTX::BI__nvvm_ldg_s4:
13046   case NVPTX::BI__nvvm_ldg_i:
13047   case NVPTX::BI__nvvm_ldg_i2:
13048   case NVPTX::BI__nvvm_ldg_i4:
13049   case NVPTX::BI__nvvm_ldg_l:
13050   case NVPTX::BI__nvvm_ldg_ll:
13051   case NVPTX::BI__nvvm_ldg_ll2:
13052   case NVPTX::BI__nvvm_ldg_uc:
13053   case NVPTX::BI__nvvm_ldg_uc2:
13054   case NVPTX::BI__nvvm_ldg_uc4:
13055   case NVPTX::BI__nvvm_ldg_us:
13056   case NVPTX::BI__nvvm_ldg_us2:
13057   case NVPTX::BI__nvvm_ldg_us4:
13058   case NVPTX::BI__nvvm_ldg_ui:
13059   case NVPTX::BI__nvvm_ldg_ui2:
13060   case NVPTX::BI__nvvm_ldg_ui4:
13061   case NVPTX::BI__nvvm_ldg_ul:
13062   case NVPTX::BI__nvvm_ldg_ull:
13063   case NVPTX::BI__nvvm_ldg_ull2:
13064     // PTX Interoperability section 2.2: "For a vector with an even number of
13065     // elements, its alignment is set to number of elements times the alignment
13066     // of its member: n*alignof(t)."
13067     return MakeLdg(Intrinsic::nvvm_ldg_global_i);
  // Floating-point __nvvm_ldg_* builtins use nvvm_ldg_global_f instead.
13068   case NVPTX::BI__nvvm_ldg_f:
13069   case NVPTX::BI__nvvm_ldg_f2:
13070   case NVPTX::BI__nvvm_ldg_f4:
13071   case NVPTX::BI__nvvm_ldg_d:
13072   case NVPTX::BI__nvvm_ldg_d2:
13073     return MakeLdg(Intrinsic::nvvm_ldg_global_f);
13074
  // The _cta_ and _sys_ atomic builtins map onto the like-named
  // nvvm_atomic_*_cta / nvvm_atomic_*_sys intrinsics via MakeScopedAtomic.
  // Note that the signed and unsigned max/min builtins share one intrinsic.
13075   case NVPTX::BI__nvvm_atom_cta_add_gen_i:
13076   case NVPTX::BI__nvvm_atom_cta_add_gen_l:
13077   case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
13078     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta);
13079   case NVPTX::BI__nvvm_atom_sys_add_gen_i:
13080   case NVPTX::BI__nvvm_atom_sys_add_gen_l:
13081   case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
13082     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys);
13083   case NVPTX::BI__nvvm_atom_cta_add_gen_f:
13084   case NVPTX::BI__nvvm_atom_cta_add_gen_d:
13085     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta);
13086   case NVPTX::BI__nvvm_atom_sys_add_gen_f:
13087   case NVPTX::BI__nvvm_atom_sys_add_gen_d:
13088     return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys);
13089   case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
13090   case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
13091   case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
13092     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta);
13093   case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
13094   case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
13095   case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
13096     return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys);
13097   case NVPTX::BI__nvvm_atom_cta_max_gen_i:
13098   case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
13099   case NVPTX::BI__nvvm_atom_cta_max_gen_l:
13100   case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
13101   case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
13102   case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
13103     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta);
13104   case NVPTX::BI__nvvm_atom_sys_max_gen_i:
13105   case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
13106   case NVPTX::BI__nvvm_atom_sys_max_gen_l:
13107   case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
13108   case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
13109   case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
13110     return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys);
13111   case NVPTX::BI__nvvm_atom_cta_min_gen_i:
13112   case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
13113   case NVPTX::BI__nvvm_atom_cta_min_gen_l:
13114   case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
13115   case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
13116   case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
13117     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta);
13118   case NVPTX::BI__nvvm_atom_sys_min_gen_i:
13119   case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
13120   case NVPTX::BI__nvvm_atom_sys_min_gen_l:
13121   case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
13122   case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
13123   case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
13124     return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys);
13125   case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
13126     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta);
13127   case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
13128     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta);
13129   case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
13130     return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys);
13131   case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
13132     return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys);
13133   case NVPTX::BI__nvvm_atom_cta_and_gen_i:
13134   case NVPTX::BI__nvvm_atom_cta_and_gen_l:
13135   case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
13136     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta);
13137   case NVPTX::BI__nvvm_atom_sys_and_gen_i:
13138   case NVPTX::BI__nvvm_atom_sys_and_gen_l:
13139   case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
13140     return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys);
13141   case NVPTX::BI__nvvm_atom_cta_or_gen_i:
13142   case NVPTX::BI__nvvm_atom_cta_or_gen_l:
13143   case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
13144     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta);
13145   case NVPTX::BI__nvvm_atom_sys_or_gen_i:
13146   case NVPTX::BI__nvvm_atom_sys_or_gen_l:
13147   case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
13148     return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys);
13149   case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
13150   case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
13151   case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
13152     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta);
13153   case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
13154   case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
13155   case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
13156     return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys);
  // Compare-and-swap passes three operands (the pointer plus arguments 1 and
  // 2), so it cannot go through the two-operand MakeScopedAtomic helper.
13157   case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
13158   case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
13159   case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
13160     Value *Ptr = EmitScalarExpr(E->getArg(0));
13161     return Builder.CreateCall(
13162         CGM.getIntrinsic(
13163             Intrinsic::nvvm_atomic_cas_gen_i_cta,
13164             {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
13165         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
13166   }
13167   case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
13168   case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
13169   case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
13170     Value *Ptr = EmitScalarExpr(E->getArg(0));
13171     return Builder.CreateCall(
13172         CGM.getIntrinsic(
13173             Intrinsic::nvvm_atomic_cas_gen_i_sys,
13174             {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
13175         {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
13176   }
  // match.all.sync: the intrinsic yields a pair. Element 1 (the predicate) is
  // zero-extended to the element type of the pointer passed as argument 2 and
  // stored through it; element 0 is the builtin's return value.
13177   case NVPTX::BI__nvvm_match_all_sync_i32p:
13178   case NVPTX::BI__nvvm_match_all_sync_i64p: {
13179     Value *Mask = EmitScalarExpr(E->getArg(0));
13180     Value *Val = EmitScalarExpr(E->getArg(1));
13181     Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
13182     Value *ResultPair = Builder.CreateCall(
13183         CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
13184                              ? Intrinsic::nvvm_match_all_sync_i32p
13185                              : Intrinsic::nvvm_match_all_sync_i64p),
13186         {Mask, Val});
13187     Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
13188                                      PredOutPtr.getElementType());
13189     Builder.CreateStore(Pred, PredOutPtr);
13190     return Builder.CreateExtractValue(ResultPair, 0);
13191   }
  // WMMA fragment loads. Arguments are (dst, src, ldm, isColMajor). The
  // intrinsic's aggregate result is unpacked and stored element by element
  // into dst.
13192   case NVPTX::BI__hmma_m16n16k16_ld_a:
13193   case NVPTX::BI__hmma_m16n16k16_ld_b:
13194   case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
13195   case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
13196   case NVPTX::BI__hmma_m32n8k16_ld_a:
13197   case NVPTX::BI__hmma_m32n8k16_ld_b:
13198   case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
13199   case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
13200   case NVPTX::BI__hmma_m8n32k16_ld_a:
13201   case NVPTX::BI__hmma_m8n32k16_ld_b:
13202   case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
13203   case NVPTX::BI__hmma_m8n32k16_ld_c_f32: {
13204     Address Dst = EmitPointerWithAlignment(E->getArg(0));
13205     Value *Src = EmitScalarExpr(E->getArg(1));
13206     Value *Ldm = EmitScalarExpr(E->getArg(2));
    // The layout flag selects the intrinsic, so it must fold to a constant;
    // give up (return nullptr) if it does not.
13207     llvm::APSInt isColMajorArg;
13208     if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext()))
13209       return nullptr;
13210     bool isColMajor = isColMajorArg.getSExtValue();
    // IID is the col/row-stride load intrinsic for this builtin; NumResults is
    // the number of result elements copied to Dst below (4 for the c_f16
    // fragments, 8 for every other fragment).
13211     unsigned IID;
13212     unsigned NumResults;
13213     switch (BuiltinID) {
13214     case NVPTX::BI__hmma_m16n16k16_ld_a:
13215       IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride
13216                        : Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride;
13217       NumResults = 8;
13218       break;
13219     case NVPTX::BI__hmma_m16n16k16_ld_b:
13220       IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride
13221                        : Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride;
13222       NumResults = 8;
13223       break;
13224     case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
13225       IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride
13226                        : Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride;
13227       NumResults = 4;
13228       break;
13229     case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
13230       IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride
13231                        : Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride;
13232       NumResults = 8;
13233       break;
13234     case NVPTX::BI__hmma_m32n8k16_ld_a:
13235       IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col_stride
13236                        : Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row_stride;
13237       NumResults = 8;
13238       break;
13239     case NVPTX::BI__hmma_m32n8k16_ld_b:
13240       IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col_stride
13241                        : Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row_stride;
13242       NumResults = 8;
13243       break;
13244     case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
13245       IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col_stride
13246                        : Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row_stride;
13247       NumResults = 4;
13248       break;
13249     case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
13250       IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col_stride
13251                        : Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row_stride;
13252       NumResults = 8;
13253       break;
13254     case NVPTX::BI__hmma_m8n32k16_ld_a:
13255       IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col_stride
13256                        : Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row_stride;
13257       NumResults = 8;
13258       break;
13259     case NVPTX::BI__hmma_m8n32k16_ld_b:
13260       IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col_stride
13261                        : Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row_stride;
13262       NumResults = 8;
13263       break;
13264     case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
13265       IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col_stride
13266                        : Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row_stride;
13267       NumResults = 4;
13268       break;
13269     case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
13270       IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col_stride
13271                        : Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row_stride;
13272       NumResults = 8;
13273       break;
13274     default:
13275       llvm_unreachable("Unexpected builtin ID.");
13276     }
    // The load intrinsic is overloaded on the source pointer type.
13277     Value *Result =
13278         Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
13279
    // Each extracted element is bitcast to Dst's element type and stored at
    // Dst[i] with a fixed alignment of 4.
13280     // Save returned values.
13281     for (unsigned i = 0; i < NumResults; ++i) {
13282       Builder.CreateAlignedStore(
13283           Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
13284                                 Dst.getElementType()),
13285           Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)),
13286           CharUnits::fromQuantity(4));
13287     }
13288     return Result;
13289   }
13290
  // WMMA fragment stores. Arguments are (dst, src, ldm, isColMajor). The
  // fragment elements are loaded from src and passed to the store intrinsic
  // as individual operands.
13291   case NVPTX::BI__hmma_m16n16k16_st_c_f16:
13292   case NVPTX::BI__hmma_m16n16k16_st_c_f32:
13293   case NVPTX::BI__hmma_m32n8k16_st_c_f16:
13294   case NVPTX::BI__hmma_m32n8k16_st_c_f32:
13295   case NVPTX::BI__hmma_m8n32k16_st_c_f16:
13296   case NVPTX::BI__hmma_m8n32k16_st_c_f32: {
13297     Value *Dst = EmitScalarExpr(E->getArg(0));
13298     Address Src = EmitPointerWithAlignment(E->getArg(1));
13299     Value *Ldm = EmitScalarExpr(E->getArg(2));
13300     llvm::APSInt isColMajorArg;
13301     if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext()))
13302       return nullptr;
13303     bool isColMajor = isColMajorArg.getSExtValue();
    // NumResults defaults to 8 (used by the f32 variants); the f16 variants
    // override it to 4 in the switch below.
13304     unsigned IID;
13305     unsigned NumResults = 8;
13306     // PTX Instructions (and LLVM intrinsics) are defined for slice _d_, yet
13307     // for some reason nvcc builtins use _c_.
13308     switch (BuiltinID) {
13309     case NVPTX::BI__hmma_m16n16k16_st_c_f16:
13310       IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride
13311                        : Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride;
13312       NumResults = 4;
13313       break;
13314     case NVPTX::BI__hmma_m16n16k16_st_c_f32:
13315       IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride
13316                        : Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride;
13317       break;
13318     case NVPTX::BI__hmma_m32n8k16_st_c_f16:
13319       IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col_stride
13320                        : Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row_stride;
13321       NumResults = 4;
13322       break;
13323     case NVPTX::BI__hmma_m32n8k16_st_c_f32:
13324       IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col_stride
13325                        : Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row_stride;
13326       break;
13327     case NVPTX::BI__hmma_m8n32k16_st_c_f16:
13328       IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col_stride
13329                        : Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row_stride;
13330       NumResults = 4;
13331       break;
13332     case NVPTX::BI__hmma_m8n32k16_st_c_f32:
13333       IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col_stride
13334                        : Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row_stride;
13335       break;
13336     default:
13337       llvm_unreachable("Unexpected builtin ID.");
13338     }
    // Operand list is (Dst, element 0 .. element NumResults-1, Ldm). Parameter
    // 1 of the intrinsic gives the type every loaded element is bitcast to.
13339     Function *Intrinsic = CGM.getIntrinsic(IID, Dst->getType());
13340     llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
13341     SmallVector<Value *, 10> Values = {Dst};
13342     for (unsigned i = 0; i < NumResults; ++i) {
13343       Value *V = Builder.CreateAlignedLoad(
13344           Builder.CreateGEP(Src.getPointer(), llvm::ConstantInt::get(IntTy, i)),
13345           CharUnits::fromQuantity(4));
13346       Values.push_back(Builder.CreateBitCast(V, ParamType));
13347     }
13348     Values.push_back(Ldm);
13349     Value *Result = Builder.CreateCall(Intrinsic, Values);
13350     return Result;
13351   }
13352
13353   // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
13354   // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
13355   case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
13356   case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
13357   case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
13358   case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
13359   case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
13360   case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
13361   case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
13362   case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
13363   case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
13364   case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
13365   case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
13366   case NVPTX::BI__hmma_m8n32k16_mma_f16f32: {
13367     Address Dst = EmitPointerWithAlignment(E->getArg(0));
13368     Address SrcA = EmitPointerWithAlignment(E->getArg(1));
13369     Address SrcB = EmitPointerWithAlignment(E->getArg(2));
13370     Address SrcC = EmitPointerWithAlignment(E->getArg(3));
    // Both the layout and satf arguments pick the intrinsic, so each must be
    // an integer constant expression, and the layout must lie in 0..3;
    // otherwise return nullptr.
13371     llvm::APSInt LayoutArg;
13372     if (!E->getArg(4)->isIntegerConstantExpr(LayoutArg, getContext()))
13373       return nullptr;
13374     int Layout = LayoutArg.getSExtValue();
13375     if (Layout < 0 || Layout > 3)
13376       return nullptr;
13377     llvm::APSInt SatfArg;
13378     if (!E->getArg(5)->isIntegerConstantExpr(SatfArg, getContext()))
13379       return nullptr;
13380     bool Satf = SatfArg.getSExtValue();
13381
    // MMA_VARIANTS expands to an 8-entry table ordered by layout (row_row,
    // row_col, col_row, col_col), with the plain and _satfinite forms of each
    // layout adjacent. getMMAIntrinsic below indexes it with Layout * 2 + Satf.
13382     // clang-format off
13383 #define MMA_VARIANTS(geom, type) {{                                 \
13384       Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type,             \
13385       Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
13386       Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type,             \
13387       Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
13388       Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type,             \
13389       Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
13390       Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type,             \
13391       Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite  \
13392     }}
13393     // clang-format on
13394
13395     auto getMMAIntrinsic = [Layout, Satf](std::array<unsigned, 8> Variants) {
13396       unsigned Index = Layout * 2 + Satf;
13397       assert(Index < 8);
13398       return Variants[Index];
13399     };
    // NumEltsC / NumEltsD are the number of C elements loaded as operands and
    // D elements stored from the result: 4 for an f16 fragment, 8 for f32.
    // In the builtin suffix <Dtype><CType>, the first type is D's.
13400     unsigned IID;
13401     unsigned NumEltsC;
13402     unsigned NumEltsD;
13403     switch (BuiltinID) {
13404     case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
13405       IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f16_f16));
13406       NumEltsC = 4;
13407       NumEltsD = 4;
13408       break;
13409     case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
13410       IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f32_f16));
13411       NumEltsC = 4;
13412       NumEltsD = 8;
13413       break;
13414     case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
13415       IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f16_f32));
13416       NumEltsC = 8;
13417       NumEltsD = 4;
13418       break;
13419     case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
13420       IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f32_f32));
13421       NumEltsC = 8;
13422       NumEltsD = 8;
13423       break;
13424     case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
13425       IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f16_f16));
13426       NumEltsC = 4;
13427       NumEltsD = 4;
13428       break;
13429     case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
13430       IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f32_f16));
13431       NumEltsC = 4;
13432       NumEltsD = 8;
13433       break;
13434     case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
13435       IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f16_f32));
13436       NumEltsC = 8;
13437       NumEltsD = 4;
13438       break;
13439     case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
13440       IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f32_f32));
13441       NumEltsC = 8;
13442       NumEltsD = 8;
13443       break;
13444     case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
13445       IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f16_f16));
13446       NumEltsC = 4;
13447       NumEltsD = 4;
13448       break;
13449     case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
13450       IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f32_f16));
13451       NumEltsC = 4;
13452       NumEltsD = 8;
13453       break;
13454     case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
13455       IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f16_f32));
13456       NumEltsC = 8;
13457       NumEltsD = 4;
13458       break;
13459     case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
13460       IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f32_f32));
13461       NumEltsC = 8;
13462       NumEltsD = 8;
13463       break;
13464     default:
13465       llvm_unreachable("Unexpected builtin ID.");
13466     }
13467 #undef MMA_VARIANTS
13468
    // Build the operand list: 8 elements of A, 8 of B, then NumEltsC of C.
    // Every element is loaded with alignment 4 and bitcast to the type the
    // intrinsic declares for that operand position.
13469     SmallVector<Value *, 24> Values;
13470     Function *Intrinsic = CGM.getIntrinsic(IID);
13471     llvm::Type *ABType = Intrinsic->getFunctionType()->getParamType(0);
13472     // Load A
13473     for (unsigned i = 0; i < 8; ++i) {
13474       Value *V = Builder.CreateAlignedLoad(
13475           Builder.CreateGEP(SrcA.getPointer(),
13476                             llvm::ConstantInt::get(IntTy, i)),
13477           CharUnits::fromQuantity(4));
13478       Values.push_back(Builder.CreateBitCast(V, ABType));
13479     }
13480     // Load B
13481     for (unsigned i = 0; i < 8; ++i) {
13482       Value *V = Builder.CreateAlignedLoad(
13483           Builder.CreateGEP(SrcB.getPointer(),
13484                             llvm::ConstantInt::get(IntTy, i)),
13485           CharUnits::fromQuantity(4));
13486       Values.push_back(Builder.CreateBitCast(V, ABType));
13487     }
    // Parameter 16 is the first C operand: it follows the 8 A and 8 B
    // operands pushed above.
13488     // Load C
13489     llvm::Type *CType = Intrinsic->getFunctionType()->getParamType(16);
13490     for (unsigned i = 0; i < NumEltsC; ++i) {
13491       Value *V = Builder.CreateAlignedLoad(
13492           Builder.CreateGEP(SrcC.getPointer(),
13493                             llvm::ConstantInt::get(IntTy, i)),
13494           CharUnits::fromQuantity(4));
13495       Values.push_back(Builder.CreateBitCast(V, CType));
13496     }
    // Unpack the first NumEltsD result elements into Dst, bitcasting each to
    // Dst's element type.
13497     Value *Result = Builder.CreateCall(Intrinsic, Values);
13498     llvm::Type *DType = Dst.getElementType();
13499     for (unsigned i = 0; i < NumEltsD; ++i)
13500       Builder.CreateAlignedStore(
13501           Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
13502           Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)),
13503           CharUnits::fromQuantity(4));
13504     return Result;
13505   }
  // Not an NVPTX builtin handled by this function.
13506   default:
13507     return nullptr;
13508   }
13509 }
13510 
13511 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
13512                                                    const CallExpr *E) {
13513   switch (BuiltinID) {
13514   case WebAssembly::BI__builtin_wasm_memory_size: {
13515     llvm::Type *ResultType = ConvertType(E->getType());
13516     Value *I = EmitScalarExpr(E->getArg(0));
13517     Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
13518     return Builder.CreateCall(Callee, I);
13519   }
13520   case WebAssembly::BI__builtin_wasm_memory_grow: {
13521     llvm::Type *ResultType = ConvertType(E->getType());
13522     Value *Args[] = {
13523       EmitScalarExpr(E->getArg(0)),
13524       EmitScalarExpr(E->getArg(1))
13525     };
13526     Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
13527     return Builder.CreateCall(Callee, Args);
13528   }
13529   case WebAssembly::BI__builtin_wasm_memory_init: {
13530     llvm::APSInt SegConst;
13531     if (!E->getArg(0)->isIntegerConstantExpr(SegConst, getContext()))
13532       llvm_unreachable("Constant arg isn't actually constant?");
13533     llvm::APSInt MemConst;
13534     if (!E->getArg(1)->isIntegerConstantExpr(MemConst, getContext()))
13535       llvm_unreachable("Constant arg isn't actually constant?");
13536     if (!MemConst.isNullValue())
13537       ErrorUnsupported(E, "non-zero memory index");
13538     Value *Args[] = {llvm::ConstantInt::get(getLLVMContext(), SegConst),
13539                      llvm::ConstantInt::get(getLLVMContext(), MemConst),
13540                      EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)),
13541                      EmitScalarExpr(E->getArg(4))};
13542     Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_init);
13543     return Builder.CreateCall(Callee, Args);
13544   }
13545   case WebAssembly::BI__builtin_wasm_data_drop: {
13546     llvm::APSInt SegConst;
13547     if (!E->getArg(0)->isIntegerConstantExpr(SegConst, getContext()))
13548       llvm_unreachable("Constant arg isn't actually constant?");
13549     Value *Arg = llvm::ConstantInt::get(getLLVMContext(), SegConst);
13550     Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_data_drop);
13551     return Builder.CreateCall(Callee, {Arg});
13552   }
13553   case WebAssembly::BI__builtin_wasm_throw: {
13554     Value *Tag = EmitScalarExpr(E->getArg(0));
13555     Value *Obj = EmitScalarExpr(E->getArg(1));
13556     Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
13557     return Builder.CreateCall(Callee, {Tag, Obj});
13558   }
13559   case WebAssembly::BI__builtin_wasm_rethrow_in_catch: {
13560     Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow_in_catch);
13561     return Builder.CreateCall(Callee);
13562   }
13563   case WebAssembly::BI__builtin_wasm_atomic_wait_i32: {
13564     Value *Addr = EmitScalarExpr(E->getArg(0));
13565     Value *Expected = EmitScalarExpr(E->getArg(1));
13566     Value *Timeout = EmitScalarExpr(E->getArg(2));
13567     Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i32);
13568     return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
13569   }
13570   case WebAssembly::BI__builtin_wasm_atomic_wait_i64: {
13571     Value *Addr = EmitScalarExpr(E->getArg(0));
13572     Value *Expected = EmitScalarExpr(E->getArg(1));
13573     Value *Timeout = EmitScalarExpr(E->getArg(2));
13574     Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i64);
13575     return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
13576   }
13577   case WebAssembly::BI__builtin_wasm_atomic_notify: {
13578     Value *Addr = EmitScalarExpr(E->getArg(0));
13579     Value *Count = EmitScalarExpr(E->getArg(1));
13580     Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_notify);
13581     return Builder.CreateCall(Callee, {Addr, Count});
13582   }
13583   case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
13584   case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
13585   case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
13586   case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
13587   case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4:
13588   case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64x2_f64x2: {
13589     Value *Src = EmitScalarExpr(E->getArg(0));
13590     llvm::Type *ResT = ConvertType(E->getType());
13591     Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_signed,
13592                                      {ResT, Src->getType()});
13593     return Builder.CreateCall(Callee, {Src});
13594   }
13595   case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
13596   case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
13597   case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
13598   case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
13599   case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4:
13600   case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64x2_f64x2: {
13601     Value *Src = EmitScalarExpr(E->getArg(0));
13602     llvm::Type *ResT = ConvertType(E->getType());
13603     Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_unsigned,
13604                                      {ResT, Src->getType()});
13605     return Builder.CreateCall(Callee, {Src});
13606   }
13607   case WebAssembly::BI__builtin_wasm_min_f32:
13608   case WebAssembly::BI__builtin_wasm_min_f64:
13609   case WebAssembly::BI__builtin_wasm_min_f32x4:
13610   case WebAssembly::BI__builtin_wasm_min_f64x2: {
13611     Value *LHS = EmitScalarExpr(E->getArg(0));
13612     Value *RHS = EmitScalarExpr(E->getArg(1));
13613     Function *Callee = CGM.getIntrinsic(Intrinsic::minimum,
13614                                      ConvertType(E->getType()));
13615     return Builder.CreateCall(Callee, {LHS, RHS});
13616   }
13617   case WebAssembly::BI__builtin_wasm_max_f32:
13618   case WebAssembly::BI__builtin_wasm_max_f64:
13619   case WebAssembly::BI__builtin_wasm_max_f32x4:
13620   case WebAssembly::BI__builtin_wasm_max_f64x2: {
13621     Value *LHS = EmitScalarExpr(E->getArg(0));
13622     Value *RHS = EmitScalarExpr(E->getArg(1));
13623     Function *Callee = CGM.getIntrinsic(Intrinsic::maximum,
13624                                      ConvertType(E->getType()));
13625     return Builder.CreateCall(Callee, {LHS, RHS});
13626   }
13627   case WebAssembly::BI__builtin_wasm_extract_lane_s_i8x16:
13628   case WebAssembly::BI__builtin_wasm_extract_lane_u_i8x16:
13629   case WebAssembly::BI__builtin_wasm_extract_lane_s_i16x8:
13630   case WebAssembly::BI__builtin_wasm_extract_lane_u_i16x8:
13631   case WebAssembly::BI__builtin_wasm_extract_lane_i32x4:
13632   case WebAssembly::BI__builtin_wasm_extract_lane_i64x2:
13633   case WebAssembly::BI__builtin_wasm_extract_lane_f32x4:
13634   case WebAssembly::BI__builtin_wasm_extract_lane_f64x2: {
13635     llvm::APSInt LaneConst;
13636     if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext()))
13637       llvm_unreachable("Constant arg isn't actually constant?");
13638     Value *Vec = EmitScalarExpr(E->getArg(0));
13639     Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst);
13640     Value *Extract = Builder.CreateExtractElement(Vec, Lane);
13641     switch (BuiltinID) {
13642     case WebAssembly::BI__builtin_wasm_extract_lane_s_i8x16:
13643     case WebAssembly::BI__builtin_wasm_extract_lane_s_i16x8:
13644       return Builder.CreateSExt(Extract, ConvertType(E->getType()));
13645     case WebAssembly::BI__builtin_wasm_extract_lane_u_i8x16:
13646     case WebAssembly::BI__builtin_wasm_extract_lane_u_i16x8:
13647       return Builder.CreateZExt(Extract, ConvertType(E->getType()));
13648     case WebAssembly::BI__builtin_wasm_extract_lane_i32x4:
13649     case WebAssembly::BI__builtin_wasm_extract_lane_i64x2:
13650     case WebAssembly::BI__builtin_wasm_extract_lane_f32x4:
13651     case WebAssembly::BI__builtin_wasm_extract_lane_f64x2:
13652       return Extract;
13653     default:
13654       llvm_unreachable("unexpected builtin ID");
13655     }
13656   }
13657   case WebAssembly::BI__builtin_wasm_replace_lane_i8x16:
13658   case WebAssembly::BI__builtin_wasm_replace_lane_i16x8:
13659   case WebAssembly::BI__builtin_wasm_replace_lane_i32x4:
13660   case WebAssembly::BI__builtin_wasm_replace_lane_i64x2:
13661   case WebAssembly::BI__builtin_wasm_replace_lane_f32x4:
13662   case WebAssembly::BI__builtin_wasm_replace_lane_f64x2: {
13663     llvm::APSInt LaneConst;
13664     if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext()))
13665       llvm_unreachable("Constant arg isn't actually constant?");
13666     Value *Vec = EmitScalarExpr(E->getArg(0));
13667     Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst);
13668     Value *Val = EmitScalarExpr(E->getArg(2));
13669     switch (BuiltinID) {
13670     case WebAssembly::BI__builtin_wasm_replace_lane_i8x16:
13671     case WebAssembly::BI__builtin_wasm_replace_lane_i16x8: {
13672       llvm::Type *ElemType = ConvertType(E->getType())->getVectorElementType();
13673       Value *Trunc = Builder.CreateTrunc(Val, ElemType);
13674       return Builder.CreateInsertElement(Vec, Trunc, Lane);
13675     }
13676     case WebAssembly::BI__builtin_wasm_replace_lane_i32x4:
13677     case WebAssembly::BI__builtin_wasm_replace_lane_i64x2:
13678     case WebAssembly::BI__builtin_wasm_replace_lane_f32x4:
13679     case WebAssembly::BI__builtin_wasm_replace_lane_f64x2:
13680       return Builder.CreateInsertElement(Vec, Val, Lane);
13681     default:
13682       llvm_unreachable("unexpected builtin ID");
13683     }
13684   }
13685   case WebAssembly::BI__builtin_wasm_add_saturate_s_i8x16:
13686   case WebAssembly::BI__builtin_wasm_add_saturate_u_i8x16:
13687   case WebAssembly::BI__builtin_wasm_add_saturate_s_i16x8:
13688   case WebAssembly::BI__builtin_wasm_add_saturate_u_i16x8:
13689   case WebAssembly::BI__builtin_wasm_sub_saturate_s_i8x16:
13690   case WebAssembly::BI__builtin_wasm_sub_saturate_u_i8x16:
13691   case WebAssembly::BI__builtin_wasm_sub_saturate_s_i16x8:
13692   case WebAssembly::BI__builtin_wasm_sub_saturate_u_i16x8: {
13693     unsigned IntNo;
13694     switch (BuiltinID) {
13695     case WebAssembly::BI__builtin_wasm_add_saturate_s_i8x16:
13696     case WebAssembly::BI__builtin_wasm_add_saturate_s_i16x8:
13697       IntNo = Intrinsic::sadd_sat;
13698       break;
13699     case WebAssembly::BI__builtin_wasm_add_saturate_u_i8x16:
13700     case WebAssembly::BI__builtin_wasm_add_saturate_u_i16x8:
13701       IntNo = Intrinsic::uadd_sat;
13702       break;
13703     case WebAssembly::BI__builtin_wasm_sub_saturate_s_i8x16:
13704     case WebAssembly::BI__builtin_wasm_sub_saturate_s_i16x8:
13705       IntNo = Intrinsic::wasm_sub_saturate_signed;
13706       break;
13707     case WebAssembly::BI__builtin_wasm_sub_saturate_u_i8x16:
13708     case WebAssembly::BI__builtin_wasm_sub_saturate_u_i16x8:
13709       IntNo = Intrinsic::wasm_sub_saturate_unsigned;
13710       break;
13711     default:
13712       llvm_unreachable("unexpected builtin ID");
13713     }
13714     Value *LHS = EmitScalarExpr(E->getArg(0));
13715     Value *RHS = EmitScalarExpr(E->getArg(1));
13716     Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
13717     return Builder.CreateCall(Callee, {LHS, RHS});
13718   }
13719   case WebAssembly::BI__builtin_wasm_bitselect: {
13720     Value *V1 = EmitScalarExpr(E->getArg(0));
13721     Value *V2 = EmitScalarExpr(E->getArg(1));
13722     Value *C = EmitScalarExpr(E->getArg(2));
13723     Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_bitselect,
13724                                      ConvertType(E->getType()));
13725     return Builder.CreateCall(Callee, {V1, V2, C});
13726   }
13727   case WebAssembly::BI__builtin_wasm_any_true_i8x16:
13728   case WebAssembly::BI__builtin_wasm_any_true_i16x8:
13729   case WebAssembly::BI__builtin_wasm_any_true_i32x4:
13730   case WebAssembly::BI__builtin_wasm_any_true_i64x2:
13731   case WebAssembly::BI__builtin_wasm_all_true_i8x16:
13732   case WebAssembly::BI__builtin_wasm_all_true_i16x8:
13733   case WebAssembly::BI__builtin_wasm_all_true_i32x4:
13734   case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
13735     unsigned IntNo;
13736     switch (BuiltinID) {
13737     case WebAssembly::BI__builtin_wasm_any_true_i8x16:
13738     case WebAssembly::BI__builtin_wasm_any_true_i16x8:
13739     case WebAssembly::BI__builtin_wasm_any_true_i32x4:
13740     case WebAssembly::BI__builtin_wasm_any_true_i64x2:
13741       IntNo = Intrinsic::wasm_anytrue;
13742       break;
13743     case WebAssembly::BI__builtin_wasm_all_true_i8x16:
13744     case WebAssembly::BI__builtin_wasm_all_true_i16x8:
13745     case WebAssembly::BI__builtin_wasm_all_true_i32x4:
13746     case WebAssembly::BI__builtin_wasm_all_true_i64x2:
13747       IntNo = Intrinsic::wasm_alltrue;
13748       break;
13749     default:
13750       llvm_unreachable("unexpected builtin ID");
13751     }
13752     Value *Vec = EmitScalarExpr(E->getArg(0));
13753     Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
13754     return Builder.CreateCall(Callee, {Vec});
13755   }
13756   case WebAssembly::BI__builtin_wasm_abs_f32x4:
13757   case WebAssembly::BI__builtin_wasm_abs_f64x2: {
13758     Value *Vec = EmitScalarExpr(E->getArg(0));
13759     Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
13760     return Builder.CreateCall(Callee, {Vec});
13761   }
13762   case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
13763   case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
13764     Value *Vec = EmitScalarExpr(E->getArg(0));
13765     Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
13766     return Builder.CreateCall(Callee, {Vec});
13767   }
13768 
13769   default:
13770     return nullptr;
13771   }
13772 }
13773 
13774 Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
13775                                                const CallExpr *E) {
13776   SmallVector<llvm::Value *, 4> Ops;
13777   Intrinsic::ID ID = Intrinsic::not_intrinsic;
13778 
13779   auto MakeCircLd = [&](unsigned IntID, bool HasImm) {
13780     // The base pointer is passed by address, so it needs to be loaded.
13781     Address BP = EmitPointerWithAlignment(E->getArg(0));
13782     BP = Address(Builder.CreateBitCast(BP.getPointer(), Int8PtrPtrTy),
13783                  BP.getAlignment());
13784     llvm::Value *Base = Builder.CreateLoad(BP);
13785     // Operands are Base, Increment, Modifier, Start.
13786     if (HasImm)
13787       Ops = { Base, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)),
13788               EmitScalarExpr(E->getArg(3)) };
13789     else
13790       Ops = { Base, EmitScalarExpr(E->getArg(1)),
13791               EmitScalarExpr(E->getArg(2)) };
13792 
13793     llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
13794     llvm::Value *NewBase = Builder.CreateExtractValue(Result, 1);
13795     llvm::Value *LV = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
13796                                             NewBase->getType()->getPointerTo());
13797     Address Dest = EmitPointerWithAlignment(E->getArg(0));
13798     // The intrinsic generates two results. The new value for the base pointer
13799     // needs to be stored.
13800     Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
13801     return Builder.CreateExtractValue(Result, 0);
13802   };
13803 
13804   auto MakeCircSt = [&](unsigned IntID, bool HasImm) {
13805     // The base pointer is passed by address, so it needs to be loaded.
13806     Address BP = EmitPointerWithAlignment(E->getArg(0));
13807     BP = Address(Builder.CreateBitCast(BP.getPointer(), Int8PtrPtrTy),
13808                  BP.getAlignment());
13809     llvm::Value *Base = Builder.CreateLoad(BP);
13810     // Operands are Base, Increment, Modifier, Value, Start.
13811     if (HasImm)
13812       Ops = { Base, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)),
13813               EmitScalarExpr(E->getArg(3)), EmitScalarExpr(E->getArg(4)) };
13814     else
13815       Ops = { Base, EmitScalarExpr(E->getArg(1)),
13816               EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)) };
13817 
13818     llvm::Value *NewBase = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
13819     llvm::Value *LV = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
13820                                             NewBase->getType()->getPointerTo());
13821     Address Dest = EmitPointerWithAlignment(E->getArg(0));
13822     // The intrinsic generates one result, which is the new value for the base
13823     // pointer. It needs to be stored.
13824     return Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
13825   };
13826 
13827   // Handle the conversion of bit-reverse load intrinsics to bit code.
13828   // The intrinsic call after this function only reads from memory and the
13829   // write to memory is dealt by the store instruction.
13830   auto MakeBrevLd = [&](unsigned IntID, llvm::Type *DestTy) {
13831     // The intrinsic generates one result, which is the new value for the base
13832     // pointer. It needs to be returned. The result of the load instruction is
13833     // passed to intrinsic by address, so the value needs to be stored.
13834     llvm::Value *BaseAddress =
13835         Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);
13836 
13837     // Expressions like &(*pt++) will be incremented per evaluation.
13838     // EmitPointerWithAlignment and EmitScalarExpr evaluates the expression
13839     // per call.
13840     Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
13841     DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), Int8PtrTy),
13842                        DestAddr.getAlignment());
13843     llvm::Value *DestAddress = DestAddr.getPointer();
13844 
13845     // Operands are Base, Dest, Modifier.
13846     // The intrinsic format in LLVM IR is defined as
13847     // { ValueType, i8* } (i8*, i32).
13848     Ops = {BaseAddress, EmitScalarExpr(E->getArg(2))};
13849 
13850     llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
13851     // The value needs to be stored as the variable is passed by reference.
13852     llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);
13853 
13854     // The store needs to be truncated to fit the destination type.
13855     // While i32 and i64 are natively supported on Hexagon, i8 and i16 needs
13856     // to be handled with stores of respective destination type.
13857     DestVal = Builder.CreateTrunc(DestVal, DestTy);
13858 
13859     llvm::Value *DestForStore =
13860         Builder.CreateBitCast(DestAddress, DestVal->getType()->getPointerTo());
13861     Builder.CreateAlignedStore(DestVal, DestForStore, DestAddr.getAlignment());
13862     // The updated value of the base pointer is returned.
13863     return Builder.CreateExtractValue(Result, 1);
13864   };
13865 
13866   switch (BuiltinID) {
13867   case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
13868   case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B: {
13869     Address Dest = EmitPointerWithAlignment(E->getArg(2));
13870     unsigned Size;
13871     if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vaddcarry) {
13872       Size = 512;
13873       ID = Intrinsic::hexagon_V6_vaddcarry;
13874     } else {
13875       Size = 1024;
13876       ID = Intrinsic::hexagon_V6_vaddcarry_128B;
13877     }
13878     Dest = Builder.CreateBitCast(Dest,
13879         llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0));
13880     LoadInst *QLd = Builder.CreateLoad(Dest);
13881     Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd };
13882     llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
13883     llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1);
13884     llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)),
13885                                               Vprd->getType()->getPointerTo(0));
13886     Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment());
13887     return Builder.CreateExtractValue(Result, 0);
13888   }
13889   case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
13890   case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
13891     Address Dest = EmitPointerWithAlignment(E->getArg(2));
13892     unsigned Size;
13893     if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vsubcarry) {
13894       Size = 512;
13895       ID = Intrinsic::hexagon_V6_vsubcarry;
13896     } else {
13897       Size = 1024;
13898       ID = Intrinsic::hexagon_V6_vsubcarry_128B;
13899     }
13900     Dest = Builder.CreateBitCast(Dest,
13901         llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0));
13902     LoadInst *QLd = Builder.CreateLoad(Dest);
13903     Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd };
13904     llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
13905     llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1);
13906     llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)),
13907                                               Vprd->getType()->getPointerTo(0));
13908     Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment());
13909     return Builder.CreateExtractValue(Result, 0);
13910   }
13911   case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
13912     return MakeCircLd(Intrinsic::hexagon_L2_loadrub_pci, /*HasImm*/true);
13913   case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
13914     return MakeCircLd(Intrinsic::hexagon_L2_loadrb_pci,  /*HasImm*/true);
13915   case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
13916     return MakeCircLd(Intrinsic::hexagon_L2_loadruh_pci, /*HasImm*/true);
13917   case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
13918     return MakeCircLd(Intrinsic::hexagon_L2_loadrh_pci,  /*HasImm*/true);
13919   case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
13920     return MakeCircLd(Intrinsic::hexagon_L2_loadri_pci,  /*HasImm*/true);
13921   case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
13922     return MakeCircLd(Intrinsic::hexagon_L2_loadrd_pci,  /*HasImm*/true);
13923   case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
13924     return MakeCircLd(Intrinsic::hexagon_L2_loadrub_pcr, /*HasImm*/false);
13925   case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
13926     return MakeCircLd(Intrinsic::hexagon_L2_loadrb_pcr,  /*HasImm*/false);
13927   case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
13928     return MakeCircLd(Intrinsic::hexagon_L2_loadruh_pcr, /*HasImm*/false);
13929   case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
13930     return MakeCircLd(Intrinsic::hexagon_L2_loadrh_pcr,  /*HasImm*/false);
13931   case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
13932     return MakeCircLd(Intrinsic::hexagon_L2_loadri_pcr,  /*HasImm*/false);
13933   case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
13934     return MakeCircLd(Intrinsic::hexagon_L2_loadrd_pcr,  /*HasImm*/false);
13935   case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
13936     return MakeCircSt(Intrinsic::hexagon_S2_storerb_pci, /*HasImm*/true);
13937   case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
13938     return MakeCircSt(Intrinsic::hexagon_S2_storerh_pci, /*HasImm*/true);
13939   case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
13940     return MakeCircSt(Intrinsic::hexagon_S2_storerf_pci, /*HasImm*/true);
13941   case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
13942     return MakeCircSt(Intrinsic::hexagon_S2_storeri_pci, /*HasImm*/true);
13943   case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
13944     return MakeCircSt(Intrinsic::hexagon_S2_storerd_pci, /*HasImm*/true);
13945   case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
13946     return MakeCircSt(Intrinsic::hexagon_S2_storerb_pcr, /*HasImm*/false);
13947   case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
13948     return MakeCircSt(Intrinsic::hexagon_S2_storerh_pcr, /*HasImm*/false);
13949   case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
13950     return MakeCircSt(Intrinsic::hexagon_S2_storerf_pcr, /*HasImm*/false);
13951   case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
13952     return MakeCircSt(Intrinsic::hexagon_S2_storeri_pcr, /*HasImm*/false);
13953   case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
13954     return MakeCircSt(Intrinsic::hexagon_S2_storerd_pcr, /*HasImm*/false);
13955   case Hexagon::BI__builtin_brev_ldub:
13956     return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
13957   case Hexagon::BI__builtin_brev_ldb:
13958     return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
13959   case Hexagon::BI__builtin_brev_lduh:
13960     return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
13961   case Hexagon::BI__builtin_brev_ldh:
13962     return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
13963   case Hexagon::BI__builtin_brev_ldw:
13964     return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
13965   case Hexagon::BI__builtin_brev_ldd:
13966     return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
13967   default:
13968     break;
13969   } // switch
13970 
13971   return nullptr;
13972 }
13973