1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CodeGenFunction.h"
15 #include "CGCXXABI.h"
16 #include "CGObjCRuntime.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/ASTContext.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/Basic/TargetBuiltins.h"
22 #include "clang/Basic/TargetInfo.h"
23 #include "clang/CodeGen/CGFunctionInfo.h"
24 #include "clang/Sema/SemaDiagnostic.h"
25 #include "llvm/ADT/StringExtras.h"
26 #include "llvm/IR/CallSite.h"
27 #include "llvm/IR/DataLayout.h"
28 #include "llvm/IR/InlineAsm.h"
29 #include "llvm/IR/Intrinsics.h"
30 #include <sstream>
31 
32 using namespace clang;
33 using namespace CodeGen;
34 using namespace llvm;
35 
36 /// getBuiltinLibFunction - Given a builtin id for a function like
37 /// "__builtin_fabsf", return a Function* for "fabsf".
38 llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
39                                                   unsigned BuiltinID) {
40   assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
41 
42   // Get the name, skip over the __builtin_ prefix (if necessary).
43   StringRef Name;
44   GlobalDecl D(FD);
45 
46   // If the builtin has been declared explicitly with an assembler label,
47   // use the mangled name. This differs from the plain label on platforms
48   // that prefix labels.
49   if (FD->hasAttr<AsmLabelAttr>())
50     Name = getMangledName(D);
51   else
52     Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
53 
54   llvm::FunctionType *Ty =
55     cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
56 
57   return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
58 }
59 
60 /// Emit the conversions required to turn the given value into an
61 /// integer of the given size.
62 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
63                         QualType T, llvm::IntegerType *IntType) {
64   V = CGF.EmitToMemory(V, T);
65 
66   if (V->getType()->isPointerTy())
67     return CGF.Builder.CreatePtrToInt(V, IntType);
68 
69   assert(V->getType() == IntType);
70   return V;
71 }
72 
73 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
74                           QualType T, llvm::Type *ResultType) {
75   V = CGF.EmitFromMemory(V, T);
76 
77   if (ResultType->isPointerTy())
78     return CGF.Builder.CreateIntToPtr(V, ResultType);
79 
80   assert(V->getType() == ResultType);
81   return V;
82 }
83 
84 /// Utility to insert an atomic instruction based on Instrinsic::ID
85 /// and the expression node.
86 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
87                                     llvm::AtomicRMWInst::BinOp Kind,
88                                     const CallExpr *E) {
89   QualType T = E->getType();
90   assert(E->getArg(0)->getType()->isPointerType());
91   assert(CGF.getContext().hasSameUnqualifiedType(T,
92                                   E->getArg(0)->getType()->getPointeeType()));
93   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
94 
95   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
96   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
97 
98   llvm::IntegerType *IntType =
99     llvm::IntegerType::get(CGF.getLLVMContext(),
100                            CGF.getContext().getTypeSize(T));
101   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
102 
103   llvm::Value *Args[2];
104   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
105   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
106   llvm::Type *ValueType = Args[1]->getType();
107   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
108 
109   llvm::Value *Result =
110       CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
111                                   llvm::SequentiallyConsistent);
112   return EmitFromInt(CGF, Result, T, ValueType);
113 }
114 
115 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
116   Value *Val = CGF.EmitScalarExpr(E->getArg(0));
117   Value *Address = CGF.EmitScalarExpr(E->getArg(1));
118 
119   // Convert the type of the pointer to a pointer to the stored type.
120   Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
121   Value *BC = CGF.Builder.CreateBitCast(
122       Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
123   LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
124   LV.setNontemporal(true);
125   CGF.EmitStoreOfScalar(Val, LV, false);
126   return nullptr;
127 }
128 
129 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
130   Value *Address = CGF.EmitScalarExpr(E->getArg(0));
131 
132   LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
133   LV.setNontemporal(true);
134   return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
135 }
136 
137 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
138                                llvm::AtomicRMWInst::BinOp Kind,
139                                const CallExpr *E) {
140   return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
141 }
142 
143 /// Utility to insert an atomic instruction based Instrinsic::ID and
144 /// the expression node, where the return value is the result of the
145 /// operation.
146 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
147                                    llvm::AtomicRMWInst::BinOp Kind,
148                                    const CallExpr *E,
149                                    Instruction::BinaryOps Op,
150                                    bool Invert = false) {
151   QualType T = E->getType();
152   assert(E->getArg(0)->getType()->isPointerType());
153   assert(CGF.getContext().hasSameUnqualifiedType(T,
154                                   E->getArg(0)->getType()->getPointeeType()));
155   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
156 
157   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
158   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
159 
160   llvm::IntegerType *IntType =
161     llvm::IntegerType::get(CGF.getLLVMContext(),
162                            CGF.getContext().getTypeSize(T));
163   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
164 
165   llvm::Value *Args[2];
166   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
167   llvm::Type *ValueType = Args[1]->getType();
168   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
169   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
170 
171   llvm::Value *Result =
172       CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
173                                   llvm::SequentiallyConsistent);
174   Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
175   if (Invert)
176     Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
177                                      llvm::ConstantInt::get(IntType, -1));
178   Result = EmitFromInt(CGF, Result, T, ValueType);
179   return RValue::get(Result);
180 }
181 
182 /// @brief Utility to insert an atomic cmpxchg instruction.
183 ///
184 /// @param CGF The current codegen function.
185 /// @param E   Builtin call expression to convert to cmpxchg.
186 ///            arg0 - address to operate on
187 ///            arg1 - value to compare with
188 ///            arg2 - new value
189 /// @param ReturnBool Specifies whether to return success flag of
190 ///                   cmpxchg result or the old value.
191 ///
192 /// @returns result of cmpxchg, according to ReturnBool
193 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
194                                      bool ReturnBool) {
195   QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
196   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
197   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
198 
199   llvm::IntegerType *IntType = llvm::IntegerType::get(
200       CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
201   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
202 
203   Value *Args[3];
204   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
205   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
206   llvm::Type *ValueType = Args[1]->getType();
207   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
208   Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
209 
210   Value *Pair = CGF.Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
211                                                 llvm::SequentiallyConsistent,
212                                                 llvm::SequentiallyConsistent);
213   if (ReturnBool)
214     // Extract boolean success flag and zext it to int.
215     return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
216                                   CGF.ConvertType(E->getType()));
217   else
218     // Extract old value and emit it using the same type as compare value.
219     return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
220                        ValueType);
221 }
222 
223 /// EmitFAbs - Emit a call to @llvm.fabs().
224 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
225   Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
226   llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
227   Call->setDoesNotAccessMemory();
228   return Call;
229 }
230 
231 /// Emit the computation of the sign bit for a floating point value. Returns
232 /// the i1 sign bit value.
233 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
234   LLVMContext &C = CGF.CGM.getLLVMContext();
235 
236   llvm::Type *Ty = V->getType();
237   int Width = Ty->getPrimitiveSizeInBits();
238   llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
239   V = CGF.Builder.CreateBitCast(V, IntTy);
240   if (Ty->isPPC_FP128Ty()) {
241     // The higher-order double comes first, and so we need to truncate the
242     // pair to extract the overall sign. The order of the pair is the same
243     // in both little- and big-Endian modes.
244     Width >>= 1;
245     IntTy = llvm::IntegerType::get(C, Width);
246     V = CGF.Builder.CreateTrunc(V, IntTy);
247   }
248   Value *Zero = llvm::Constant::getNullValue(IntTy);
249   return CGF.Builder.CreateICmpSLT(V, Zero);
250 }
251 
252 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn,
253                               const CallExpr *E, llvm::Value *calleeValue) {
254   return CGF.EmitCall(E->getCallee()->getType(), calleeValue, E,
255                       ReturnValueSlot(), Fn);
256 }
257 
258 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
259 /// depending on IntrinsicID.
260 ///
261 /// \arg CGF The current codegen function.
262 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
263 /// \arg X The first argument to the llvm.*.with.overflow.*.
264 /// \arg Y The second argument to the llvm.*.with.overflow.*.
265 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
266 /// \returns The result (i.e. sum/product) returned by the intrinsic.
267 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
268                                           const llvm::Intrinsic::ID IntrinsicID,
269                                           llvm::Value *X, llvm::Value *Y,
270                                           llvm::Value *&Carry) {
271   // Make sure we have integers of the same width.
272   assert(X->getType() == Y->getType() &&
273          "Arguments must be the same type. (Did you forget to make sure both "
274          "arguments have the same integer width?)");
275 
276   llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
277   llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
278   Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
279   return CGF.Builder.CreateExtractValue(Tmp, 0);
280 }
281 
namespace {
/// Describes an integer type by its width and signedness. This is a plain
/// aggregate so that it can be brace-initialized as {Width, Signed}.
struct WidthAndSignedness {
  /// Width of the type.
  unsigned Width;
  /// True if the type is signed.
  bool Signed;
};
} // end anonymous namespace
288 
289 static WidthAndSignedness
290 getIntegerWidthAndSignedness(const clang::ASTContext &context,
291                              const clang::QualType Type) {
292   assert(Type->isIntegerType() && "Given type is not an integer.");
293   unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
294   bool Signed = Type->isSignedIntegerType();
295   return {Width, Signed};
296 }
297 
298 // Given one or more integer types, this function produces an integer type that
299 // encompasses them: any value in one of the given types could be expressed in
300 // the encompassing type.
301 static struct WidthAndSignedness
302 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
303   assert(Types.size() > 0 && "Empty list of types.");
304 
305   // If any of the given types is signed, we must return a signed type.
306   bool Signed = false;
307   for (const auto &Type : Types) {
308     Signed |= Type.Signed;
309   }
310 
311   // The encompassing type must have a width greater than or equal to the width
312   // of the specified types.  Aditionally, if the encompassing type is signed,
313   // its width must be strictly greater than the width of any unsigned types
314   // given.
315   unsigned Width = 0;
316   for (const auto &Type : Types) {
317     unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
318     if (Width < MinWidth) {
319       Width = MinWidth;
320     }
321   }
322 
323   return {Width, Signed};
324 }
325 
326 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
327   llvm::Type *DestType = Int8PtrTy;
328   if (ArgValue->getType() != DestType)
329     ArgValue =
330         Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
331 
332   Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
333   return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
334 }
335 
336 // Returns true if we have a valid set of target features.
337 bool CodeGenFunction::checkBuiltinTargetFeatures(
338     const FunctionDecl *TargetDecl) {
339   // Early exit if this is an indirect call.
340   if (!TargetDecl)
341     return true;
342 
343   // Get the current enclosing function if it exists. If it doesn't
344   // we can't check the target features anyhow.
345   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurFuncDecl);
346   if (!FD) return true;
347 
348   unsigned BuiltinID = TargetDecl->getBuiltinID();
349   const char *FeatureList =
350       CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID);
351 
352   if (!FeatureList || StringRef(FeatureList) == "")
353     return true;
354 
355   StringRef TargetCPU = Target.getTargetOpts().CPU;
356   llvm::StringMap<bool> FeatureMap;
357 
358   if (const auto *TD = FD->getAttr<TargetAttr>()) {
359     // If we have a TargetAttr build up the feature map based on that.
360     TargetAttr::ParsedTargetAttr ParsedAttr = TD->parse();
361 
362     // Make a copy of the features as passed on the command line into the
363     // beginning of the additional features from the function to override.
364     ParsedAttr.first.insert(ParsedAttr.first.begin(),
365                             Target.getTargetOpts().FeaturesAsWritten.begin(),
366                             Target.getTargetOpts().FeaturesAsWritten.end());
367 
368     if (ParsedAttr.second != "")
369       TargetCPU = ParsedAttr.second;
370 
371     // Now populate the feature map, first with the TargetCPU which is either
372     // the default or a new one from the target attribute string. Then we'll use
373     // the passed in features (FeaturesAsWritten) along with the new ones from
374     // the attribute.
375     Target.initFeatureMap(FeatureMap, CGM.getDiags(), TargetCPU,
376                           ParsedAttr.first);
377   } else {
378     Target.initFeatureMap(FeatureMap, CGM.getDiags(), TargetCPU,
379                           Target.getTargetOpts().Features);
380   }
381 
382   // If we have at least one of the features in the feature list return
383   // true, otherwise return false.
384   SmallVector<StringRef, 1> AttrFeatures;
385   StringRef(FeatureList).split(AttrFeatures, ",");
386   return std::all_of(AttrFeatures.begin(), AttrFeatures.end(),
387                      [&](StringRef &Feature) {
388                        SmallVector<StringRef, 1> OrFeatures;
389                        Feature.split(OrFeatures, "|");
390                        return std::any_of(OrFeatures.begin(), OrFeatures.end(),
391                                           [&](StringRef &Feature) {
392                                             return FeatureMap[Feature];
393                                           });
394                      });
395 }
396 
397 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
398                                         unsigned BuiltinID, const CallExpr *E,
399                                         ReturnValueSlot ReturnValue) {
400   // See if we can constant fold this builtin.  If so, don't emit it at all.
401   Expr::EvalResult Result;
402   if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
403       !Result.hasSideEffects()) {
404     if (Result.Val.isInt())
405       return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
406                                                 Result.Val.getInt()));
407     if (Result.Val.isFloat())
408       return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
409                                                Result.Val.getFloat()));
410   }
411 
412   switch (BuiltinID) {
413   default: break;  // Handle intrinsics and libm functions below.
414   case Builtin::BI__builtin___CFStringMakeConstantString:
415   case Builtin::BI__builtin___NSStringMakeConstantString:
416     return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr));
417   case Builtin::BI__builtin_stdarg_start:
418   case Builtin::BI__builtin_va_start:
419   case Builtin::BI__va_start:
420   case Builtin::BI__builtin_va_end:
421     return RValue::get(
422         EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
423                            ? EmitScalarExpr(E->getArg(0))
424                            : EmitVAListRef(E->getArg(0)).getPointer(),
425                        BuiltinID != Builtin::BI__builtin_va_end));
426   case Builtin::BI__builtin_va_copy: {
427     Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
428     Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
429 
430     llvm::Type *Type = Int8PtrTy;
431 
432     DstPtr = Builder.CreateBitCast(DstPtr, Type);
433     SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
434     return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
435                                           {DstPtr, SrcPtr}));
436   }
437   case Builtin::BI__builtin_abs:
438   case Builtin::BI__builtin_labs:
439   case Builtin::BI__builtin_llabs: {
440     Value *ArgValue = EmitScalarExpr(E->getArg(0));
441 
442     Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
443     Value *CmpResult =
444     Builder.CreateICmpSGE(ArgValue,
445                           llvm::Constant::getNullValue(ArgValue->getType()),
446                                                             "abscond");
447     Value *Result =
448       Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
449 
450     return RValue::get(Result);
451   }
452   case Builtin::BI__builtin_fabs:
453   case Builtin::BI__builtin_fabsf:
454   case Builtin::BI__builtin_fabsl: {
455     Value *Arg1 = EmitScalarExpr(E->getArg(0));
456     Value *Result = EmitFAbs(*this, Arg1);
457     return RValue::get(Result);
458   }
459   case Builtin::BI__builtin_fmod:
460   case Builtin::BI__builtin_fmodf:
461   case Builtin::BI__builtin_fmodl: {
462     Value *Arg1 = EmitScalarExpr(E->getArg(0));
463     Value *Arg2 = EmitScalarExpr(E->getArg(1));
464     Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
465     return RValue::get(Result);
466   }
467 
468   case Builtin::BI__builtin_conj:
469   case Builtin::BI__builtin_conjf:
470   case Builtin::BI__builtin_conjl: {
471     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
472     Value *Real = ComplexVal.first;
473     Value *Imag = ComplexVal.second;
474     Value *Zero =
475       Imag->getType()->isFPOrFPVectorTy()
476         ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
477         : llvm::Constant::getNullValue(Imag->getType());
478 
479     Imag = Builder.CreateFSub(Zero, Imag, "sub");
480     return RValue::getComplex(std::make_pair(Real, Imag));
481   }
482   case Builtin::BI__builtin_creal:
483   case Builtin::BI__builtin_crealf:
484   case Builtin::BI__builtin_creall:
485   case Builtin::BIcreal:
486   case Builtin::BIcrealf:
487   case Builtin::BIcreall: {
488     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
489     return RValue::get(ComplexVal.first);
490   }
491 
492   case Builtin::BI__builtin_cimag:
493   case Builtin::BI__builtin_cimagf:
494   case Builtin::BI__builtin_cimagl:
495   case Builtin::BIcimag:
496   case Builtin::BIcimagf:
497   case Builtin::BIcimagl: {
498     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
499     return RValue::get(ComplexVal.second);
500   }
501 
502   case Builtin::BI__builtin_ctzs:
503   case Builtin::BI__builtin_ctz:
504   case Builtin::BI__builtin_ctzl:
505   case Builtin::BI__builtin_ctzll: {
506     Value *ArgValue = EmitScalarExpr(E->getArg(0));
507 
508     llvm::Type *ArgType = ArgValue->getType();
509     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
510 
511     llvm::Type *ResultType = ConvertType(E->getType());
512     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
513     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
514     if (Result->getType() != ResultType)
515       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
516                                      "cast");
517     return RValue::get(Result);
518   }
519   case Builtin::BI__builtin_clzs:
520   case Builtin::BI__builtin_clz:
521   case Builtin::BI__builtin_clzl:
522   case Builtin::BI__builtin_clzll: {
523     Value *ArgValue = EmitScalarExpr(E->getArg(0));
524 
525     llvm::Type *ArgType = ArgValue->getType();
526     Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
527 
528     llvm::Type *ResultType = ConvertType(E->getType());
529     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
530     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
531     if (Result->getType() != ResultType)
532       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
533                                      "cast");
534     return RValue::get(Result);
535   }
536   case Builtin::BI__builtin_ffs:
537   case Builtin::BI__builtin_ffsl:
538   case Builtin::BI__builtin_ffsll: {
539     // ffs(x) -> x ? cttz(x) + 1 : 0
540     Value *ArgValue = EmitScalarExpr(E->getArg(0));
541 
542     llvm::Type *ArgType = ArgValue->getType();
543     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
544 
545     llvm::Type *ResultType = ConvertType(E->getType());
546     Value *Tmp =
547         Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
548                           llvm::ConstantInt::get(ArgType, 1));
549     Value *Zero = llvm::Constant::getNullValue(ArgType);
550     Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
551     Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
552     if (Result->getType() != ResultType)
553       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
554                                      "cast");
555     return RValue::get(Result);
556   }
557   case Builtin::BI__builtin_parity:
558   case Builtin::BI__builtin_parityl:
559   case Builtin::BI__builtin_parityll: {
560     // parity(x) -> ctpop(x) & 1
561     Value *ArgValue = EmitScalarExpr(E->getArg(0));
562 
563     llvm::Type *ArgType = ArgValue->getType();
564     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
565 
566     llvm::Type *ResultType = ConvertType(E->getType());
567     Value *Tmp = Builder.CreateCall(F, ArgValue);
568     Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
569     if (Result->getType() != ResultType)
570       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
571                                      "cast");
572     return RValue::get(Result);
573   }
574   case Builtin::BI__builtin_popcount:
575   case Builtin::BI__builtin_popcountl:
576   case Builtin::BI__builtin_popcountll: {
577     Value *ArgValue = EmitScalarExpr(E->getArg(0));
578 
579     llvm::Type *ArgType = ArgValue->getType();
580     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
581 
582     llvm::Type *ResultType = ConvertType(E->getType());
583     Value *Result = Builder.CreateCall(F, ArgValue);
584     if (Result->getType() != ResultType)
585       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
586                                      "cast");
587     return RValue::get(Result);
588   }
589   case Builtin::BI__builtin_unpredictable: {
590     // Always return the argument of __builtin_unpredictable. LLVM does not
591     // handle this builtin. Metadata for this builtin should be added directly
592     // to instructions such as branches or switches that use it.
593     return RValue::get(EmitScalarExpr(E->getArg(0)));
594   }
595   case Builtin::BI__builtin_expect: {
596     Value *ArgValue = EmitScalarExpr(E->getArg(0));
597     llvm::Type *ArgType = ArgValue->getType();
598 
599     Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
600     // Don't generate llvm.expect on -O0 as the backend won't use it for
601     // anything.
602     // Note, we still IRGen ExpectedValue because it could have side-effects.
603     if (CGM.getCodeGenOpts().OptimizationLevel == 0)
604       return RValue::get(ArgValue);
605 
606     Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
607     Value *Result =
608         Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
609     return RValue::get(Result);
610   }
611   case Builtin::BI__builtin_assume_aligned: {
612     Value *PtrValue = EmitScalarExpr(E->getArg(0));
613     Value *OffsetValue =
614       (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
615 
616     Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
617     ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
618     unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
619 
620     EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
621     return RValue::get(PtrValue);
622   }
623   case Builtin::BI__assume:
624   case Builtin::BI__builtin_assume: {
625     if (E->getArg(0)->HasSideEffects(getContext()))
626       return RValue::get(nullptr);
627 
628     Value *ArgValue = EmitScalarExpr(E->getArg(0));
629     Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
630     return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
631   }
632   case Builtin::BI__builtin_bswap16:
633   case Builtin::BI__builtin_bswap32:
634   case Builtin::BI__builtin_bswap64: {
635     Value *ArgValue = EmitScalarExpr(E->getArg(0));
636     llvm::Type *ArgType = ArgValue->getType();
637     Value *F = CGM.getIntrinsic(Intrinsic::bswap, ArgType);
638     return RValue::get(Builder.CreateCall(F, ArgValue));
639   }
640   case Builtin::BI__builtin_object_size: {
641     // We rely on constant folding to deal with expressions with side effects.
642     assert(!E->getArg(0)->HasSideEffects(getContext()) &&
643            "should have been constant folded");
644 
645     // We pass this builtin onto the optimizer so that it can
646     // figure out the object size in more complex cases.
647     llvm::Type *ResType = ConvertType(E->getType());
648 
649     // LLVM only supports 0 and 2, make sure that we pass along that
650     // as a boolean.
651     Value *Ty = EmitScalarExpr(E->getArg(1));
652     ConstantInt *CI = dyn_cast<ConstantInt>(Ty);
653     assert(CI);
654     uint64_t val = CI->getZExtValue();
655     CI = ConstantInt::get(Builder.getInt1Ty(), (val & 0x2) >> 1);
656     // FIXME: Get right address space.
657     llvm::Type *Tys[] = { ResType, Builder.getInt8PtrTy(0) };
658     Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys);
659     return RValue::get(
660         Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0)), CI}));
661   }
662   case Builtin::BI__builtin_prefetch: {
663     Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
664     // FIXME: Technically these constants should of type 'int', yes?
665     RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
666       llvm::ConstantInt::get(Int32Ty, 0);
667     Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
668       llvm::ConstantInt::get(Int32Ty, 3);
669     Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
670     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
671     return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
672   }
673   case Builtin::BI__builtin_readcyclecounter: {
674     Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
675     return RValue::get(Builder.CreateCall(F));
676   }
677   case Builtin::BI__builtin___clear_cache: {
678     Value *Begin = EmitScalarExpr(E->getArg(0));
679     Value *End = EmitScalarExpr(E->getArg(1));
680     Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
681     return RValue::get(Builder.CreateCall(F, {Begin, End}));
682   }
683   case Builtin::BI__builtin_trap:
684     return RValue::get(EmitTrapCall(Intrinsic::trap));
685   case Builtin::BI__debugbreak:
686     return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
687   case Builtin::BI__builtin_unreachable: {
688     if (SanOpts.has(SanitizerKind::Unreachable)) {
689       SanitizerScope SanScope(this);
690       EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
691                                SanitizerKind::Unreachable),
692                 "builtin_unreachable", EmitCheckSourceLocation(E->getExprLoc()),
693                 None);
694     } else
695       Builder.CreateUnreachable();
696 
697     // We do need to preserve an insertion point.
698     EmitBlock(createBasicBlock("unreachable.cont"));
699 
700     return RValue::get(nullptr);
701   }
702 
703   case Builtin::BI__builtin_powi:
704   case Builtin::BI__builtin_powif:
705   case Builtin::BI__builtin_powil: {
706     Value *Base = EmitScalarExpr(E->getArg(0));
707     Value *Exponent = EmitScalarExpr(E->getArg(1));
708     llvm::Type *ArgType = Base->getType();
709     Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
710     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
711   }
712 
713   case Builtin::BI__builtin_isgreater:
714   case Builtin::BI__builtin_isgreaterequal:
715   case Builtin::BI__builtin_isless:
716   case Builtin::BI__builtin_islessequal:
717   case Builtin::BI__builtin_islessgreater:
718   case Builtin::BI__builtin_isunordered: {
719     // Ordered comparisons: we know the arguments to these are matching scalar
720     // floating point values.
721     Value *LHS = EmitScalarExpr(E->getArg(0));
722     Value *RHS = EmitScalarExpr(E->getArg(1));
723 
724     switch (BuiltinID) {
725     default: llvm_unreachable("Unknown ordered comparison");
726     case Builtin::BI__builtin_isgreater:
727       LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
728       break;
729     case Builtin::BI__builtin_isgreaterequal:
730       LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
731       break;
732     case Builtin::BI__builtin_isless:
733       LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
734       break;
735     case Builtin::BI__builtin_islessequal:
736       LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
737       break;
738     case Builtin::BI__builtin_islessgreater:
739       LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
740       break;
741     case Builtin::BI__builtin_isunordered:
742       LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
743       break;
744     }
745     // ZExt bool to int type.
746     return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
747   }
748   case Builtin::BI__builtin_isnan: {
749     Value *V = EmitScalarExpr(E->getArg(0));
750     V = Builder.CreateFCmpUNO(V, V, "cmp");
751     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
752   }
753 
754   case Builtin::BI__builtin_isinf: {
755     // isinf(x) --> fabs(x) == infinity
756     Value *V = EmitScalarExpr(E->getArg(0));
757     V = EmitFAbs(*this, V);
758 
759     V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf");
760     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
761   }
762 
763   case Builtin::BI__builtin_isinf_sign: {
764     // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
765     Value *Arg = EmitScalarExpr(E->getArg(0));
766     Value *AbsArg = EmitFAbs(*this, Arg);
767     Value *IsInf = Builder.CreateFCmpOEQ(
768         AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
769     Value *IsNeg = EmitSignBit(*this, Arg);
770 
771     llvm::Type *IntTy = ConvertType(E->getType());
772     Value *Zero = Constant::getNullValue(IntTy);
773     Value *One = ConstantInt::get(IntTy, 1);
774     Value *NegativeOne = ConstantInt::get(IntTy, -1);
775     Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
776     Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
777     return RValue::get(Result);
778   }
779 
780   case Builtin::BI__builtin_isnormal: {
781     // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
782     Value *V = EmitScalarExpr(E->getArg(0));
783     Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
784 
785     Value *Abs = EmitFAbs(*this, V);
786     Value *IsLessThanInf =
787       Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
788     APFloat Smallest = APFloat::getSmallestNormalized(
789                    getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
790     Value *IsNormal =
791       Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
792                             "isnormal");
793     V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
794     V = Builder.CreateAnd(V, IsNormal, "and");
795     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
796   }
797 
798   case Builtin::BI__builtin_isfinite: {
799     // isfinite(x) --> x == x && fabs(x) != infinity;
800     Value *V = EmitScalarExpr(E->getArg(0));
801     Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
802 
803     Value *Abs = EmitFAbs(*this, V);
804     Value *IsNotInf =
805       Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
806 
807     V = Builder.CreateAnd(Eq, IsNotInf, "and");
808     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
809   }
810 
  case Builtin::BI__builtin_fpclassify: {
    // Emits a chain of tests on the floating-point operand (argument 5) and
    // selects one of the five caller-supplied classification values. As used
    // below, the value arguments are: 0 = NaN, 1 = infinity, 2 = normal,
    // 3 = subnormal, 4 = zero. Each test either branches to a common end
    // block, contributing its value to a PHI there, or falls to the next test.
    Value *V = EmitScalarExpr(E->getArg(5));
    llvm::Type *Ty = ConvertType(E->getArg(5)->getType());

    // Create Result
    // The PHI is created up front in the end block so that each test below
    // can register its incoming value as soon as its branch is emitted.
    BasicBlock *Begin = Builder.GetInsertBlock();
    BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
    Builder.SetInsertPoint(End);
    PHINode *Result =
      Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
                        "fpclassify_result");

    // if (V==0) return FP_ZERO
    Builder.SetInsertPoint(Begin);
    Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
                                          "iszero");
    Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
    BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
    Builder.CreateCondBr(IsZero, End, NotZero);
    Result->addIncoming(ZeroLiteral, Begin);

    // if (V != V) return FP_NAN
    Builder.SetInsertPoint(NotZero);
    Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
    Value *NanLiteral = EmitScalarExpr(E->getArg(0));
    BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
    Builder.CreateCondBr(IsNan, End, NotNan);
    Result->addIncoming(NanLiteral, NotZero);

    // if (fabs(V) == infinity) return FP_INFINITY
    Builder.SetInsertPoint(NotNan);
    Value *VAbs = EmitFAbs(*this, V);
    Value *IsInf =
      Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
                            "isinf");
    Value *InfLiteral = EmitScalarExpr(E->getArg(1));
    BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
    Builder.CreateCondBr(IsInf, End, NotInf);
    Result->addIncoming(InfLiteral, NotNan);

    // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
    // The last two classes are chosen with a select rather than another
    // branch, so this block contributes a single incoming value to the PHI.
    Builder.SetInsertPoint(NotInf);
    APFloat Smallest = APFloat::getSmallestNormalized(
        getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
    Value *IsNormal =
      Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
                            "isnormal");
    Value *NormalResult =
      Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
                           EmitScalarExpr(E->getArg(3)));
    Builder.CreateBr(End);
    Result->addIncoming(NormalResult, NotInf);

    // return Result
    // Leave the builder positioned in the end block for subsequent code.
    Builder.SetInsertPoint(End);
    return RValue::get(Result);
  }
868 
869   case Builtin::BIalloca:
870   case Builtin::BI_alloca:
871   case Builtin::BI__builtin_alloca: {
872     Value *Size = EmitScalarExpr(E->getArg(0));
873     return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size));
874   }
875   case Builtin::BIbzero:
876   case Builtin::BI__builtin_bzero: {
877     Address Dest = EmitPointerWithAlignment(E->getArg(0));
878     Value *SizeVal = EmitScalarExpr(E->getArg(1));
879     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
880                         E->getArg(0)->getExprLoc(), FD, 0);
881     Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
882     return RValue::get(Dest.getPointer());
883   }
884   case Builtin::BImemcpy:
885   case Builtin::BI__builtin_memcpy: {
886     Address Dest = EmitPointerWithAlignment(E->getArg(0));
887     Address Src = EmitPointerWithAlignment(E->getArg(1));
888     Value *SizeVal = EmitScalarExpr(E->getArg(2));
889     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
890                         E->getArg(0)->getExprLoc(), FD, 0);
891     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
892                         E->getArg(1)->getExprLoc(), FD, 1);
893     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
894     return RValue::get(Dest.getPointer());
895   }
896 
897   case Builtin::BI__builtin___memcpy_chk: {
898     // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
899     llvm::APSInt Size, DstSize;
900     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
901         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
902       break;
903     if (Size.ugt(DstSize))
904       break;
905     Address Dest = EmitPointerWithAlignment(E->getArg(0));
906     Address Src = EmitPointerWithAlignment(E->getArg(1));
907     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
908     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
909     return RValue::get(Dest.getPointer());
910   }
911 
912   case Builtin::BI__builtin_objc_memmove_collectable: {
913     Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
914     Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
915     Value *SizeVal = EmitScalarExpr(E->getArg(2));
916     CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
917                                                   DestAddr, SrcAddr, SizeVal);
918     return RValue::get(DestAddr.getPointer());
919   }
920 
921   case Builtin::BI__builtin___memmove_chk: {
922     // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
923     llvm::APSInt Size, DstSize;
924     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
925         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
926       break;
927     if (Size.ugt(DstSize))
928       break;
929     Address Dest = EmitPointerWithAlignment(E->getArg(0));
930     Address Src = EmitPointerWithAlignment(E->getArg(1));
931     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
932     Builder.CreateMemMove(Dest, Src, SizeVal, false);
933     return RValue::get(Dest.getPointer());
934   }
935 
936   case Builtin::BImemmove:
937   case Builtin::BI__builtin_memmove: {
938     Address Dest = EmitPointerWithAlignment(E->getArg(0));
939     Address Src = EmitPointerWithAlignment(E->getArg(1));
940     Value *SizeVal = EmitScalarExpr(E->getArg(2));
941     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
942                         E->getArg(0)->getExprLoc(), FD, 0);
943     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
944                         E->getArg(1)->getExprLoc(), FD, 1);
945     Builder.CreateMemMove(Dest, Src, SizeVal, false);
946     return RValue::get(Dest.getPointer());
947   }
948   case Builtin::BImemset:
949   case Builtin::BI__builtin_memset: {
950     Address Dest = EmitPointerWithAlignment(E->getArg(0));
951     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
952                                          Builder.getInt8Ty());
953     Value *SizeVal = EmitScalarExpr(E->getArg(2));
954     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
955                         E->getArg(0)->getExprLoc(), FD, 0);
956     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
957     return RValue::get(Dest.getPointer());
958   }
959   case Builtin::BI__builtin___memset_chk: {
960     // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
961     llvm::APSInt Size, DstSize;
962     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
963         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
964       break;
965     if (Size.ugt(DstSize))
966       break;
967     Address Dest = EmitPointerWithAlignment(E->getArg(0));
968     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
969                                          Builder.getInt8Ty());
970     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
971     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
972     return RValue::get(Dest.getPointer());
973   }
974   case Builtin::BI__builtin_dwarf_cfa: {
975     // The offset in bytes from the first argument to the CFA.
976     //
977     // Why on earth is this in the frontend?  Is there any reason at
978     // all that the backend can't reasonably determine this while
979     // lowering llvm.eh.dwarf.cfa()?
980     //
981     // TODO: If there's a satisfactory reason, add a target hook for
982     // this instead of hard-coding 0, which is correct for most targets.
983     int32_t Offset = 0;
984 
985     Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
986     return RValue::get(Builder.CreateCall(F,
987                                       llvm::ConstantInt::get(Int32Ty, Offset)));
988   }
989   case Builtin::BI__builtin_return_address: {
990     Value *Depth =
991         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
992     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
993     return RValue::get(Builder.CreateCall(F, Depth));
994   }
995   case Builtin::BI__builtin_frame_address: {
996     Value *Depth =
997         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
998     Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
999     return RValue::get(Builder.CreateCall(F, Depth));
1000   }
1001   case Builtin::BI__builtin_extract_return_addr: {
1002     Value *Address = EmitScalarExpr(E->getArg(0));
1003     Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
1004     return RValue::get(Result);
1005   }
1006   case Builtin::BI__builtin_frob_return_addr: {
1007     Value *Address = EmitScalarExpr(E->getArg(0));
1008     Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
1009     return RValue::get(Result);
1010   }
1011   case Builtin::BI__builtin_dwarf_sp_column: {
1012     llvm::IntegerType *Ty
1013       = cast<llvm::IntegerType>(ConvertType(E->getType()));
1014     int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
1015     if (Column == -1) {
1016       CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
1017       return RValue::get(llvm::UndefValue::get(Ty));
1018     }
1019     return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
1020   }
1021   case Builtin::BI__builtin_init_dwarf_reg_size_table: {
1022     Value *Address = EmitScalarExpr(E->getArg(0));
1023     if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
1024       CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
1025     return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1026   }
1027   case Builtin::BI__builtin_eh_return: {
1028     Value *Int = EmitScalarExpr(E->getArg(0));
1029     Value *Ptr = EmitScalarExpr(E->getArg(1));
1030 
1031     llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
1032     assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
1033            "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
1034     Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
1035                                   ? Intrinsic::eh_return_i32
1036                                   : Intrinsic::eh_return_i64);
1037     Builder.CreateCall(F, {Int, Ptr});
1038     Builder.CreateUnreachable();
1039 
1040     // We do need to preserve an insertion point.
1041     EmitBlock(createBasicBlock("builtin_eh_return.cont"));
1042 
1043     return RValue::get(nullptr);
1044   }
1045   case Builtin::BI__builtin_unwind_init: {
1046     Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
1047     return RValue::get(Builder.CreateCall(F));
1048   }
1049   case Builtin::BI__builtin_extend_pointer: {
1050     // Extends a pointer to the size of an _Unwind_Word, which is
1051     // uint64_t on all platforms.  Generally this gets poked into a
1052     // register and eventually used as an address, so if the
1053     // addressing registers are wider than pointers and the platform
1054     // doesn't implicitly ignore high-order bits when doing
1055     // addressing, we need to make sure we zext / sext based on
1056     // the platform's expectations.
1057     //
1058     // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
1059 
1060     // Cast the pointer to intptr_t.
1061     Value *Ptr = EmitScalarExpr(E->getArg(0));
1062     Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
1063 
1064     // If that's 64 bits, we're done.
1065     if (IntPtrTy->getBitWidth() == 64)
1066       return RValue::get(Result);
1067 
1068     // Otherwise, ask the codegen data what to do.
1069     if (getTargetHooks().extendPointerWithSExt())
1070       return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
1071     else
1072       return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
1073   }
  case Builtin::BI__builtin_setjmp: {
    // Fills in two slots of the caller's buffer (frame address at index 0,
    // saved stack pointer at index 2) and then calls llvm.eh.sjlj.setjmp on
    // the buffer. Index 1 is not written by this code.

    // Buffer is a void**.
    Address Buf = EmitPointerWithAlignment(E->getArg(0));

    // Store the frame pointer to the setjmp buffer.
    // llvm.frameaddress is called with depth 0; the result goes to the start
    // of the buffer.
    Value *FrameAddr =
      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
                         ConstantInt::get(Int32Ty, 0));
    Builder.CreateStore(FrameAddr, Buf);

    // Store the stack pointer to the setjmp buffer.
    // The slot is element 2 of the buffer, addressed in pointer-size steps.
    Value *StackAddr =
        Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
    Address StackSaveSlot =
      Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
    Builder.CreateStore(StackAddr, StackSaveSlot);

    // Call LLVM's EH setjmp, which is lightweight.
    // The intrinsic is passed the buffer as an i8*, hence the bitcast.
    Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
    return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
  }
1096   case Builtin::BI__builtin_longjmp: {
1097     Value *Buf = EmitScalarExpr(E->getArg(0));
1098     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1099 
1100     // Call LLVM's EH longjmp, which is lightweight.
1101     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
1102 
1103     // longjmp doesn't return; mark this as unreachable.
1104     Builder.CreateUnreachable();
1105 
1106     // We do need to preserve an insertion point.
1107     EmitBlock(createBasicBlock("longjmp.cont"));
1108 
1109     return RValue::get(nullptr);
1110   }
1111   case Builtin::BI__sync_fetch_and_add:
1112   case Builtin::BI__sync_fetch_and_sub:
1113   case Builtin::BI__sync_fetch_and_or:
1114   case Builtin::BI__sync_fetch_and_and:
1115   case Builtin::BI__sync_fetch_and_xor:
1116   case Builtin::BI__sync_fetch_and_nand:
1117   case Builtin::BI__sync_add_and_fetch:
1118   case Builtin::BI__sync_sub_and_fetch:
1119   case Builtin::BI__sync_and_and_fetch:
1120   case Builtin::BI__sync_or_and_fetch:
1121   case Builtin::BI__sync_xor_and_fetch:
1122   case Builtin::BI__sync_nand_and_fetch:
1123   case Builtin::BI__sync_val_compare_and_swap:
1124   case Builtin::BI__sync_bool_compare_and_swap:
1125   case Builtin::BI__sync_lock_test_and_set:
1126   case Builtin::BI__sync_lock_release:
1127   case Builtin::BI__sync_swap:
1128     llvm_unreachable("Shouldn't make it through sema");
1129   case Builtin::BI__sync_fetch_and_add_1:
1130   case Builtin::BI__sync_fetch_and_add_2:
1131   case Builtin::BI__sync_fetch_and_add_4:
1132   case Builtin::BI__sync_fetch_and_add_8:
1133   case Builtin::BI__sync_fetch_and_add_16:
1134     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
1135   case Builtin::BI__sync_fetch_and_sub_1:
1136   case Builtin::BI__sync_fetch_and_sub_2:
1137   case Builtin::BI__sync_fetch_and_sub_4:
1138   case Builtin::BI__sync_fetch_and_sub_8:
1139   case Builtin::BI__sync_fetch_and_sub_16:
1140     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
1141   case Builtin::BI__sync_fetch_and_or_1:
1142   case Builtin::BI__sync_fetch_and_or_2:
1143   case Builtin::BI__sync_fetch_and_or_4:
1144   case Builtin::BI__sync_fetch_and_or_8:
1145   case Builtin::BI__sync_fetch_and_or_16:
1146     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
1147   case Builtin::BI__sync_fetch_and_and_1:
1148   case Builtin::BI__sync_fetch_and_and_2:
1149   case Builtin::BI__sync_fetch_and_and_4:
1150   case Builtin::BI__sync_fetch_and_and_8:
1151   case Builtin::BI__sync_fetch_and_and_16:
1152     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
1153   case Builtin::BI__sync_fetch_and_xor_1:
1154   case Builtin::BI__sync_fetch_and_xor_2:
1155   case Builtin::BI__sync_fetch_and_xor_4:
1156   case Builtin::BI__sync_fetch_and_xor_8:
1157   case Builtin::BI__sync_fetch_and_xor_16:
1158     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
1159   case Builtin::BI__sync_fetch_and_nand_1:
1160   case Builtin::BI__sync_fetch_and_nand_2:
1161   case Builtin::BI__sync_fetch_and_nand_4:
1162   case Builtin::BI__sync_fetch_and_nand_8:
1163   case Builtin::BI__sync_fetch_and_nand_16:
1164     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
1165 
1166   // Clang extensions: not overloaded yet.
1167   case Builtin::BI__sync_fetch_and_min:
1168     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
1169   case Builtin::BI__sync_fetch_and_max:
1170     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
1171   case Builtin::BI__sync_fetch_and_umin:
1172     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
1173   case Builtin::BI__sync_fetch_and_umax:
1174     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
1175 
1176   case Builtin::BI__sync_add_and_fetch_1:
1177   case Builtin::BI__sync_add_and_fetch_2:
1178   case Builtin::BI__sync_add_and_fetch_4:
1179   case Builtin::BI__sync_add_and_fetch_8:
1180   case Builtin::BI__sync_add_and_fetch_16:
1181     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
1182                                 llvm::Instruction::Add);
1183   case Builtin::BI__sync_sub_and_fetch_1:
1184   case Builtin::BI__sync_sub_and_fetch_2:
1185   case Builtin::BI__sync_sub_and_fetch_4:
1186   case Builtin::BI__sync_sub_and_fetch_8:
1187   case Builtin::BI__sync_sub_and_fetch_16:
1188     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
1189                                 llvm::Instruction::Sub);
1190   case Builtin::BI__sync_and_and_fetch_1:
1191   case Builtin::BI__sync_and_and_fetch_2:
1192   case Builtin::BI__sync_and_and_fetch_4:
1193   case Builtin::BI__sync_and_and_fetch_8:
1194   case Builtin::BI__sync_and_and_fetch_16:
1195     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
1196                                 llvm::Instruction::And);
1197   case Builtin::BI__sync_or_and_fetch_1:
1198   case Builtin::BI__sync_or_and_fetch_2:
1199   case Builtin::BI__sync_or_and_fetch_4:
1200   case Builtin::BI__sync_or_and_fetch_8:
1201   case Builtin::BI__sync_or_and_fetch_16:
1202     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
1203                                 llvm::Instruction::Or);
1204   case Builtin::BI__sync_xor_and_fetch_1:
1205   case Builtin::BI__sync_xor_and_fetch_2:
1206   case Builtin::BI__sync_xor_and_fetch_4:
1207   case Builtin::BI__sync_xor_and_fetch_8:
1208   case Builtin::BI__sync_xor_and_fetch_16:
1209     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
1210                                 llvm::Instruction::Xor);
1211   case Builtin::BI__sync_nand_and_fetch_1:
1212   case Builtin::BI__sync_nand_and_fetch_2:
1213   case Builtin::BI__sync_nand_and_fetch_4:
1214   case Builtin::BI__sync_nand_and_fetch_8:
1215   case Builtin::BI__sync_nand_and_fetch_16:
1216     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
1217                                 llvm::Instruction::And, true);
1218 
1219   case Builtin::BI__sync_val_compare_and_swap_1:
1220   case Builtin::BI__sync_val_compare_and_swap_2:
1221   case Builtin::BI__sync_val_compare_and_swap_4:
1222   case Builtin::BI__sync_val_compare_and_swap_8:
1223   case Builtin::BI__sync_val_compare_and_swap_16:
1224     return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
1225 
1226   case Builtin::BI__sync_bool_compare_and_swap_1:
1227   case Builtin::BI__sync_bool_compare_and_swap_2:
1228   case Builtin::BI__sync_bool_compare_and_swap_4:
1229   case Builtin::BI__sync_bool_compare_and_swap_8:
1230   case Builtin::BI__sync_bool_compare_and_swap_16:
1231     return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
1232 
1233   case Builtin::BI__sync_swap_1:
1234   case Builtin::BI__sync_swap_2:
1235   case Builtin::BI__sync_swap_4:
1236   case Builtin::BI__sync_swap_8:
1237   case Builtin::BI__sync_swap_16:
1238     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1239 
1240   case Builtin::BI__sync_lock_test_and_set_1:
1241   case Builtin::BI__sync_lock_test_and_set_2:
1242   case Builtin::BI__sync_lock_test_and_set_4:
1243   case Builtin::BI__sync_lock_test_and_set_8:
1244   case Builtin::BI__sync_lock_test_and_set_16:
1245     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1246 
1247   case Builtin::BI__sync_lock_release_1:
1248   case Builtin::BI__sync_lock_release_2:
1249   case Builtin::BI__sync_lock_release_4:
1250   case Builtin::BI__sync_lock_release_8:
1251   case Builtin::BI__sync_lock_release_16: {
1252     Value *Ptr = EmitScalarExpr(E->getArg(0));
1253     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
1254     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
1255     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
1256                                              StoreSize.getQuantity() * 8);
1257     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
1258     llvm::StoreInst *Store =
1259       Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
1260                                  StoreSize);
1261     Store->setAtomic(llvm::Release);
1262     return RValue::get(nullptr);
1263   }
1264 
  case Builtin::BI__sync_synchronize: {
    // We assume this is supposed to correspond to a C++0x-style
    // sequentially-consistent fence (i.e. this is only usable for
    // synchronization, not device I/O or anything like that). This intrinsic
    // is really badly designed in the sense that in theory, there isn't
    // any way to safely use it... but in practice, it mostly works
    // to use it with non-atomic loads and stores to get acquire/release
    // semantics.
    Builder.CreateFence(llvm::SequentiallyConsistent);
    // The builtin produces no value.
    return RValue::get(nullptr);
  }
1276 
1277   case Builtin::BI__builtin_nontemporal_load:
1278     return RValue::get(EmitNontemporalLoad(*this, E));
1279   case Builtin::BI__builtin_nontemporal_store:
1280     return RValue::get(EmitNontemporalStore(*this, E));
1281   case Builtin::BI__c11_atomic_is_lock_free:
1282   case Builtin::BI__atomic_is_lock_free: {
1283     // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
1284     // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
1285     // _Atomic(T) is always properly-aligned.
1286     const char *LibCallName = "__atomic_is_lock_free";
1287     CallArgList Args;
1288     Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
1289              getContext().getSizeType());
1290     if (BuiltinID == Builtin::BI__atomic_is_lock_free)
1291       Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
1292                getContext().VoidPtrTy);
1293     else
1294       Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
1295                getContext().VoidPtrTy);
1296     const CGFunctionInfo &FuncInfo =
1297         CGM.getTypes().arrangeFreeFunctionCall(E->getType(), Args,
1298                                                FunctionType::ExtInfo(),
1299                                                RequiredArgs::All);
1300     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
1301     llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
1302     return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
1303   }
1304 
1305   case Builtin::BI__atomic_test_and_set: {
1306     // Look at the argument type to determine whether this is a volatile
1307     // operation. The parameter type is always volatile.
1308     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1309     bool Volatile =
1310         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1311 
1312     Value *Ptr = EmitScalarExpr(E->getArg(0));
1313     unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1314     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1315     Value *NewVal = Builder.getInt8(1);
1316     Value *Order = EmitScalarExpr(E->getArg(1));
1317     if (isa<llvm::ConstantInt>(Order)) {
1318       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1319       AtomicRMWInst *Result = nullptr;
1320       switch (ord) {
1321       case 0:  // memory_order_relaxed
1322       default: // invalid order
1323         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1324                                          Ptr, NewVal,
1325                                          llvm::Monotonic);
1326         break;
1327       case 1:  // memory_order_consume
1328       case 2:  // memory_order_acquire
1329         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1330                                          Ptr, NewVal,
1331                                          llvm::Acquire);
1332         break;
1333       case 3:  // memory_order_release
1334         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1335                                          Ptr, NewVal,
1336                                          llvm::Release);
1337         break;
1338       case 4:  // memory_order_acq_rel
1339         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1340                                          Ptr, NewVal,
1341                                          llvm::AcquireRelease);
1342         break;
1343       case 5:  // memory_order_seq_cst
1344         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1345                                          Ptr, NewVal,
1346                                          llvm::SequentiallyConsistent);
1347         break;
1348       }
1349       Result->setVolatile(Volatile);
1350       return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1351     }
1352 
1353     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1354 
1355     llvm::BasicBlock *BBs[5] = {
1356       createBasicBlock("monotonic", CurFn),
1357       createBasicBlock("acquire", CurFn),
1358       createBasicBlock("release", CurFn),
1359       createBasicBlock("acqrel", CurFn),
1360       createBasicBlock("seqcst", CurFn)
1361     };
1362     llvm::AtomicOrdering Orders[5] = {
1363       llvm::Monotonic, llvm::Acquire, llvm::Release,
1364       llvm::AcquireRelease, llvm::SequentiallyConsistent
1365     };
1366 
1367     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1368     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1369 
1370     Builder.SetInsertPoint(ContBB);
1371     PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
1372 
1373     for (unsigned i = 0; i < 5; ++i) {
1374       Builder.SetInsertPoint(BBs[i]);
1375       AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1376                                                    Ptr, NewVal, Orders[i]);
1377       RMW->setVolatile(Volatile);
1378       Result->addIncoming(RMW, BBs[i]);
1379       Builder.CreateBr(ContBB);
1380     }
1381 
1382     SI->addCase(Builder.getInt32(0), BBs[0]);
1383     SI->addCase(Builder.getInt32(1), BBs[1]);
1384     SI->addCase(Builder.getInt32(2), BBs[1]);
1385     SI->addCase(Builder.getInt32(3), BBs[2]);
1386     SI->addCase(Builder.getInt32(4), BBs[3]);
1387     SI->addCase(Builder.getInt32(5), BBs[4]);
1388 
1389     Builder.SetInsertPoint(ContBB);
1390     return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1391   }
1392 
1393   case Builtin::BI__atomic_clear: {
1394     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1395     bool Volatile =
1396         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1397 
1398     Address Ptr = EmitPointerWithAlignment(E->getArg(0));
1399     unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
1400     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1401     Value *NewVal = Builder.getInt8(0);
1402     Value *Order = EmitScalarExpr(E->getArg(1));
1403     if (isa<llvm::ConstantInt>(Order)) {
1404       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1405       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1406       switch (ord) {
1407       case 0:  // memory_order_relaxed
1408       default: // invalid order
1409         Store->setOrdering(llvm::Monotonic);
1410         break;
1411       case 3:  // memory_order_release
1412         Store->setOrdering(llvm::Release);
1413         break;
1414       case 5:  // memory_order_seq_cst
1415         Store->setOrdering(llvm::SequentiallyConsistent);
1416         break;
1417       }
1418       return RValue::get(nullptr);
1419     }
1420 
1421     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1422 
1423     llvm::BasicBlock *BBs[3] = {
1424       createBasicBlock("monotonic", CurFn),
1425       createBasicBlock("release", CurFn),
1426       createBasicBlock("seqcst", CurFn)
1427     };
1428     llvm::AtomicOrdering Orders[3] = {
1429       llvm::Monotonic, llvm::Release, llvm::SequentiallyConsistent
1430     };
1431 
1432     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1433     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1434 
1435     for (unsigned i = 0; i < 3; ++i) {
1436       Builder.SetInsertPoint(BBs[i]);
1437       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1438       Store->setOrdering(Orders[i]);
1439       Builder.CreateBr(ContBB);
1440     }
1441 
1442     SI->addCase(Builder.getInt32(0), BBs[0]);
1443     SI->addCase(Builder.getInt32(3), BBs[1]);
1444     SI->addCase(Builder.getInt32(5), BBs[2]);
1445 
1446     Builder.SetInsertPoint(ContBB);
1447     return RValue::get(nullptr);
1448   }
1449 
1450   case Builtin::BI__atomic_thread_fence:
1451   case Builtin::BI__atomic_signal_fence:
1452   case Builtin::BI__c11_atomic_thread_fence:
1453   case Builtin::BI__c11_atomic_signal_fence: {
1454     llvm::SynchronizationScope Scope;
1455     if (BuiltinID == Builtin::BI__atomic_signal_fence ||
1456         BuiltinID == Builtin::BI__c11_atomic_signal_fence)
1457       Scope = llvm::SingleThread;
1458     else
1459       Scope = llvm::CrossThread;
1460     Value *Order = EmitScalarExpr(E->getArg(0));
1461     if (isa<llvm::ConstantInt>(Order)) {
1462       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1463       switch (ord) {
1464       case 0:  // memory_order_relaxed
1465       default: // invalid order
1466         break;
1467       case 1:  // memory_order_consume
1468       case 2:  // memory_order_acquire
1469         Builder.CreateFence(llvm::Acquire, Scope);
1470         break;
1471       case 3:  // memory_order_release
1472         Builder.CreateFence(llvm::Release, Scope);
1473         break;
1474       case 4:  // memory_order_acq_rel
1475         Builder.CreateFence(llvm::AcquireRelease, Scope);
1476         break;
1477       case 5:  // memory_order_seq_cst
1478         Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
1479         break;
1480       }
1481       return RValue::get(nullptr);
1482     }
1483 
1484     llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
1485     AcquireBB = createBasicBlock("acquire", CurFn);
1486     ReleaseBB = createBasicBlock("release", CurFn);
1487     AcqRelBB = createBasicBlock("acqrel", CurFn);
1488     SeqCstBB = createBasicBlock("seqcst", CurFn);
1489     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1490 
1491     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1492     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
1493 
1494     Builder.SetInsertPoint(AcquireBB);
1495     Builder.CreateFence(llvm::Acquire, Scope);
1496     Builder.CreateBr(ContBB);
1497     SI->addCase(Builder.getInt32(1), AcquireBB);
1498     SI->addCase(Builder.getInt32(2), AcquireBB);
1499 
1500     Builder.SetInsertPoint(ReleaseBB);
1501     Builder.CreateFence(llvm::Release, Scope);
1502     Builder.CreateBr(ContBB);
1503     SI->addCase(Builder.getInt32(3), ReleaseBB);
1504 
1505     Builder.SetInsertPoint(AcqRelBB);
1506     Builder.CreateFence(llvm::AcquireRelease, Scope);
1507     Builder.CreateBr(ContBB);
1508     SI->addCase(Builder.getInt32(4), AcqRelBB);
1509 
1510     Builder.SetInsertPoint(SeqCstBB);
1511     Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
1512     Builder.CreateBr(ContBB);
1513     SI->addCase(Builder.getInt32(5), SeqCstBB);
1514 
1515     Builder.SetInsertPoint(ContBB);
1516     return RValue::get(nullptr);
1517   }
1518 
1519     // Library functions with special handling.
1520   case Builtin::BIsqrt:
1521   case Builtin::BIsqrtf:
1522   case Builtin::BIsqrtl: {
1523     // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
1524     // in finite- or unsafe-math mode (the intrinsic has different semantics
1525     // for handling negative numbers compared to the library function, so
1526     // -fmath-errno=0 is not enough).
1527     if (!FD->hasAttr<ConstAttr>())
1528       break;
1529     if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
1530           CGM.getCodeGenOpts().NoNaNsFPMath))
1531       break;
1532     Value *Arg0 = EmitScalarExpr(E->getArg(0));
1533     llvm::Type *ArgType = Arg0->getType();
1534     Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
1535     return RValue::get(Builder.CreateCall(F, Arg0));
1536   }
1537 
1538   case Builtin::BI__builtin_pow:
1539   case Builtin::BI__builtin_powf:
1540   case Builtin::BI__builtin_powl:
1541   case Builtin::BIpow:
1542   case Builtin::BIpowf:
1543   case Builtin::BIpowl: {
1544     // Transform a call to pow* into a @llvm.pow.* intrinsic call.
1545     if (!FD->hasAttr<ConstAttr>())
1546       break;
1547     Value *Base = EmitScalarExpr(E->getArg(0));
1548     Value *Exponent = EmitScalarExpr(E->getArg(1));
1549     llvm::Type *ArgType = Base->getType();
1550     Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
1551     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1552   }
1553 
1554   case Builtin::BIfma:
1555   case Builtin::BIfmaf:
1556   case Builtin::BIfmal:
1557   case Builtin::BI__builtin_fma:
1558   case Builtin::BI__builtin_fmaf:
1559   case Builtin::BI__builtin_fmal: {
1560     // Rewrite fma to intrinsic.
1561     Value *FirstArg = EmitScalarExpr(E->getArg(0));
1562     llvm::Type *ArgType = FirstArg->getType();
1563     Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
1564     return RValue::get(
1565         Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
1566                                EmitScalarExpr(E->getArg(2))}));
1567   }
1568 
1569   case Builtin::BI__builtin_signbit:
1570   case Builtin::BI__builtin_signbitf:
1571   case Builtin::BI__builtin_signbitl: {
1572     return RValue::get(
1573         Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
1574                            ConvertType(E->getType())));
1575   }
1576   case Builtin::BI__builtin_annotation: {
1577     llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
1578     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
1579                                       AnnVal->getType());
1580 
1581     // Get the annotation string, go through casts. Sema requires this to be a
1582     // non-wide string literal, potentially casted, so the cast<> is safe.
1583     const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
1584     StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
1585     return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
1586   }
1587   case Builtin::BI__builtin_addcb:
1588   case Builtin::BI__builtin_addcs:
1589   case Builtin::BI__builtin_addc:
1590   case Builtin::BI__builtin_addcl:
1591   case Builtin::BI__builtin_addcll:
1592   case Builtin::BI__builtin_subcb:
1593   case Builtin::BI__builtin_subcs:
1594   case Builtin::BI__builtin_subc:
1595   case Builtin::BI__builtin_subcl:
1596   case Builtin::BI__builtin_subcll: {
1597 
1598     // We translate all of these builtins from expressions of the form:
1599     //   int x = ..., y = ..., carryin = ..., carryout, result;
1600     //   result = __builtin_addc(x, y, carryin, &carryout);
1601     //
1602     // to LLVM IR of the form:
1603     //
1604     //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
1605     //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
1606     //   %carry1 = extractvalue {i32, i1} %tmp1, 1
1607     //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
1608     //                                                       i32 %carryin)
1609     //   %result = extractvalue {i32, i1} %tmp2, 0
1610     //   %carry2 = extractvalue {i32, i1} %tmp2, 1
1611     //   %tmp3 = or i1 %carry1, %carry2
1612     //   %tmp4 = zext i1 %tmp3 to i32
1613     //   store i32 %tmp4, i32* %carryout
1614 
1615     // Scalarize our inputs.
1616     llvm::Value *X = EmitScalarExpr(E->getArg(0));
1617     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1618     llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
1619     Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
1620 
1621     // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
1622     llvm::Intrinsic::ID IntrinsicId;
1623     switch (BuiltinID) {
1624     default: llvm_unreachable("Unknown multiprecision builtin id.");
1625     case Builtin::BI__builtin_addcb:
1626     case Builtin::BI__builtin_addcs:
1627     case Builtin::BI__builtin_addc:
1628     case Builtin::BI__builtin_addcl:
1629     case Builtin::BI__builtin_addcll:
1630       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1631       break;
1632     case Builtin::BI__builtin_subcb:
1633     case Builtin::BI__builtin_subcs:
1634     case Builtin::BI__builtin_subc:
1635     case Builtin::BI__builtin_subcl:
1636     case Builtin::BI__builtin_subcll:
1637       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1638       break;
1639     }
1640 
1641     // Construct our resulting LLVM IR expression.
1642     llvm::Value *Carry1;
1643     llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
1644                                               X, Y, Carry1);
1645     llvm::Value *Carry2;
1646     llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
1647                                               Sum1, Carryin, Carry2);
1648     llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
1649                                                X->getType());
1650     Builder.CreateStore(CarryOut, CarryOutPtr);
1651     return RValue::get(Sum2);
1652   }
1653 
1654   case Builtin::BI__builtin_add_overflow:
1655   case Builtin::BI__builtin_sub_overflow:
1656   case Builtin::BI__builtin_mul_overflow: {
1657     const clang::Expr *LeftArg = E->getArg(0);
1658     const clang::Expr *RightArg = E->getArg(1);
1659     const clang::Expr *ResultArg = E->getArg(2);
1660 
1661     clang::QualType ResultQTy =
1662         ResultArg->getType()->castAs<PointerType>()->getPointeeType();
1663 
1664     WidthAndSignedness LeftInfo =
1665         getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
1666     WidthAndSignedness RightInfo =
1667         getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
1668     WidthAndSignedness ResultInfo =
1669         getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
1670     WidthAndSignedness EncompassingInfo =
1671         EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
1672 
1673     llvm::Type *EncompassingLLVMTy =
1674         llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
1675 
1676     llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
1677 
1678     llvm::Intrinsic::ID IntrinsicId;
1679     switch (BuiltinID) {
1680     default:
1681       llvm_unreachable("Unknown overflow builtin id.");
1682     case Builtin::BI__builtin_add_overflow:
1683       IntrinsicId = EncompassingInfo.Signed
1684                         ? llvm::Intrinsic::sadd_with_overflow
1685                         : llvm::Intrinsic::uadd_with_overflow;
1686       break;
1687     case Builtin::BI__builtin_sub_overflow:
1688       IntrinsicId = EncompassingInfo.Signed
1689                         ? llvm::Intrinsic::ssub_with_overflow
1690                         : llvm::Intrinsic::usub_with_overflow;
1691       break;
1692     case Builtin::BI__builtin_mul_overflow:
1693       IntrinsicId = EncompassingInfo.Signed
1694                         ? llvm::Intrinsic::smul_with_overflow
1695                         : llvm::Intrinsic::umul_with_overflow;
1696       break;
1697     }
1698 
1699     llvm::Value *Left = EmitScalarExpr(LeftArg);
1700     llvm::Value *Right = EmitScalarExpr(RightArg);
1701     Address ResultPtr = EmitPointerWithAlignment(ResultArg);
1702 
1703     // Extend each operand to the encompassing type.
1704     Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
1705     Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
1706 
1707     // Perform the operation on the extended values.
1708     llvm::Value *Overflow, *Result;
1709     Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
1710 
1711     if (EncompassingInfo.Width > ResultInfo.Width) {
1712       // The encompassing type is wider than the result type, so we need to
1713       // truncate it.
1714       llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
1715 
1716       // To see if the truncation caused an overflow, we will extend
1717       // the result and then compare it to the original result.
1718       llvm::Value *ResultTruncExt = Builder.CreateIntCast(
1719           ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
1720       llvm::Value *TruncationOverflow =
1721           Builder.CreateICmpNE(Result, ResultTruncExt);
1722 
1723       Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
1724       Result = ResultTrunc;
1725     }
1726 
1727     // Finally, store the result using the pointer.
1728     bool isVolatile =
1729       ResultArg->getType()->getPointeeType().isVolatileQualified();
1730     Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
1731 
1732     return RValue::get(Overflow);
1733   }
1734 
1735   case Builtin::BI__builtin_uadd_overflow:
1736   case Builtin::BI__builtin_uaddl_overflow:
1737   case Builtin::BI__builtin_uaddll_overflow:
1738   case Builtin::BI__builtin_usub_overflow:
1739   case Builtin::BI__builtin_usubl_overflow:
1740   case Builtin::BI__builtin_usubll_overflow:
1741   case Builtin::BI__builtin_umul_overflow:
1742   case Builtin::BI__builtin_umull_overflow:
1743   case Builtin::BI__builtin_umulll_overflow:
1744   case Builtin::BI__builtin_sadd_overflow:
1745   case Builtin::BI__builtin_saddl_overflow:
1746   case Builtin::BI__builtin_saddll_overflow:
1747   case Builtin::BI__builtin_ssub_overflow:
1748   case Builtin::BI__builtin_ssubl_overflow:
1749   case Builtin::BI__builtin_ssubll_overflow:
1750   case Builtin::BI__builtin_smul_overflow:
1751   case Builtin::BI__builtin_smull_overflow:
1752   case Builtin::BI__builtin_smulll_overflow: {
1753 
1754     // We translate all of these builtins directly to the relevant llvm IR node.
1755 
1756     // Scalarize our inputs.
1757     llvm::Value *X = EmitScalarExpr(E->getArg(0));
1758     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1759     Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
1760 
1761     // Decide which of the overflow intrinsics we are lowering to:
1762     llvm::Intrinsic::ID IntrinsicId;
1763     switch (BuiltinID) {
1764     default: llvm_unreachable("Unknown overflow builtin id.");
1765     case Builtin::BI__builtin_uadd_overflow:
1766     case Builtin::BI__builtin_uaddl_overflow:
1767     case Builtin::BI__builtin_uaddll_overflow:
1768       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1769       break;
1770     case Builtin::BI__builtin_usub_overflow:
1771     case Builtin::BI__builtin_usubl_overflow:
1772     case Builtin::BI__builtin_usubll_overflow:
1773       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1774       break;
1775     case Builtin::BI__builtin_umul_overflow:
1776     case Builtin::BI__builtin_umull_overflow:
1777     case Builtin::BI__builtin_umulll_overflow:
1778       IntrinsicId = llvm::Intrinsic::umul_with_overflow;
1779       break;
1780     case Builtin::BI__builtin_sadd_overflow:
1781     case Builtin::BI__builtin_saddl_overflow:
1782     case Builtin::BI__builtin_saddll_overflow:
1783       IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
1784       break;
1785     case Builtin::BI__builtin_ssub_overflow:
1786     case Builtin::BI__builtin_ssubl_overflow:
1787     case Builtin::BI__builtin_ssubll_overflow:
1788       IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
1789       break;
1790     case Builtin::BI__builtin_smul_overflow:
1791     case Builtin::BI__builtin_smull_overflow:
1792     case Builtin::BI__builtin_smulll_overflow:
1793       IntrinsicId = llvm::Intrinsic::smul_with_overflow;
1794       break;
1795     }
1796 
1797 
1798     llvm::Value *Carry;
1799     llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
1800     Builder.CreateStore(Sum, SumOutPtr);
1801 
1802     return RValue::get(Carry);
1803   }
1804   case Builtin::BI__builtin_addressof:
1805     return RValue::get(EmitLValue(E->getArg(0)).getPointer());
1806   case Builtin::BI__builtin_operator_new:
1807     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
1808                                     E->getArg(0), false);
1809   case Builtin::BI__builtin_operator_delete:
1810     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
1811                                     E->getArg(0), true);
1812   case Builtin::BI__noop:
1813     // __noop always evaluates to an integer literal zero.
1814     return RValue::get(ConstantInt::get(IntTy, 0));
1815   case Builtin::BI__builtin_call_with_static_chain: {
1816     const CallExpr *Call = cast<CallExpr>(E->getArg(0));
1817     const Expr *Chain = E->getArg(1);
1818     return EmitCall(Call->getCallee()->getType(),
1819                     EmitScalarExpr(Call->getCallee()), Call, ReturnValue,
1820                     Call->getCalleeDecl(), EmitScalarExpr(Chain));
1821   }
1822   case Builtin::BI_InterlockedExchange:
1823   case Builtin::BI_InterlockedExchangePointer:
1824     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1825   case Builtin::BI_InterlockedCompareExchangePointer: {
1826     llvm::Type *RTy;
1827     llvm::IntegerType *IntType =
1828       IntegerType::get(getLLVMContext(),
1829                        getContext().getTypeSize(E->getType()));
1830     llvm::Type *IntPtrType = IntType->getPointerTo();
1831 
1832     llvm::Value *Destination =
1833       Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
1834 
1835     llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
1836     RTy = Exchange->getType();
1837     Exchange = Builder.CreatePtrToInt(Exchange, IntType);
1838 
1839     llvm::Value *Comparand =
1840       Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
1841 
1842     auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
1843                                               SequentiallyConsistent,
1844                                               SequentiallyConsistent);
1845     Result->setVolatile(true);
1846 
1847     return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
1848                                                                          0),
1849                                               RTy));
1850   }
1851   case Builtin::BI_InterlockedCompareExchange: {
1852     AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
1853         EmitScalarExpr(E->getArg(0)),
1854         EmitScalarExpr(E->getArg(2)),
1855         EmitScalarExpr(E->getArg(1)),
1856         SequentiallyConsistent,
1857         SequentiallyConsistent);
1858       CXI->setVolatile(true);
1859       return RValue::get(Builder.CreateExtractValue(CXI, 0));
1860   }
1861   case Builtin::BI_InterlockedIncrement: {
1862     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1863       AtomicRMWInst::Add,
1864       EmitScalarExpr(E->getArg(0)),
1865       ConstantInt::get(Int32Ty, 1),
1866       llvm::SequentiallyConsistent);
1867     RMWI->setVolatile(true);
1868     return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(Int32Ty, 1)));
1869   }
1870   case Builtin::BI_InterlockedDecrement: {
1871     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1872       AtomicRMWInst::Sub,
1873       EmitScalarExpr(E->getArg(0)),
1874       ConstantInt::get(Int32Ty, 1),
1875       llvm::SequentiallyConsistent);
1876     RMWI->setVolatile(true);
1877     return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(Int32Ty, 1)));
1878   }
1879   case Builtin::BI_InterlockedExchangeAdd: {
1880     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1881       AtomicRMWInst::Add,
1882       EmitScalarExpr(E->getArg(0)),
1883       EmitScalarExpr(E->getArg(1)),
1884       llvm::SequentiallyConsistent);
1885     RMWI->setVolatile(true);
1886     return RValue::get(RMWI);
1887   }
1888   case Builtin::BI__readfsdword: {
1889     Value *IntToPtr =
1890       Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
1891                              llvm::PointerType::get(CGM.Int32Ty, 257));
1892     LoadInst *Load =
1893         Builder.CreateAlignedLoad(IntToPtr, /*Align=*/4, /*isVolatile=*/true);
1894     return RValue::get(Load);
1895   }
1896 
1897   case Builtin::BI__exception_code:
1898   case Builtin::BI_exception_code:
1899     return RValue::get(EmitSEHExceptionCode());
1900   case Builtin::BI__exception_info:
1901   case Builtin::BI_exception_info:
1902     return RValue::get(EmitSEHExceptionInfo());
1903   case Builtin::BI__abnormal_termination:
1904   case Builtin::BI_abnormal_termination:
1905     return RValue::get(EmitSEHAbnormalTermination());
1906   case Builtin::BI_setjmpex: {
1907     if (getTarget().getTriple().isOSMSVCRT()) {
1908       llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
1909       llvm::AttributeSet ReturnsTwiceAttr =
1910           AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
1911                             llvm::Attribute::ReturnsTwice);
1912       llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
1913           llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
1914           "_setjmpex", ReturnsTwiceAttr);
1915       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
1916           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
1917       llvm::Value *FrameAddr =
1918           Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1919                              ConstantInt::get(Int32Ty, 0));
1920       llvm::Value *Args[] = {Buf, FrameAddr};
1921       llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
1922       CS.setAttributes(ReturnsTwiceAttr);
1923       return RValue::get(CS.getInstruction());
1924     }
1925     break;
1926   }
1927   case Builtin::BI_setjmp: {
1928     if (getTarget().getTriple().isOSMSVCRT()) {
1929       llvm::AttributeSet ReturnsTwiceAttr =
1930           AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
1931                             llvm::Attribute::ReturnsTwice);
1932       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
1933           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
1934       llvm::CallSite CS;
1935       if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
1936         llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
1937         llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
1938             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
1939             "_setjmp3", ReturnsTwiceAttr);
1940         llvm::Value *Count = ConstantInt::get(IntTy, 0);
1941         llvm::Value *Args[] = {Buf, Count};
1942         CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
1943       } else {
1944         llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
1945         llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
1946             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
1947             "_setjmp", ReturnsTwiceAttr);
1948         llvm::Value *FrameAddr =
1949             Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1950                                ConstantInt::get(Int32Ty, 0));
1951         llvm::Value *Args[] = {Buf, FrameAddr};
1952         CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
1953       }
1954       CS.setAttributes(ReturnsTwiceAttr);
1955       return RValue::get(CS.getInstruction());
1956     }
1957     break;
1958   }
1959 
1960   case Builtin::BI__GetExceptionInfo: {
1961     if (llvm::GlobalVariable *GV =
1962             CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
1963       return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
1964     break;
1965   }
1966   }
1967 
1968   // If this is an alias for a lib function (e.g. __builtin_sin), emit
1969   // the call using the normal call path, but using the unmangled
1970   // version of the function name.
1971   if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
1972     return emitLibraryCall(*this, FD, E,
1973                            CGM.getBuiltinLibFunction(FD, BuiltinID));
1974 
1975   // If this is a predefined lib function (e.g. malloc), emit the call
1976   // using exactly the normal call path.
1977   if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
1978     return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee()));
1979 
1980   // Check that a call to a target specific builtin has the correct target
1981   // features.
1982   // This is down here to avoid non-target specific builtins, however, if
1983   // generic builtins start to require generic target features then we
1984   // can move this up to the beginning of the function.
1985   if (!checkBuiltinTargetFeatures(FD))
1986     CGM.getDiags().Report(E->getLocStart(), diag::err_builtin_needs_feature)
1987         << FD->getDeclName()
1988         << CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID);
1989 
1990   // See if we have a target specific intrinsic.
1991   const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
1992   Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
1993   if (const char *Prefix =
1994           llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch())) {
1995     IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);
1996     // NOTE we dont need to perform a compatibility flag check here since the
1997     // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the
1998     // MS builtins via ALL_MS_LANGUAGES and are filtered earlier.
1999     if (IntrinsicID == Intrinsic::not_intrinsic)
2000       IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix, Name);
2001   }
2002 
2003   if (IntrinsicID != Intrinsic::not_intrinsic) {
2004     SmallVector<Value*, 16> Args;
2005 
2006     // Find out if any arguments are required to be integer constant
2007     // expressions.
2008     unsigned ICEArguments = 0;
2009     ASTContext::GetBuiltinTypeError Error;
2010     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
2011     assert(Error == ASTContext::GE_None && "Should not codegen an error");
2012 
2013     Function *F = CGM.getIntrinsic(IntrinsicID);
2014     llvm::FunctionType *FTy = F->getFunctionType();
2015 
2016     for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
2017       Value *ArgValue;
2018       // If this is a normal argument, just emit it as a scalar.
2019       if ((ICEArguments & (1 << i)) == 0) {
2020         ArgValue = EmitScalarExpr(E->getArg(i));
2021       } else {
2022         // If this is required to be a constant, constant fold it so that we
2023         // know that the generated intrinsic gets a ConstantInt.
2024         llvm::APSInt Result;
2025         bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
2026         assert(IsConst && "Constant arg isn't actually constant?");
2027         (void)IsConst;
2028         ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
2029       }
2030 
2031       // If the intrinsic arg type is different from the builtin arg type
2032       // we need to do a bit cast.
2033       llvm::Type *PTy = FTy->getParamType(i);
2034       if (PTy != ArgValue->getType()) {
2035         assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
2036                "Must be able to losslessly bit cast to param");
2037         ArgValue = Builder.CreateBitCast(ArgValue, PTy);
2038       }
2039 
2040       Args.push_back(ArgValue);
2041     }
2042 
2043     Value *V = Builder.CreateCall(F, Args);
2044     QualType BuiltinRetType = E->getType();
2045 
2046     llvm::Type *RetTy = VoidTy;
2047     if (!BuiltinRetType->isVoidType())
2048       RetTy = ConvertType(BuiltinRetType);
2049 
2050     if (RetTy != V->getType()) {
2051       assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
2052              "Must be able to losslessly bit cast result type");
2053       V = Builder.CreateBitCast(V, RetTy);
2054     }
2055 
2056     return RValue::get(V);
2057   }
2058 
2059   // See if we have a target specific builtin that needs to be lowered.
2060   if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
2061     return RValue::get(V);
2062 
2063   ErrorUnsupported(E, "builtin function");
2064 
2065   // Unknown builtin, for now just dump it out and return undef.
2066   return GetUndefRValue(E->getType());
2067 }
2068 
2069 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
2070                                         unsigned BuiltinID, const CallExpr *E,
2071                                         llvm::Triple::ArchType Arch) {
2072   switch (Arch) {
2073   case llvm::Triple::arm:
2074   case llvm::Triple::armeb:
2075   case llvm::Triple::thumb:
2076   case llvm::Triple::thumbeb:
2077     return CGF->EmitARMBuiltinExpr(BuiltinID, E);
2078   case llvm::Triple::aarch64:
2079   case llvm::Triple::aarch64_be:
2080     return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
2081   case llvm::Triple::x86:
2082   case llvm::Triple::x86_64:
2083     return CGF->EmitX86BuiltinExpr(BuiltinID, E);
2084   case llvm::Triple::ppc:
2085   case llvm::Triple::ppc64:
2086   case llvm::Triple::ppc64le:
2087     return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
2088   case llvm::Triple::r600:
2089   case llvm::Triple::amdgcn:
2090     return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
2091   case llvm::Triple::systemz:
2092     return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
2093   case llvm::Triple::nvptx:
2094   case llvm::Triple::nvptx64:
2095     return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
2096   case llvm::Triple::wasm32:
2097   case llvm::Triple::wasm64:
2098     return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
2099   default:
2100     return nullptr;
2101   }
2102 }
2103 
2104 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
2105                                               const CallExpr *E) {
2106   if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
2107     assert(getContext().getAuxTargetInfo() && "Missing aux target info");
2108     return EmitTargetArchBuiltinExpr(
2109         this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
2110         getContext().getAuxTargetInfo()->getTriple().getArch());
2111   }
2112 
2113   return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
2114                                    getTarget().getTriple().getArch());
2115 }
2116 
2117 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
2118                                      NeonTypeFlags TypeFlags,
2119                                      bool V1Ty=false) {
2120   int IsQuad = TypeFlags.isQuad();
2121   switch (TypeFlags.getEltType()) {
2122   case NeonTypeFlags::Int8:
2123   case NeonTypeFlags::Poly8:
2124     return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
2125   case NeonTypeFlags::Int16:
2126   case NeonTypeFlags::Poly16:
2127   case NeonTypeFlags::Float16:
2128     return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
2129   case NeonTypeFlags::Int32:
2130     return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
2131   case NeonTypeFlags::Int64:
2132   case NeonTypeFlags::Poly64:
2133     return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
2134   case NeonTypeFlags::Poly128:
2135     // FIXME: i128 and f128 doesn't get fully support in Clang and llvm.
2136     // There is a lot of i128 and f128 API missing.
2137     // so we use v16i8 to represent poly128 and get pattern matched.
2138     return llvm::VectorType::get(CGF->Int8Ty, 16);
2139   case NeonTypeFlags::Float32:
2140     return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
2141   case NeonTypeFlags::Float64:
2142     return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
2143   }
2144   llvm_unreachable("Unknown vector element type!");
2145 }
2146 
2147 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
2148                                           NeonTypeFlags IntTypeFlags) {
2149   int IsQuad = IntTypeFlags.isQuad();
2150   switch (IntTypeFlags.getEltType()) {
2151   case NeonTypeFlags::Int32:
2152     return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
2153   case NeonTypeFlags::Int64:
2154     return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
2155   default:
2156     llvm_unreachable("Type can't be converted to floating-point!");
2157   }
2158 }
2159 
2160 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
2161   unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
2162   Value* SV = llvm::ConstantVector::getSplat(nElts, C);
2163   return Builder.CreateShuffleVector(V, V, SV, "lane");
2164 }
2165 
2166 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
2167                                      const char *name,
2168                                      unsigned shift, bool rightshift) {
2169   unsigned j = 0;
2170   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
2171        ai != ae; ++ai, ++j)
2172     if (shift > 0 && shift == j)
2173       Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
2174     else
2175       Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
2176 
2177   return Builder.CreateCall(F, Ops, name);
2178 }
2179 
2180 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
2181                                             bool neg) {
2182   int SV = cast<ConstantInt>(V)->getSExtValue();
2183   return ConstantInt::get(Ty, neg ? -SV : SV);
2184 }
2185 
2186 // \brief Right-shift a vector by a constant.
2187 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
2188                                           llvm::Type *Ty, bool usgn,
2189                                           const char *name) {
2190   llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
2191 
2192   int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
2193   int EltSize = VTy->getScalarSizeInBits();
2194 
2195   Vec = Builder.CreateBitCast(Vec, Ty);
2196 
2197   // lshr/ashr are undefined when the shift amount is equal to the vector
2198   // element size.
2199   if (ShiftAmt == EltSize) {
2200     if (usgn) {
2201       // Right-shifting an unsigned value by its size yields 0.
2202       return llvm::ConstantAggregateZero::get(VTy);
2203     } else {
2204       // Right-shifting a signed value by its size is equivalent
2205       // to a shift of size-1.
2206       --ShiftAmt;
2207       Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
2208     }
2209   }
2210 
2211   Shift = EmitNeonShiftVector(Shift, Ty, false);
2212   if (usgn)
2213     return Builder.CreateLShr(Vec, Shift, name);
2214   else
2215     return Builder.CreateAShr(Vec, Shift, name);
2216 }
2217 
// Bit flags that are OR-ed together to form the TypeModifier field of the
// NeonIntrinsicInfo table entries.
// NOTE(review): the code that interprets these flags is not part of this
// section of the file; the enumerator names are the only description of
// their meaning visible here.
enum {
  // Independent single-bit flags.
  AddRetType        = 0x001,
  Add1ArgType       = 0x002,
  Add2ArgTypes      = 0x004,

  VectorizeRetType  = 0x008,
  VectorizeArgTypes = 0x010,

  InventFloatType   = 0x020,
  UnsignedAlts      = 0x040,

  Use64BitVectors   = 0x080,
  Use128BitVectors  = 0x100,

  // Named combinations of the flags above.
  Vectorize1ArgType = VectorizeArgTypes | Add1ArgType,
  VectorRet         = VectorizeRetType | AddRetType,
  VectorRetGetArgs01 =
      VectorizeArgTypes | VectorizeRetType | Add2ArgTypes | AddRetType,
  FpCmpzModifiers =
      InventFloatType | Add1ArgType | VectorizeRetType | AddRetType
};
2239 
namespace {
/// One row of a NEON builtin table.  It is a plain aggregate so that rows
/// can be written as brace initializers (see the NEONMAP0/1/2 macros).
struct NeonIntrinsicInfo {
  unsigned BuiltinID;        // Builtin this row describes.
  unsigned LLVMIntrinsic;    // Primary LLVM intrinsic ID (0 in NEONMAP0 rows).
  unsigned AltLLVMIntrinsic; // Second intrinsic ID (only set by NEONMAP2).
  const char *NameHint;      // Stringized builtin base name.
  unsigned TypeModifier;     // OR-ed mask of the type-modifier flags.

  /// Orders a row against a bare builtin ID.
  /// NOTE(review): presumably this exists so a table sorted by BuiltinID can
  /// be searched with a lower_bound-style algorithm; the lookup code is not
  /// in this part of the file, so confirm there.
  bool operator<(unsigned OtherBuiltinID) const {
    return OtherBuiltinID > BuiltinID;
  }
};
} // end anonymous namespace
2253 
// Helpers for writing rows of the NEON builtin tables.  Each one expands to a
// brace initializer with five elements, in order: the builtin ID
// (NEON::BI__builtin_neon_<NameBase>), the primary LLVM intrinsic, the
// alternate LLVM intrinsic, the stringized NameBase, and the type modifier.

// Row with no LLVM intrinsic and no type modifier.
#define NEONMAP0(NameBase)                                                     \
  { NEON::BI__builtin_neon_##NameBase, 0, 0, #NameBase, 0 }

// Row with one LLVM intrinsic; the alternate intrinsic slot is 0.
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier)                        \
  { NEON::BI__builtin_neon_##NameBase, Intrinsic::LLVMIntrinsic, 0,            \
    #NameBase, TypeModifier }

// Row with a primary and an alternate LLVM intrinsic.
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier)      \
  { NEON::BI__builtin_neon_##NameBase, Intrinsic::LLVMIntrinsic,               \
    Intrinsic::AltLLVMIntrinsic, #NameBase, TypeModifier }
2265 
2266 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
2267   NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
2268   NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
2269   NEONMAP1(vabs_v, arm_neon_vabs, 0),
2270   NEONMAP1(vabsq_v, arm_neon_vabs, 0),
2271   NEONMAP0(vaddhn_v),
2272   NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
2273   NEONMAP1(vaeseq_v, arm_neon_aese, 0),
2274   NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
2275   NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
2276   NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
2277   NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
2278   NEONMAP1(vcage_v, arm_neon_vacge, 0),
2279   NEONMAP1(vcageq_v, arm_neon_vacge, 0),
2280   NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
2281   NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
2282   NEONMAP1(vcale_v, arm_neon_vacge, 0),
2283   NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
2284   NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
2285   NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
2286   NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
2287   NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
2288   NEONMAP1(vclz_v, ctlz, Add1ArgType),
2289   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
2290   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
2291   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
2292   NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
2293   NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
2294   NEONMAP0(vcvt_f32_v),
2295   NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
2296   NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
2297   NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
2298   NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
2299   NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
2300   NEONMAP0(vcvt_s32_v),
2301   NEONMAP0(vcvt_s64_v),
2302   NEONMAP0(vcvt_u32_v),
2303   NEONMAP0(vcvt_u64_v),
2304   NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
2305   NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
2306   NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
2307   NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
2308   NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
2309   NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
2310   NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
2311   NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
2312   NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
2313   NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
2314   NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
2315   NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
2316   NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
2317   NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
2318   NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
2319   NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
2320   NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
2321   NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
2322   NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
2323   NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
2324   NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
2325   NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
2326   NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
2327   NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
2328   NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
2329   NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
2330   NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
2331   NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
2332   NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
2333   NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
2334   NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
2335   NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
2336   NEONMAP0(vcvtq_f32_v),
2337   NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
2338   NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
2339   NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
2340   NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
2341   NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
2342   NEONMAP0(vcvtq_s32_v),
2343   NEONMAP0(vcvtq_s64_v),
2344   NEONMAP0(vcvtq_u32_v),
2345   NEONMAP0(vcvtq_u64_v),
2346   NEONMAP0(vext_v),
2347   NEONMAP0(vextq_v),
2348   NEONMAP0(vfma_v),
2349   NEONMAP0(vfmaq_v),
2350   NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
2351   NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
2352   NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
2353   NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
2354   NEONMAP0(vld1_dup_v),
2355   NEONMAP1(vld1_v, arm_neon_vld1, 0),
2356   NEONMAP0(vld1q_dup_v),
2357   NEONMAP1(vld1q_v, arm_neon_vld1, 0),
2358   NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
2359   NEONMAP1(vld2_v, arm_neon_vld2, 0),
2360   NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
2361   NEONMAP1(vld2q_v, arm_neon_vld2, 0),
2362   NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
2363   NEONMAP1(vld3_v, arm_neon_vld3, 0),
2364   NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
2365   NEONMAP1(vld3q_v, arm_neon_vld3, 0),
2366   NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
2367   NEONMAP1(vld4_v, arm_neon_vld4, 0),
2368   NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
2369   NEONMAP1(vld4q_v, arm_neon_vld4, 0),
2370   NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
2371   NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
2372   NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
2373   NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
2374   NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
2375   NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
2376   NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
2377   NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
2378   NEONMAP0(vmovl_v),
2379   NEONMAP0(vmovn_v),
2380   NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
2381   NEONMAP0(vmull_v),
2382   NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
2383   NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
2384   NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
2385   NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
2386   NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
2387   NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
2388   NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
2389   NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
2390   NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
2391   NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
2392   NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
2393   NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
2394   NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
2395   NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
2396   NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
2397   NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
2398   NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
2399   NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
2400   NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
2401   NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
2402   NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
2403   NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
2404   NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
2405   NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
2406   NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
2407   NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
2408   NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
2409   NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
2410   NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
2411   NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
2412   NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
2413   NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
2414   NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
2415   NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
2416   NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
2417   NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
2418   NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
2419   NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
2420   NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
2421   NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
2422   NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
2423   NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
2424   NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
2425   NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
2426   NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
2427   NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
2428   NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
2429   NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
2430   NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
2431   NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
2432   NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
2433   NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
2434   NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
2435   NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
2436   NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
2437   NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
2438   NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
2439   NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
2440   NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
2441   NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
2442   NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
2443   NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
2444   NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
2445   NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
2446   NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
2447   NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
2448   NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
2449   NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
2450   NEONMAP0(vshl_n_v),
2451   NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
2452   NEONMAP0(vshll_n_v),
2453   NEONMAP0(vshlq_n_v),
2454   NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
2455   NEONMAP0(vshr_n_v),
2456   NEONMAP0(vshrn_n_v),
2457   NEONMAP0(vshrq_n_v),
2458   NEONMAP1(vst1_v, arm_neon_vst1, 0),
2459   NEONMAP1(vst1q_v, arm_neon_vst1, 0),
2460   NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
2461   NEONMAP1(vst2_v, arm_neon_vst2, 0),
2462   NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
2463   NEONMAP1(vst2q_v, arm_neon_vst2, 0),
2464   NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
2465   NEONMAP1(vst3_v, arm_neon_vst3, 0),
2466   NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
2467   NEONMAP1(vst3q_v, arm_neon_vst3, 0),
2468   NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
2469   NEONMAP1(vst4_v, arm_neon_vst4, 0),
2470   NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
2471   NEONMAP1(vst4q_v, arm_neon_vst4, 0),
2472   NEONMAP0(vsubhn_v),
2473   NEONMAP0(vtrn_v),
2474   NEONMAP0(vtrnq_v),
2475   NEONMAP0(vtst_v),
2476   NEONMAP0(vtstq_v),
2477   NEONMAP0(vuzp_v),
2478   NEONMAP0(vuzpq_v),
2479   NEONMAP0(vzip_v),
2480   NEONMAP0(vzipq_v)
2481 };
2482 
2483 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
2484   NEONMAP1(vabs_v, aarch64_neon_abs, 0),
2485   NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
2486   NEONMAP0(vaddhn_v),
2487   NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
2488   NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
2489   NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
2490   NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
2491   NEONMAP1(vcage_v, aarch64_neon_facge, 0),
2492   NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
2493   NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
2494   NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
2495   NEONMAP1(vcale_v, aarch64_neon_facge, 0),
2496   NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
2497   NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
2498   NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
2499   NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
2500   NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
2501   NEONMAP1(vclz_v, ctlz, Add1ArgType),
2502   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
2503   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
2504   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
2505   NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
2506   NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
2507   NEONMAP0(vcvt_f32_v),
2508   NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
2509   NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
2510   NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
2511   NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
2512   NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
2513   NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
2514   NEONMAP0(vcvtq_f32_v),
2515   NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
2516   NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
2517   NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
2518   NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
2519   NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
2520   NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
2521   NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
2522   NEONMAP0(vext_v),
2523   NEONMAP0(vextq_v),
2524   NEONMAP0(vfma_v),
2525   NEONMAP0(vfmaq_v),
2526   NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
2527   NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
2528   NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
2529   NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
2530   NEONMAP0(vmovl_v),
2531   NEONMAP0(vmovn_v),
2532   NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
2533   NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
2534   NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
2535   NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
2536   NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
2537   NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
2538   NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
2539   NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
2540   NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
2541   NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
2542   NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
2543   NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
2544   NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
2545   NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
2546   NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
2547   NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
2548   NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
2549   NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
2550   NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
2551   NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
2552   NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
2553   NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
2554   NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
2555   NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
2556   NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
2557   NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
2558   NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
2559   NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
2560   NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
2561   NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
2562   NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
2563   NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
2564   NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
2565   NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
2566   NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
2567   NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
2568   NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
2569   NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
2570   NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
2571   NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
2572   NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
2573   NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
2574   NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
2575   NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
2576   NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
2577   NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
2578   NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
2579   NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
2580   NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
2581   NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
2582   NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
2583   NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
2584   NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
2585   NEONMAP0(vshl_n_v),
2586   NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
2587   NEONMAP0(vshll_n_v),
2588   NEONMAP0(vshlq_n_v),
2589   NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
2590   NEONMAP0(vshr_n_v),
2591   NEONMAP0(vshrn_n_v),
2592   NEONMAP0(vshrq_n_v),
2593   NEONMAP0(vsubhn_v),
2594   NEONMAP0(vtst_v),
2595   NEONMAP0(vtstq_v),
2596 };
2597 
2598 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
2599   NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
2600   NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
2601   NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
2602   NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
2603   NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
2604   NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
2605   NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
2606   NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
2607   NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
2608   NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
2609   NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
2610   NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
2611   NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
2612   NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
2613   NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
2614   NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
2615   NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
2616   NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
2617   NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
2618   NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
2619   NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
2620   NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
2621   NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
2622   NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
2623   NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
2624   NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
2625   NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
2626   NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
2627   NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
2628   NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
2629   NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
2630   NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
2631   NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
2632   NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
2633   NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
2634   NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
2635   NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
2636   NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
2637   NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
2638   NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
2639   NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
2640   NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
2641   NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
2642   NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
2643   NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
2644   NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
2645   NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
2646   NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
2647   NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
2648   NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
2649   NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
2650   NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
2651   NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
2652   NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
2653   NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
2654   NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
2655   NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
2656   NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
2657   NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
2658   NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
2659   NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
2660   NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
2661   NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
2662   NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
2663   NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
2664   NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
2665   NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
2666   NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
2667   NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
2668   NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
2669   NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
2670   NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
2671   NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
2672   NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
2673   NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
2674   NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
2675   NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
2676   NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
2677   NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
2678   NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
2679   NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
2680   NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
2681   NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
2682   NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
2683   NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
2684   NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
2685   NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
2686   NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
2687   NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
2688   NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
2689   NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
2690   NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
2691   NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
2692   NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
2693   NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
2694   NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
2695   NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
2696   NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
2697   NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
2698   NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
2699   NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
2700   NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
2701   NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
2702   NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
2703   NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
2704   NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
2705   NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
2706   NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
2707   NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
2708   NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
2709   NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
2710   NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
2711   NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
2712   NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
2713   NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
2714   NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
2715   NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
2716   NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
2717   NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
2718   NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
2719   NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
2720   NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
2721   NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
2722   NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
2723   NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
2724   NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
2725   NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
2726   NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
2727   NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
2728   NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
2729   NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
2730   NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
2731   NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
2732   NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
2733   NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
2734   NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
2735   NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
2736   NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
2737   NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
2738   NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
2739   NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
2740   NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
2741   NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
2742   NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
2743   NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
2744   NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
2745   NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
2746   NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
2747   NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
2748   NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
2749   NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
2750   NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
2751   NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
2752   NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
2753   NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
2754   NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
2755   NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
2756   NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
2757   NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
2758   NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
2759   NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
2760   NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
2761   NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
2762   NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
2763   NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
2764   NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
2765   NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
2766   NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
2767   NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
2768   NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
2769   NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
2770   NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
2771   NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
2772   NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
2773   NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
2774   NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
2775   NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
2776   NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
2777   NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
2778   NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
2779   NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
2780   NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
2781   NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
2782   NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
2783   NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
2784   NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
2785   NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
2786   NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
2787   NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
2788   NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
2789   NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
2790   NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
2791 };
2792 
// The NEONMAP* helper macros exist only to build the intrinsic tables (the
// entries directly above are written with NEONMAP1). Undefine them here so
// they do not stay visible for the remainder of the file.
2793 #undef NEONMAP0
2794 #undef NEONMAP1
2795 #undef NEONMAP2
2796 
// "Proven sorted" flags, one per intrinsic table. findNeonIntrinsicInMap()
// takes such a flag by reference and sets it after its asserts-only ordering
// check, so the check runs at most once per table.
// NOTE(review): the call sites that pair each flag with its table are outside
// this part of the file - confirm the pairing there.
2797 static bool NEONSIMDIntrinsicsProvenSorted = false;
2798 
2799 static bool AArch64SIMDIntrinsicsProvenSorted = false;
2800 static bool AArch64SISDIntrinsicsProvenSorted = false;
2801 
2802 
2803 static const NeonIntrinsicInfo *
2804 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
2805                        unsigned BuiltinID, bool &MapProvenSorted) {
2806 
2807 #ifndef NDEBUG
2808   if (!MapProvenSorted) {
2809     // FIXME: use std::is_sorted once C++11 is allowed
2810     for (unsigned i = 0; i < IntrinsicMap.size() - 1; ++i)
2811       assert(IntrinsicMap[i].BuiltinID <= IntrinsicMap[i + 1].BuiltinID);
2812     MapProvenSorted = true;
2813   }
2814 #endif
2815 
2816   const NeonIntrinsicInfo *Builtin =
2817       std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
2818 
2819   if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
2820     return Builtin;
2821 
2822   return nullptr;
2823 }
2824 
2825 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
2826                                                    unsigned Modifier,
2827                                                    llvm::Type *ArgType,
2828                                                    const CallExpr *E) {
2829   int VectorSize = 0;
2830   if (Modifier & Use64BitVectors)
2831     VectorSize = 64;
2832   else if (Modifier & Use128BitVectors)
2833     VectorSize = 128;
2834 
2835   // Return type.
2836   SmallVector<llvm::Type *, 3> Tys;
2837   if (Modifier & AddRetType) {
2838     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
2839     if (Modifier & VectorizeRetType)
2840       Ty = llvm::VectorType::get(
2841           Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
2842 
2843     Tys.push_back(Ty);
2844   }
2845 
2846   // Arguments.
2847   if (Modifier & VectorizeArgTypes) {
2848     int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
2849     ArgType = llvm::VectorType::get(ArgType, Elts);
2850   }
2851 
2852   if (Modifier & (Add1ArgType | Add2ArgTypes))
2853     Tys.push_back(ArgType);
2854 
2855   if (Modifier & Add2ArgTypes)
2856     Tys.push_back(ArgType);
2857 
2858   if (Modifier & InventFloatType)
2859     Tys.push_back(FloatTy);
2860 
2861   return CGM.getIntrinsic(IntrinsicID, Tys);
2862 }
2863 
2864 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
2865                                             const NeonIntrinsicInfo &SISDInfo,
2866                                             SmallVectorImpl<Value *> &Ops,
2867                                             const CallExpr *E) {
2868   unsigned BuiltinID = SISDInfo.BuiltinID;
2869   unsigned int Int = SISDInfo.LLVMIntrinsic;
2870   unsigned Modifier = SISDInfo.TypeModifier;
2871   const char *s = SISDInfo.NameHint;
2872 
2873   switch (BuiltinID) {
2874   case NEON::BI__builtin_neon_vcled_s64:
2875   case NEON::BI__builtin_neon_vcled_u64:
2876   case NEON::BI__builtin_neon_vcles_f32:
2877   case NEON::BI__builtin_neon_vcled_f64:
2878   case NEON::BI__builtin_neon_vcltd_s64:
2879   case NEON::BI__builtin_neon_vcltd_u64:
2880   case NEON::BI__builtin_neon_vclts_f32:
2881   case NEON::BI__builtin_neon_vcltd_f64:
2882   case NEON::BI__builtin_neon_vcales_f32:
2883   case NEON::BI__builtin_neon_vcaled_f64:
2884   case NEON::BI__builtin_neon_vcalts_f32:
2885   case NEON::BI__builtin_neon_vcaltd_f64:
2886     // Only one direction of comparisons actually exist, cmle is actually a cmge
2887     // with swapped operands. The table gives us the right intrinsic but we
2888     // still need to do the swap.
2889     std::swap(Ops[0], Ops[1]);
2890     break;
2891   }
2892 
2893   assert(Int && "Generic code assumes a valid intrinsic");
2894 
2895   // Determine the type(s) of this overloaded AArch64 intrinsic.
2896   const Expr *Arg = E->getArg(0);
2897   llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
2898   Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
2899 
2900   int j = 0;
2901   ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
2902   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
2903        ai != ae; ++ai, ++j) {
2904     llvm::Type *ArgTy = ai->getType();
2905     if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
2906              ArgTy->getPrimitiveSizeInBits())
2907       continue;
2908 
2909     assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
2910     // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
2911     // it before inserting.
2912     Ops[j] =
2913         CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
2914     Ops[j] =
2915         CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
2916   }
2917 
2918   Value *Result = CGF.EmitNeonCall(F, Ops, s);
2919   llvm::Type *ResultType = CGF.ConvertType(E->getType());
2920   if (ResultType->getPrimitiveSizeInBits() <
2921       Result->getType()->getPrimitiveSizeInBits())
2922     return CGF.Builder.CreateExtractElement(Result, C0);
2923 
2924   return CGF.Builder.CreateBitCast(Result, ResultType, s);
2925 }
2926 
/// Emit IR for a NEON builtin whose last call argument encodes the overloaded
/// vector type.
///
/// The trailing argument of \p E must be an integer constant expression; it is
/// decoded as NeonTypeFlags to obtain the vector type (VTy / Ty). Builtins
/// that need bespoke lowering are handled by the switch below and return from
/// there. Everything else leaves the switch (either via the default label or
/// an explicit break) and is emitted as a call to the intrinsic returned by
/// LookupNeonLLVMIntrinsic, with the result bitcast to the builtin's type.
///
/// \param BuiltinID        Builtin being emitted (NEON::BI__builtin_neon_*).
/// \param LLVMIntrinsic    Primary intrinsic ID for this builtin.
/// \param AltLLVMIntrinsic Secondary intrinsic ID. How it is used depends on
///                         the case: signed variant (UnsignedAlts, vcvt_n_f*),
///                         non-floating-point variant (vrecpe / vrsqrte), or
///                         the second call of vqdmlal / vqdmlsl.
/// \param NameHint         Name given to the emitted value on the generic
///                         path and in several of the cases.
/// \param Modifier         Type-modifier flags; tested for UnsignedAlts here
///                         and forwarded to LookupNeonLLVMIntrinsic.
/// \param E                The builtin call expression.
/// \param Ops              Operand values; modified in place by many cases.
/// \param PtrOp0           Address whose alignment is appended as an operand
///                         for vld1 and the vst* cases, and which is loaded
///                         from for vld1_dup.
/// \param PtrOp1           Address whose alignment is passed to the
///                         vld2/3/4 and vld2/3/4_lane intrinsics.
/// \returns the emitted value, or nullptr when the type argument is not an
///          integer constant expression or GetNeonType yields no type.
2927 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
2928     unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
2929     const char *NameHint, unsigned Modifier, const CallExpr *E,
2930     SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
2931   // Get the last argument, which specifies the vector type.
2932   llvm::APSInt NeonTypeConst;
2933   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
2934   if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
2935     return nullptr;
2936 
2937   // Determine the type of this overloaded NEON intrinsic.
2938   NeonTypeFlags Type(NeonTypeConst.getZExtValue());
2939   bool Usgn = Type.isUnsigned();
2940   bool Quad = Type.isQuad();
2941 
2942   llvm::VectorType *VTy = GetNeonType(this, Type);
2943   llvm::Type *Ty = VTy;
2944   if (!Ty)
2945     return nullptr;
2946 
  // Helper: the alignment of an address as an i32 constant, the form in which
  // the load/store intrinsics below take it.
2947   auto getAlignmentValue32 = [&](Address addr) -> Value* {
2948     return Builder.getInt32(addr.getAlignment().getQuantity());
2949   };
2950 
  // Int is the intrinsic used by the generic path and by the cases that do not
  // pick one explicitly. With UnsignedAlts set, signed element types use the
  // alternative intrinsic.
2951   unsigned Int = LLVMIntrinsic;
2952   if ((Modifier & UnsignedAlts) && !Usgn)
2953     Int = AltLLVMIntrinsic;
2954 
2955   switch (BuiltinID) {
2956   default: break;
2957   case NEON::BI__builtin_neon_vabs_v:
2958   case NEON::BI__builtin_neon_vabsq_v:
    // Floating-point vectors use the target-independent fabs intrinsic; all
    // other element types use the table's intrinsic.
2959     if (VTy->getElementType()->isFloatingPointTy())
2960       return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
2961     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
2962   case NEON::BI__builtin_neon_vaddhn_v: {
    // Add in the double-width element type, then keep the high half of each
    // element.
2963     llvm::VectorType *SrcTy =
2964         llvm::VectorType::getExtendedElementVectorType(VTy);
2965 
2966     // %sum = add <4 x i32> %lhs, %rhs
2967     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2968     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
2969     Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
2970 
2971     // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
2972     Constant *ShiftAmt =
2973         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
2974     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
2975 
2976     // %res = trunc <4 x i32> %high to <4 x i16>
2977     return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
2978   }
2979   case NEON::BI__builtin_neon_vcale_v:
2980   case NEON::BI__builtin_neon_vcaleq_v:
2981   case NEON::BI__builtin_neon_vcalt_v:
2982   case NEON::BI__builtin_neon_vcaltq_v:
    // The le/lt forms swap their operands and then deliberately fall through
    // to share the ge/gt emission below.
2983     std::swap(Ops[0], Ops[1]);
2984   case NEON::BI__builtin_neon_vcage_v:
2985   case NEON::BI__builtin_neon_vcageq_v:
2986   case NEON::BI__builtin_neon_vcagt_v:
2987   case NEON::BI__builtin_neon_vcagtq_v: {
    // The intrinsic is overloaded on {VTy, floating-point vector}: the second
    // type has the same element count as VTy, with float elements when VTy's
    // elements are 32 bits wide and double elements otherwise.
2988     llvm::Type *VecFlt = llvm::VectorType::get(
2989         VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
2990         VTy->getNumElements());
2991     llvm::Type *Tys[] = { VTy, VecFlt };
2992     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
2993     return EmitNeonCall(F, Ops, NameHint);
2994   }
2995   case NEON::BI__builtin_neon_vclz_v:
2996   case NEON::BI__builtin_neon_vclzq_v:
2997     // We generate target-independent intrinsic, which needs a second argument
2998     // for whether or not clz of zero is undefined; on ARM it isn't.
2999     Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
    // Leave the switch: the call itself is emitted by the generic path.
3000     break;
3001   case NEON::BI__builtin_neon_vcvt_f32_v:
3002   case NEON::BI__builtin_neon_vcvtq_f32_v:
    // Integer vector to float vector conversion; Ty is re-pointed at the
    // Float32 vector type of the same "quad" setting for the result.
3003     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3004     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
3005     return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
3006                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
3007   case NEON::BI__builtin_neon_vcvt_n_f32_v:
3008   case NEON::BI__builtin_neon_vcvt_n_f64_v:
3009   case NEON::BI__builtin_neon_vcvtq_n_f32_v:
3010   case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
    // Overloaded on {float vector, Ty}. Unsigned sources use LLVMIntrinsic,
    // signed sources use AltLLVMIntrinsic.
3011     llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
3012     Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
3013     Function *F = CGM.getIntrinsic(Int, Tys);
3014     return EmitNeonCall(F, Ops, "vcvt_n");
3015   }
3016   case NEON::BI__builtin_neon_vcvt_n_s32_v:
3017   case NEON::BI__builtin_neon_vcvt_n_u32_v:
3018   case NEON::BI__builtin_neon_vcvt_n_s64_v:
3019   case NEON::BI__builtin_neon_vcvt_n_u64_v:
3020   case NEON::BI__builtin_neon_vcvtq_n_s32_v:
3021   case NEON::BI__builtin_neon_vcvtq_n_u32_v:
3022   case NEON::BI__builtin_neon_vcvtq_n_s64_v:
3023   case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
    // Overloaded on {Ty, float vector}.
3024     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3025     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3026     return EmitNeonCall(F, Ops, "vcvt_n");
3027   }
3028   case NEON::BI__builtin_neon_vcvt_s32_v:
3029   case NEON::BI__builtin_neon_vcvt_u32_v:
3030   case NEON::BI__builtin_neon_vcvt_s64_v:
3031   case NEON::BI__builtin_neon_vcvt_u64_v:
3032   case NEON::BI__builtin_neon_vcvtq_s32_v:
3033   case NEON::BI__builtin_neon_vcvtq_u32_v:
3034   case NEON::BI__builtin_neon_vcvtq_s64_v:
3035   case NEON::BI__builtin_neon_vcvtq_u64_v: {
    // Plain float-to-integer conversion, emitted as fptoui / fptosi.
3036     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
3037     return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
3038                 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
3039   }
3040   case NEON::BI__builtin_neon_vcvta_s32_v:
3041   case NEON::BI__builtin_neon_vcvta_s64_v:
3042   case NEON::BI__builtin_neon_vcvta_u32_v:
3043   case NEON::BI__builtin_neon_vcvta_u64_v:
3044   case NEON::BI__builtin_neon_vcvtaq_s32_v:
3045   case NEON::BI__builtin_neon_vcvtaq_s64_v:
3046   case NEON::BI__builtin_neon_vcvtaq_u32_v:
3047   case NEON::BI__builtin_neon_vcvtaq_u64_v:
3048   case NEON::BI__builtin_neon_vcvtn_s32_v:
3049   case NEON::BI__builtin_neon_vcvtn_s64_v:
3050   case NEON::BI__builtin_neon_vcvtn_u32_v:
3051   case NEON::BI__builtin_neon_vcvtn_u64_v:
3052   case NEON::BI__builtin_neon_vcvtnq_s32_v:
3053   case NEON::BI__builtin_neon_vcvtnq_s64_v:
3054   case NEON::BI__builtin_neon_vcvtnq_u32_v:
3055   case NEON::BI__builtin_neon_vcvtnq_u64_v:
3056   case NEON::BI__builtin_neon_vcvtp_s32_v:
3057   case NEON::BI__builtin_neon_vcvtp_s64_v:
3058   case NEON::BI__builtin_neon_vcvtp_u32_v:
3059   case NEON::BI__builtin_neon_vcvtp_u64_v:
3060   case NEON::BI__builtin_neon_vcvtpq_s32_v:
3061   case NEON::BI__builtin_neon_vcvtpq_s64_v:
3062   case NEON::BI__builtin_neon_vcvtpq_u32_v:
3063   case NEON::BI__builtin_neon_vcvtpq_u64_v:
3064   case NEON::BI__builtin_neon_vcvtm_s32_v:
3065   case NEON::BI__builtin_neon_vcvtm_s64_v:
3066   case NEON::BI__builtin_neon_vcvtm_u32_v:
3067   case NEON::BI__builtin_neon_vcvtm_u64_v:
3068   case NEON::BI__builtin_neon_vcvtmq_s32_v:
3069   case NEON::BI__builtin_neon_vcvtmq_s64_v:
3070   case NEON::BI__builtin_neon_vcvtmq_u32_v:
3071   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    // All of these conversions share one shape: the table's intrinsic,
    // overloaded on {Ty, float vector}.
3072     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3073     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
3074   }
3075   case NEON::BI__builtin_neon_vext_v:
3076   case NEON::BI__builtin_neon_vextq_v: {
    // Emitted as a shufflevector whose mask is the run of consecutive indices
    // CV, CV+1, ... (one per result element); Ops[2] must be a constant.
3077     int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
3078     SmallVector<Constant*, 16> Indices;
3079     for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
3080       Indices.push_back(ConstantInt::get(Int32Ty, i+CV));
3081 
3082     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3083     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3084     Value *SV = llvm::ConstantVector::get(Indices);
3085     return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
3086   }
3087   case NEON::BI__builtin_neon_vfma_v:
3088   case NEON::BI__builtin_neon_vfmaq_v: {
3089     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
3090     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3091     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3092     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3093 
3094     // NEON intrinsic puts accumulator first, unlike the LLVM fma.
3095     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
3096   }
3097   case NEON::BI__builtin_neon_vld1_v:
3098   case NEON::BI__builtin_neon_vld1q_v: {
    // The alignment of PtrOp0 is appended as the final operand.
3099     llvm::Type *Tys[] = {Ty, Int8PtrTy};
3100     Ops.push_back(getAlignmentValue32(PtrOp0));
3101     return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
3102   }
3103   case NEON::BI__builtin_neon_vld2_v:
3104   case NEON::BI__builtin_neon_vld2q_v:
3105   case NEON::BI__builtin_neon_vld3_v:
3106   case NEON::BI__builtin_neon_vld3q_v:
3107   case NEON::BI__builtin_neon_vld4_v:
3108   case NEON::BI__builtin_neon_vld4q_v: {
    // Ops[1] is the pointer handed to the intrinsic (with PtrOp1's alignment).
    // The value the intrinsic returns is then stored through Ops[0], which is
    // first cast to a pointer to that value's type.
3109     llvm::Type *Tys[] = {Ty, Int8PtrTy};
3110     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3111     Value *Align = getAlignmentValue32(PtrOp1);
3112     Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
3113     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3114     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3115     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3116   }
3117   case NEON::BI__builtin_neon_vld1_dup_v:
3118   case NEON::BI__builtin_neon_vld1q_dup_v: {
    // Load a single element through PtrOp0, insert it into lane 0 of an
    // undefined vector and hand the result to EmitNeonSplat.
    // NOTE(review): EmitNeonSplat is defined elsewhere; presumably it
    // replicates the lane given by CI across the vector - confirm there.
3119     Value *V = UndefValue::get(Ty);
3120     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
3121     PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
3122     LoadInst *Ld = Builder.CreateLoad(PtrOp0);
3123     llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
3124     Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
3125     return EmitNeonSplat(Ops[0], CI);
3126   }
3127   case NEON::BI__builtin_neon_vld2_lane_v:
3128   case NEON::BI__builtin_neon_vld2q_lane_v:
3129   case NEON::BI__builtin_neon_vld3_lane_v:
3130   case NEON::BI__builtin_neon_vld3q_lane_v:
3131   case NEON::BI__builtin_neon_vld4_lane_v:
3132   case NEON::BI__builtin_neon_vld4q_lane_v: {
3133     llvm::Type *Tys[] = {Ty, Int8PtrTy};
3134     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    // Cast the operands from index 2 up to, but not including, the last one
    // to the vector type. The loop runs before the alignment is appended, so
    // "the last one" is the final incoming operand.
3135     for (unsigned I = 2; I < Ops.size() - 1; ++I)
3136       Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
3137     Ops.push_back(getAlignmentValue32(PtrOp1));
    // Ops[0] is the destination for the result; everything from Ops[1] on is
    // passed to the intrinsic.
3138     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
3139     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3140     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3141     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3142   }
3143   case NEON::BI__builtin_neon_vmovl_v: {
    // Widen from the half-width element type: zero-extend when unsigned,
    // sign-extend otherwise.
3144     llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
3145     Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
3146     if (Usgn)
3147       return Builder.CreateZExt(Ops[0], Ty, "vmovl");
3148     return Builder.CreateSExt(Ops[0], Ty, "vmovl");
3149   }
3150   case NEON::BI__builtin_neon_vmovn_v: {
    // Narrow from the double-width element type by truncation.
3151     llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
3152     Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
3153     return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
3154   }
3155   case NEON::BI__builtin_neon_vmull_v:
3156     // FIXME: the integer vmull operations could be emitted in terms of pure
3157     // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
3158     // hoisting the exts outside loops. Until global ISel comes along that can
3159     // see through such movement this leads to bad CodeGen. So we need an
3160     // intrinsic for now.
    // The polynomial variant takes precedence over the signed/unsigned choice.
3161     Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
3162     Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
3163     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
3164   case NEON::BI__builtin_neon_vpadal_v:
3165   case NEON::BI__builtin_neon_vpadalq_v: {
3166     // The source operand type has twice as many elements of half the size.
3167     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
3168     llvm::Type *EltTy =
3169       llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
3170     llvm::Type *NarrowTy =
3171       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
3172     llvm::Type *Tys[2] = { Ty, NarrowTy };
3173     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
3174   }
3175   case NEON::BI__builtin_neon_vpaddl_v:
3176   case NEON::BI__builtin_neon_vpaddlq_v: {
3177     // The source operand type has twice as many elements of half the size.
3178     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
3179     llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
3180     llvm::Type *NarrowTy =
3181       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
3182     llvm::Type *Tys[2] = { Ty, NarrowTy };
3183     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
3184   }
3185   case NEON::BI__builtin_neon_vqdmlal_v:
3186   case NEON::BI__builtin_neon_vqdmlsl_v: {
    // Two calls: LLVMIntrinsic is applied to every operand except Ops[0], and
    // its result then replaces Ops[1] so that AltLLVMIntrinsic can combine it
    // with Ops[0]. The first call is named "vqdmlal" for both builtins.
3187     SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
3188     Ops[1] =
3189         EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
3190     Ops.resize(2);
3191     return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
3192   }
3193   case NEON::BI__builtin_neon_vqshl_n_v:
3194   case NEON::BI__builtin_neon_vqshlq_n_v:
    // NOTE(review): the trailing (1, false) arguments are interpreted by
    // EmitNeonCall, defined elsewhere; presumably operand 1 is the shift
    // amount and "false" means a left shift - confirm there.
3195     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
3196                         1, false);
3197   case NEON::BI__builtin_neon_vqshlu_n_v:
3198   case NEON::BI__builtin_neon_vqshluq_n_v:
3199     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
3200                         1, false);
3201   case NEON::BI__builtin_neon_vrecpe_v:
3202   case NEON::BI__builtin_neon_vrecpeq_v:
3203   case NEON::BI__builtin_neon_vrsqrte_v:
3204   case NEON::BI__builtin_neon_vrsqrteq_v:
    // Floating-point vectors use LLVMIntrinsic; all other element types use
    // AltLLVMIntrinsic.
3205     Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
3206     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
3207 
3208   case NEON::BI__builtin_neon_vrshr_n_v:
3209   case NEON::BI__builtin_neon_vrshrq_n_v:
3210     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
3211                         1, true);
3212   case NEON::BI__builtin_neon_vshl_n_v:
3213   case NEON::BI__builtin_neon_vshlq_n_v:
    // Shift left by an immediate is emitted as a plain IR shl, with the shift
    // amount operand converted by EmitNeonShiftVector.
3214     Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
3215     return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
3216                              "vshl_n");
3217   case NEON::BI__builtin_neon_vshll_n_v: {
    // Extend from the half-width element type (zero- or sign-extension by
    // signedness), then shift left.
3218     llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
3219     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3220     if (Usgn)
3221       Ops[0] = Builder.CreateZExt(Ops[0], VTy);
3222     else
3223       Ops[0] = Builder.CreateSExt(Ops[0], VTy);
3224     Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
3225     return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
3226   }
3227   case NEON::BI__builtin_neon_vshrn_n_v: {
    // Shift right in the double-width element type (logical when unsigned,
    // arithmetic otherwise), then truncate to the result type.
3228     llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
3229     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3230     Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
3231     if (Usgn)
3232       Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
3233     else
3234       Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
3235     return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
3236   }
3237   case NEON::BI__builtin_neon_vshr_n_v:
3238   case NEON::BI__builtin_neon_vshrq_n_v:
3239     return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
3240   case NEON::BI__builtin_neon_vst1_v:
3241   case NEON::BI__builtin_neon_vst1q_v:
3242   case NEON::BI__builtin_neon_vst2_v:
3243   case NEON::BI__builtin_neon_vst2q_v:
3244   case NEON::BI__builtin_neon_vst3_v:
3245   case NEON::BI__builtin_neon_vst3q_v:
3246   case NEON::BI__builtin_neon_vst4_v:
3247   case NEON::BI__builtin_neon_vst4q_v:
3248   case NEON::BI__builtin_neon_vst2_lane_v:
3249   case NEON::BI__builtin_neon_vst2q_lane_v:
3250   case NEON::BI__builtin_neon_vst3_lane_v:
3251   case NEON::BI__builtin_neon_vst3q_lane_v:
3252   case NEON::BI__builtin_neon_vst4_lane_v:
3253   case NEON::BI__builtin_neon_vst4q_lane_v: {
    // Every store variant has the same shape: overload on {i8*, Ty} and
    // append the alignment of PtrOp0 as the final operand. The call is given
    // an empty name.
3254     llvm::Type *Tys[] = {Int8PtrTy, Ty};
3255     Ops.push_back(getAlignmentValue32(PtrOp0));
3256     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
3257   }
3258   case NEON::BI__builtin_neon_vsubhn_v: {
    // Subtract in the double-width element type, then keep the high half of
    // each element.
3259     llvm::VectorType *SrcTy =
3260         llvm::VectorType::getExtendedElementVectorType(VTy);
3261 
3262     // %diff = sub <4 x i32> %lhs, %rhs
3263     Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3264     Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
3265     Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
3266 
3267     // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
3268     Constant *ShiftAmt =
3269         ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
3270     Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
3271 
3272     // %res = trunc <4 x i32> %high to <4 x i16>
3273     return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
3274   }
3275   case NEON::BI__builtin_neon_vtrn_v:
3276   case NEON::BI__builtin_neon_vtrnq_v: {
    // Ops[0] points at two consecutive result vectors; Ops[1] and Ops[2] are
    // the inputs. Result vi is a shuffle of the inputs with mask
    // (vi, e+vi, 2+vi, e+2+vi, ...), e being the element count, and is
    // stored to slot vi. The store of the second result is returned.
3277     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
3278     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3279     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3280     Value *SV = nullptr;
3281 
3282     for (unsigned vi = 0; vi != 2; ++vi) {
3283       SmallVector<Constant*, 16> Indices;
3284       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
3285         Indices.push_back(Builder.getInt32(i+vi));
3286         Indices.push_back(Builder.getInt32(i+e+vi));
3287       }
3288       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
3289       SV = llvm::ConstantVector::get(Indices);
3290       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
3291       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
3292     }
3293     return SV;
3294   }
3295   case NEON::BI__builtin_neon_vtst_v:
3296   case NEON::BI__builtin_neon_vtstq_v: {
    // (a & b) != 0 per element, with the i1 results sign-extended to Ty.
3297     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3298     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3299     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
3300     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
3301                                 ConstantAggregateZero::get(Ty));
3302     return Builder.CreateSExt(Ops[0], Ty, "vtst");
3303   }
3304   case NEON::BI__builtin_neon_vuzp_v:
3305   case NEON::BI__builtin_neon_vuzpq_v: {
    // Same two-result layout as vtrn. Result vi uses the mask
    // (vi, 2+vi, 4+vi, ...): every second index starting at vi.
3306     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
3307     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3308     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3309     Value *SV = nullptr;
3310 
3311     for (unsigned vi = 0; vi != 2; ++vi) {
3312       SmallVector<Constant*, 16> Indices;
3313       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
3314         Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
3315 
3316       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
3317       SV = llvm::ConstantVector::get(Indices);
3318       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
3319       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
3320     }
3321     return SV;
3322   }
3323   case NEON::BI__builtin_neon_vzip_v:
3324   case NEON::BI__builtin_neon_vzipq_v: {
    // Same two-result layout as vtrn. Result vi uses index pairs (k, k+e)
    // with k = (i + vi*e) / 2 for i = 0, 2, 4, ...: result 0 starts at k = 0
    // and result 1 at k = e/2.
3325     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
3326     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3327     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3328     Value *SV = nullptr;
3329 
3330     for (unsigned vi = 0; vi != 2; ++vi) {
3331       SmallVector<Constant*, 16> Indices;
3332       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
3333         Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
3334         Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
3335       }
3336       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
3337       SV = llvm::ConstantVector::get(Indices);
3338       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
3339       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
3340     }
3341     return SV;
3342   }
3343   }
3344 
  // Generic path: reached by builtins without a dedicated case above, and by
  // vclz after it has appended its extra operand.
3345   assert(Int && "Expected valid intrinsic number");
3346 
3347   // Determine the type(s) of this overloaded AArch64 intrinsic.
3348   Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
3349 
3350   Value *Result = EmitNeonCall(F, Ops, NameHint);
3351   llvm::Type *ResultType = ConvertType(E->getType());
3352   // AArch64 intrinsic one-element vector type cast to
3353   // scalar type expected by the builtin
3354   return Builder.CreateBitCast(Result, ResultType, NameHint);
3355 }
3356 
3357 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
3358     Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
3359     const CmpInst::Predicate Ip, const Twine &Name) {
3360   llvm::Type *OTy = Op->getType();
3361 
3362   // FIXME: this is utterly horrific. We should not be looking at previous
3363   // codegen context to find out what needs doing. Unfortunately TableGen
3364   // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
3365   // (etc).
3366   if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
3367     OTy = BI->getOperand(0)->getType();
3368 
3369   Op = Builder.CreateBitCast(Op, OTy);
3370   if (OTy->getScalarType()->isFloatingPointTy()) {
3371     Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
3372   } else {
3373     Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
3374   }
3375   return Builder.CreateSExt(Op, Ty, Name);
3376 }
3377 
3378 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
3379                                  Value *ExtOp, Value *IndexOp,
3380                                  llvm::Type *ResTy, unsigned IntID,
3381                                  const char *Name) {
3382   SmallVector<Value *, 2> TblOps;
3383   if (ExtOp)
3384     TblOps.push_back(ExtOp);
3385 
3386   // Build a vector containing sequential number like (0, 1, 2, ..., 15)
3387   SmallVector<Constant*, 16> Indices;
3388   llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
3389   for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
3390     Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i));
3391     Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i+1));
3392   }
3393   Value *SV = llvm::ConstantVector::get(Indices);
3394 
3395   int PairPos = 0, End = Ops.size() - 1;
3396   while (PairPos < End) {
3397     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
3398                                                      Ops[PairPos+1], SV, Name));
3399     PairPos += 2;
3400   }
3401 
3402   // If there's an odd number of 64-bit lookup table, fill the high 64-bit
3403   // of the 128-bit lookup table with zero.
3404   if (PairPos == End) {
3405     Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
3406     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
3407                                                      ZeroTbl, SV, Name));
3408   }
3409 
3410   Function *TblF;
3411   TblOps.push_back(IndexOp);
3412   TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
3413 
3414   return CGF.EmitNeonCall(TblF, TblOps, Name);
3415 }
3416 
3417 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
3418   unsigned Value;
3419   switch (BuiltinID) {
3420   default:
3421     return nullptr;
3422   case ARM::BI__builtin_arm_nop:
3423     Value = 0;
3424     break;
3425   case ARM::BI__builtin_arm_yield:
3426   case ARM::BI__yield:
3427     Value = 1;
3428     break;
3429   case ARM::BI__builtin_arm_wfe:
3430   case ARM::BI__wfe:
3431     Value = 2;
3432     break;
3433   case ARM::BI__builtin_arm_wfi:
3434   case ARM::BI__wfi:
3435     Value = 3;
3436     break;
3437   case ARM::BI__builtin_arm_sev:
3438   case ARM::BI__sev:
3439     Value = 4;
3440     break;
3441   case ARM::BI__builtin_arm_sevl:
3442   case ARM::BI__sevl:
3443     Value = 5;
3444     break;
3445   }
3446 
3447   return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
3448                             llvm::ConstantInt::get(Int32Ty, Value));
3449 }
3450 
3451 // Generates the IR for the read/write special register builtin,
3452 // ValueType is the type of the value that is to be written or read,
3453 // RegisterType is the type of the register being written to or read from.
3454 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
3455                                          const CallExpr *E,
3456                                          llvm::Type *RegisterType,
3457                                          llvm::Type *ValueType, bool IsRead) {
3458   // write and register intrinsics only support 32 and 64 bit operations.
3459   assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
3460           && "Unsupported size for register.");
3461 
3462   CodeGen::CGBuilderTy &Builder = CGF.Builder;
3463   CodeGen::CodeGenModule &CGM = CGF.CGM;
3464   LLVMContext &Context = CGM.getLLVMContext();
3465 
3466   const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
3467   StringRef SysReg = cast<StringLiteral>(SysRegStrExpr)->getString();
3468 
3469   llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
3470   llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
3471   llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
3472 
3473   llvm::Type *Types[] = { RegisterType };
3474 
3475   bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
3476   assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
3477             && "Can't fit 64-bit value in 32-bit register");
3478 
3479   if (IsRead) {
3480     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
3481     llvm::Value *Call = Builder.CreateCall(F, Metadata);
3482 
3483     if (MixedTypes)
3484       // Read into 64 bit register and then truncate result to 32 bit.
3485       return Builder.CreateTrunc(Call, ValueType);
3486 
3487     if (ValueType->isPointerTy())
3488       // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
3489       return Builder.CreateIntToPtr(Call, ValueType);
3490 
3491     return Call;
3492   }
3493 
3494   llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
3495   llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
3496   if (MixedTypes) {
3497     // Extend 32 bit write value to 64 bit to pass to write.
3498     ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
3499     return Builder.CreateCall(F, { Metadata, ArgValue });
3500   }
3501 
3502   if (ValueType->isPointerTy()) {
3503     // Have VoidPtrTy ArgValue but want to return an i32/i64.
3504     ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
3505     return Builder.CreateCall(F, { Metadata, ArgValue });
3506   }
3507 
3508   return Builder.CreateCall(F, { Metadata, ArgValue });
3509 }
3510 
3511 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
3512 /// argument that specifies the vector type.
3513 static bool HasExtraNeonArgument(unsigned BuiltinID) {
3514   switch (BuiltinID) {
3515   default: break;
3516   case NEON::BI__builtin_neon_vget_lane_i8:
3517   case NEON::BI__builtin_neon_vget_lane_i16:
3518   case NEON::BI__builtin_neon_vget_lane_i32:
3519   case NEON::BI__builtin_neon_vget_lane_i64:
3520   case NEON::BI__builtin_neon_vget_lane_f32:
3521   case NEON::BI__builtin_neon_vgetq_lane_i8:
3522   case NEON::BI__builtin_neon_vgetq_lane_i16:
3523   case NEON::BI__builtin_neon_vgetq_lane_i32:
3524   case NEON::BI__builtin_neon_vgetq_lane_i64:
3525   case NEON::BI__builtin_neon_vgetq_lane_f32:
3526   case NEON::BI__builtin_neon_vset_lane_i8:
3527   case NEON::BI__builtin_neon_vset_lane_i16:
3528   case NEON::BI__builtin_neon_vset_lane_i32:
3529   case NEON::BI__builtin_neon_vset_lane_i64:
3530   case NEON::BI__builtin_neon_vset_lane_f32:
3531   case NEON::BI__builtin_neon_vsetq_lane_i8:
3532   case NEON::BI__builtin_neon_vsetq_lane_i16:
3533   case NEON::BI__builtin_neon_vsetq_lane_i32:
3534   case NEON::BI__builtin_neon_vsetq_lane_i64:
3535   case NEON::BI__builtin_neon_vsetq_lane_f32:
3536   case NEON::BI__builtin_neon_vsha1h_u32:
3537   case NEON::BI__builtin_neon_vsha1cq_u32:
3538   case NEON::BI__builtin_neon_vsha1pq_u32:
3539   case NEON::BI__builtin_neon_vsha1mq_u32:
3540   case ARM::BI_MoveToCoprocessor:
3541   case ARM::BI_MoveToCoprocessor2:
3542     return false;
3543   }
3544   return true;
3545 }
3546 
3547 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
3548                                            const CallExpr *E) {
3549   if (auto Hint = GetValueForARMHint(BuiltinID))
3550     return Hint;
3551 
3552   if (BuiltinID == ARM::BI__emit) {
3553     bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
3554     llvm::FunctionType *FTy =
3555         llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
3556 
3557     APSInt Value;
3558     if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
3559       llvm_unreachable("Sema will ensure that the parameter is constant");
3560 
3561     uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
3562 
3563     llvm::InlineAsm *Emit =
3564         IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
3565                                  /*SideEffects=*/true)
3566                 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
3567                                  /*SideEffects=*/true);
3568 
3569     return Builder.CreateCall(Emit);
3570   }
3571 
3572   if (BuiltinID == ARM::BI__builtin_arm_dbg) {
3573     Value *Option = EmitScalarExpr(E->getArg(0));
3574     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
3575   }
3576 
3577   if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
3578     Value *Address = EmitScalarExpr(E->getArg(0));
3579     Value *RW      = EmitScalarExpr(E->getArg(1));
3580     Value *IsData  = EmitScalarExpr(E->getArg(2));
3581 
3582     // Locality is not supported on ARM target
3583     Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
3584 
3585     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
3586     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
3587   }
3588 
3589   if (BuiltinID == ARM::BI__builtin_arm_rbit) {
3590     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_rbit),
3591                                                EmitScalarExpr(E->getArg(0)),
3592                               "rbit");
3593   }
3594 
3595   if (BuiltinID == ARM::BI__clear_cache) {
3596     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
3597     const FunctionDecl *FD = E->getDirectCallee();
3598     Value *Ops[2];
3599     for (unsigned i = 0; i < 2; i++)
3600       Ops[i] = EmitScalarExpr(E->getArg(i));
3601     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
3602     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
3603     StringRef Name = FD->getName();
3604     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
3605   }
3606 
3607   if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
3608       ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
3609         BuiltinID == ARM::BI__builtin_arm_ldaex) &&
3610        getContext().getTypeSize(E->getType()) == 64) ||
3611       BuiltinID == ARM::BI__ldrexd) {
3612     Function *F;
3613 
3614     switch (BuiltinID) {
3615     default: llvm_unreachable("unexpected builtin");
3616     case ARM::BI__builtin_arm_ldaex:
3617       F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
3618       break;
3619     case ARM::BI__builtin_arm_ldrexd:
3620     case ARM::BI__builtin_arm_ldrex:
3621     case ARM::BI__ldrexd:
3622       F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
3623       break;
3624     }
3625 
3626     Value *LdPtr = EmitScalarExpr(E->getArg(0));
3627     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
3628                                     "ldrexd");
3629 
3630     Value *Val0 = Builder.CreateExtractValue(Val, 1);
3631     Value *Val1 = Builder.CreateExtractValue(Val, 0);
3632     Val0 = Builder.CreateZExt(Val0, Int64Ty);
3633     Val1 = Builder.CreateZExt(Val1, Int64Ty);
3634 
3635     Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
3636     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
3637     Val = Builder.CreateOr(Val, Val1);
3638     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
3639   }
3640 
3641   if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
3642       BuiltinID == ARM::BI__builtin_arm_ldaex) {
3643     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
3644 
3645     QualType Ty = E->getType();
3646     llvm::Type *RealResTy = ConvertType(Ty);
3647     llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
3648                                                   getContext().getTypeSize(Ty));
3649     LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
3650 
3651     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
3652                                        ? Intrinsic::arm_ldaex
3653                                        : Intrinsic::arm_ldrex,
3654                                    LoadAddr->getType());
3655     Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
3656 
3657     if (RealResTy->isPointerTy())
3658       return Builder.CreateIntToPtr(Val, RealResTy);
3659     else {
3660       Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
3661       return Builder.CreateBitCast(Val, RealResTy);
3662     }
3663   }
3664 
3665   if (BuiltinID == ARM::BI__builtin_arm_strexd ||
3666       ((BuiltinID == ARM::BI__builtin_arm_stlex ||
3667         BuiltinID == ARM::BI__builtin_arm_strex) &&
3668        getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
3669     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
3670                                        ? Intrinsic::arm_stlexd
3671                                        : Intrinsic::arm_strexd);
3672     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, nullptr);
3673 
3674     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
3675     Value *Val = EmitScalarExpr(E->getArg(0));
3676     Builder.CreateStore(Val, Tmp);
3677 
3678     Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
3679     Val = Builder.CreateLoad(LdPtr);
3680 
3681     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
3682     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
3683     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
3684     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
3685   }
3686 
3687   if (BuiltinID == ARM::BI__builtin_arm_strex ||
3688       BuiltinID == ARM::BI__builtin_arm_stlex) {
3689     Value *StoreVal = EmitScalarExpr(E->getArg(0));
3690     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
3691 
3692     QualType Ty = E->getArg(0)->getType();
3693     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
3694                                                  getContext().getTypeSize(Ty));
3695     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
3696 
3697     if (StoreVal->getType()->isPointerTy())
3698       StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
3699     else {
3700       StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
3701       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
3702     }
3703 
3704     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
3705                                        ? Intrinsic::arm_stlex
3706                                        : Intrinsic::arm_strex,
3707                                    StoreAddr->getType());
3708     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
3709   }
3710 
3711   if (BuiltinID == ARM::BI__builtin_arm_clrex) {
3712     Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
3713     return Builder.CreateCall(F);
3714   }
3715 
3716   // CRC32
3717   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
3718   switch (BuiltinID) {
3719   case ARM::BI__builtin_arm_crc32b:
3720     CRCIntrinsicID = Intrinsic::arm_crc32b; break;
3721   case ARM::BI__builtin_arm_crc32cb:
3722     CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
3723   case ARM::BI__builtin_arm_crc32h:
3724     CRCIntrinsicID = Intrinsic::arm_crc32h; break;
3725   case ARM::BI__builtin_arm_crc32ch:
3726     CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
3727   case ARM::BI__builtin_arm_crc32w:
3728   case ARM::BI__builtin_arm_crc32d:
3729     CRCIntrinsicID = Intrinsic::arm_crc32w; break;
3730   case ARM::BI__builtin_arm_crc32cw:
3731   case ARM::BI__builtin_arm_crc32cd:
3732     CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
3733   }
3734 
3735   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
3736     Value *Arg0 = EmitScalarExpr(E->getArg(0));
3737     Value *Arg1 = EmitScalarExpr(E->getArg(1));
3738 
    // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
    // intrinsics, hence we need different codegen for these cases.
3741     if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
3742         BuiltinID == ARM::BI__builtin_arm_crc32cd) {
3743       Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
3744       Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
3745       Value *Arg1b = Builder.CreateLShr(Arg1, C1);
3746       Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
3747 
3748       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
3749       Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
3750       return Builder.CreateCall(F, {Res, Arg1b});
3751     } else {
3752       Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
3753 
3754       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
3755       return Builder.CreateCall(F, {Arg0, Arg1});
3756     }
3757   }
3758 
3759   if (BuiltinID == ARM::BI__builtin_arm_rsr ||
3760       BuiltinID == ARM::BI__builtin_arm_rsr64 ||
3761       BuiltinID == ARM::BI__builtin_arm_rsrp ||
3762       BuiltinID == ARM::BI__builtin_arm_wsr ||
3763       BuiltinID == ARM::BI__builtin_arm_wsr64 ||
3764       BuiltinID == ARM::BI__builtin_arm_wsrp) {
3765 
3766     bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
3767                   BuiltinID == ARM::BI__builtin_arm_rsr64 ||
3768                   BuiltinID == ARM::BI__builtin_arm_rsrp;
3769 
3770     bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
3771                             BuiltinID == ARM::BI__builtin_arm_wsrp;
3772 
3773     bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
3774                    BuiltinID == ARM::BI__builtin_arm_wsr64;
3775 
3776     llvm::Type *ValueType;
3777     llvm::Type *RegisterType;
3778     if (IsPointerBuiltin) {
3779       ValueType = VoidPtrTy;
3780       RegisterType = Int32Ty;
3781     } else if (Is64Bit) {
3782       ValueType = RegisterType = Int64Ty;
3783     } else {
3784       ValueType = RegisterType = Int32Ty;
3785     }
3786 
3787     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
3788   }
3789 
3790   // Find out if any arguments are required to be integer constant
3791   // expressions.
3792   unsigned ICEArguments = 0;
3793   ASTContext::GetBuiltinTypeError Error;
3794   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
3795   assert(Error == ASTContext::GE_None && "Should not codegen an error");
3796 
3797   auto getAlignmentValue32 = [&](Address addr) -> Value* {
3798     return Builder.getInt32(addr.getAlignment().getQuantity());
3799   };
3800 
3801   Address PtrOp0 = Address::invalid();
3802   Address PtrOp1 = Address::invalid();
3803   SmallVector<Value*, 4> Ops;
3804   bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
3805   unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
3806   for (unsigned i = 0, e = NumArgs; i != e; i++) {
3807     if (i == 0) {
3808       switch (BuiltinID) {
3809       case NEON::BI__builtin_neon_vld1_v:
3810       case NEON::BI__builtin_neon_vld1q_v:
3811       case NEON::BI__builtin_neon_vld1q_lane_v:
3812       case NEON::BI__builtin_neon_vld1_lane_v:
3813       case NEON::BI__builtin_neon_vld1_dup_v:
3814       case NEON::BI__builtin_neon_vld1q_dup_v:
3815       case NEON::BI__builtin_neon_vst1_v:
3816       case NEON::BI__builtin_neon_vst1q_v:
3817       case NEON::BI__builtin_neon_vst1q_lane_v:
3818       case NEON::BI__builtin_neon_vst1_lane_v:
3819       case NEON::BI__builtin_neon_vst2_v:
3820       case NEON::BI__builtin_neon_vst2q_v:
3821       case NEON::BI__builtin_neon_vst2_lane_v:
3822       case NEON::BI__builtin_neon_vst2q_lane_v:
3823       case NEON::BI__builtin_neon_vst3_v:
3824       case NEON::BI__builtin_neon_vst3q_v:
3825       case NEON::BI__builtin_neon_vst3_lane_v:
3826       case NEON::BI__builtin_neon_vst3q_lane_v:
3827       case NEON::BI__builtin_neon_vst4_v:
3828       case NEON::BI__builtin_neon_vst4q_v:
3829       case NEON::BI__builtin_neon_vst4_lane_v:
3830       case NEON::BI__builtin_neon_vst4q_lane_v:
3831         // Get the alignment for the argument in addition to the value;
3832         // we'll use it later.
3833         PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
3834         Ops.push_back(PtrOp0.getPointer());
3835         continue;
3836       }
3837     }
3838     if (i == 1) {
3839       switch (BuiltinID) {
3840       case NEON::BI__builtin_neon_vld2_v:
3841       case NEON::BI__builtin_neon_vld2q_v:
3842       case NEON::BI__builtin_neon_vld3_v:
3843       case NEON::BI__builtin_neon_vld3q_v:
3844       case NEON::BI__builtin_neon_vld4_v:
3845       case NEON::BI__builtin_neon_vld4q_v:
3846       case NEON::BI__builtin_neon_vld2_lane_v:
3847       case NEON::BI__builtin_neon_vld2q_lane_v:
3848       case NEON::BI__builtin_neon_vld3_lane_v:
3849       case NEON::BI__builtin_neon_vld3q_lane_v:
3850       case NEON::BI__builtin_neon_vld4_lane_v:
3851       case NEON::BI__builtin_neon_vld4q_lane_v:
3852       case NEON::BI__builtin_neon_vld2_dup_v:
3853       case NEON::BI__builtin_neon_vld3_dup_v:
3854       case NEON::BI__builtin_neon_vld4_dup_v:
3855         // Get the alignment for the argument in addition to the value;
3856         // we'll use it later.
3857         PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
3858         Ops.push_back(PtrOp1.getPointer());
3859         continue;
3860       }
3861     }
3862 
3863     if ((ICEArguments & (1 << i)) == 0) {
3864       Ops.push_back(EmitScalarExpr(E->getArg(i)));
3865     } else {
3866       // If this is required to be a constant, constant fold it so that we know
3867       // that the generated intrinsic gets a ConstantInt.
3868       llvm::APSInt Result;
3869       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
3870       assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
3871       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
3872     }
3873   }
3874 
3875   switch (BuiltinID) {
3876   default: break;
3877 
3878   case NEON::BI__builtin_neon_vget_lane_i8:
3879   case NEON::BI__builtin_neon_vget_lane_i16:
3880   case NEON::BI__builtin_neon_vget_lane_i32:
3881   case NEON::BI__builtin_neon_vget_lane_i64:
3882   case NEON::BI__builtin_neon_vget_lane_f32:
3883   case NEON::BI__builtin_neon_vgetq_lane_i8:
3884   case NEON::BI__builtin_neon_vgetq_lane_i16:
3885   case NEON::BI__builtin_neon_vgetq_lane_i32:
3886   case NEON::BI__builtin_neon_vgetq_lane_i64:
3887   case NEON::BI__builtin_neon_vgetq_lane_f32:
3888     return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
3889 
3890   case NEON::BI__builtin_neon_vset_lane_i8:
3891   case NEON::BI__builtin_neon_vset_lane_i16:
3892   case NEON::BI__builtin_neon_vset_lane_i32:
3893   case NEON::BI__builtin_neon_vset_lane_i64:
3894   case NEON::BI__builtin_neon_vset_lane_f32:
3895   case NEON::BI__builtin_neon_vsetq_lane_i8:
3896   case NEON::BI__builtin_neon_vsetq_lane_i16:
3897   case NEON::BI__builtin_neon_vsetq_lane_i32:
3898   case NEON::BI__builtin_neon_vsetq_lane_i64:
3899   case NEON::BI__builtin_neon_vsetq_lane_f32:
3900     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
3901 
3902   case NEON::BI__builtin_neon_vsha1h_u32:
3903     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
3904                         "vsha1h");
3905   case NEON::BI__builtin_neon_vsha1cq_u32:
3906     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
3907                         "vsha1h");
3908   case NEON::BI__builtin_neon_vsha1pq_u32:
3909     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
3910                         "vsha1h");
3911   case NEON::BI__builtin_neon_vsha1mq_u32:
3912     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
3913                         "vsha1h");
3914 
3915   // The ARM _MoveToCoprocessor builtins put the input register value as
3916   // the first argument, but the LLVM intrinsic expects it as the third one.
3917   case ARM::BI_MoveToCoprocessor:
3918   case ARM::BI_MoveToCoprocessor2: {
3919     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
3920                                    Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
3921     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
3922                                   Ops[3], Ops[4], Ops[5]});
3923   }
3924   }
3925 
3926   // Get the last argument, which specifies the vector type.
3927   assert(HasExtraArg);
3928   llvm::APSInt Result;
3929   const Expr *Arg = E->getArg(E->getNumArgs()-1);
3930   if (!Arg->isIntegerConstantExpr(Result, getContext()))
3931     return nullptr;
3932 
3933   if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
3934       BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
3935     // Determine the overloaded type of this builtin.
3936     llvm::Type *Ty;
3937     if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
3938       Ty = FloatTy;
3939     else
3940       Ty = DoubleTy;
3941 
3942     // Determine whether this is an unsigned conversion or not.
3943     bool usgn = Result.getZExtValue() == 1;
3944     unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
3945 
3946     // Call the appropriate intrinsic.
3947     Function *F = CGM.getIntrinsic(Int, Ty);
3948     return Builder.CreateCall(F, Ops, "vcvtr");
3949   }
3950 
3951   // Determine the type of this overloaded NEON intrinsic.
3952   NeonTypeFlags Type(Result.getZExtValue());
3953   bool usgn = Type.isUnsigned();
3954   bool rightShift = false;
3955 
3956   llvm::VectorType *VTy = GetNeonType(this, Type);
3957   llvm::Type *Ty = VTy;
3958   if (!Ty)
3959     return nullptr;
3960 
3961   // Many NEON builtins have identical semantics and uses in ARM and
3962   // AArch64. Emit these in a single function.
3963   auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
3964   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
3965       IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
3966   if (Builtin)
3967     return EmitCommonNeonBuiltinExpr(
3968         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
3969         Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);
3970 
3971   unsigned Int;
3972   switch (BuiltinID) {
3973   default: return nullptr;
3974   case NEON::BI__builtin_neon_vld1q_lane_v:
3975     // Handle 64-bit integer elements as a special case.  Use shuffles of
3976     // one-element vectors to avoid poor code for i64 in the backend.
3977     if (VTy->getElementType()->isIntegerTy(64)) {
3978       // Extract the other lane.
3979       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3980       uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
3981       Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
3982       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
3983       // Load the value as a one-element vector.
3984       Ty = llvm::VectorType::get(VTy->getElementType(), 1);
3985       llvm::Type *Tys[] = {Ty, Int8PtrTy};
3986       Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
3987       Value *Align = getAlignmentValue32(PtrOp0);
3988       Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
3989       // Combine them.
3990       uint32_t Indices[] = {1 - Lane, Lane};
3991       SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
3992       return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
3993     }
3994     // fall through
3995   case NEON::BI__builtin_neon_vld1_lane_v: {
3996     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3997     PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
3998     Value *Ld = Builder.CreateLoad(PtrOp0);
3999     return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
4000   }
4001   case NEON::BI__builtin_neon_vld2_dup_v:
4002   case NEON::BI__builtin_neon_vld3_dup_v:
4003   case NEON::BI__builtin_neon_vld4_dup_v: {
4004     // Handle 64-bit elements as a special-case.  There is no "dup" needed.
4005     if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
4006       switch (BuiltinID) {
4007       case NEON::BI__builtin_neon_vld2_dup_v:
4008         Int = Intrinsic::arm_neon_vld2;
4009         break;
4010       case NEON::BI__builtin_neon_vld3_dup_v:
4011         Int = Intrinsic::arm_neon_vld3;
4012         break;
4013       case NEON::BI__builtin_neon_vld4_dup_v:
4014         Int = Intrinsic::arm_neon_vld4;
4015         break;
4016       default: llvm_unreachable("unknown vld_dup intrinsic?");
4017       }
4018       llvm::Type *Tys[] = {Ty, Int8PtrTy};
4019       Function *F = CGM.getIntrinsic(Int, Tys);
4020       llvm::Value *Align = getAlignmentValue32(PtrOp1);
4021       Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
4022       Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4023       Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4024       return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4025     }
4026     switch (BuiltinID) {
4027     case NEON::BI__builtin_neon_vld2_dup_v:
4028       Int = Intrinsic::arm_neon_vld2lane;
4029       break;
4030     case NEON::BI__builtin_neon_vld3_dup_v:
4031       Int = Intrinsic::arm_neon_vld3lane;
4032       break;
4033     case NEON::BI__builtin_neon_vld4_dup_v:
4034       Int = Intrinsic::arm_neon_vld4lane;
4035       break;
4036     default: llvm_unreachable("unknown vld_dup intrinsic?");
4037     }
4038     llvm::Type *Tys[] = {Ty, Int8PtrTy};
4039     Function *F = CGM.getIntrinsic(Int, Tys);
4040     llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
4041 
4042     SmallVector<Value*, 6> Args;
4043     Args.push_back(Ops[1]);
4044     Args.append(STy->getNumElements(), UndefValue::get(Ty));
4045 
4046     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
4047     Args.push_back(CI);
4048     Args.push_back(getAlignmentValue32(PtrOp1));
4049 
4050     Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
4051     // splat lane 0 to all elts in each vector of the result.
4052     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
4053       Value *Val = Builder.CreateExtractValue(Ops[1], i);
4054       Value *Elt = Builder.CreateBitCast(Val, Ty);
4055       Elt = EmitNeonSplat(Elt, CI);
4056       Elt = Builder.CreateBitCast(Elt, Val->getType());
4057       Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
4058     }
4059     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4060     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4061     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4062   }
4063   case NEON::BI__builtin_neon_vqrshrn_n_v:
4064     Int =
4065       usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
4066     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
4067                         1, true);
4068   case NEON::BI__builtin_neon_vqrshrun_n_v:
4069     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
4070                         Ops, "vqrshrun_n", 1, true);
4071   case NEON::BI__builtin_neon_vqshrn_n_v:
4072     Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
4073     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
4074                         1, true);
4075   case NEON::BI__builtin_neon_vqshrun_n_v:
4076     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
4077                         Ops, "vqshrun_n", 1, true);
4078   case NEON::BI__builtin_neon_vrecpe_v:
4079   case NEON::BI__builtin_neon_vrecpeq_v:
4080     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
4081                         Ops, "vrecpe");
4082   case NEON::BI__builtin_neon_vrshrn_n_v:
4083     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
4084                         Ops, "vrshrn_n", 1, true);
4085   case NEON::BI__builtin_neon_vrsra_n_v:
4086   case NEON::BI__builtin_neon_vrsraq_n_v:
4087     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4088     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4089     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
4090     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
4091     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
4092     return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
4093   case NEON::BI__builtin_neon_vsri_n_v:
4094   case NEON::BI__builtin_neon_vsriq_n_v:
4095     rightShift = true;
4096   case NEON::BI__builtin_neon_vsli_n_v:
4097   case NEON::BI__builtin_neon_vsliq_n_v:
4098     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
4099     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
4100                         Ops, "vsli_n");
4101   case NEON::BI__builtin_neon_vsra_n_v:
4102   case NEON::BI__builtin_neon_vsraq_n_v:
4103     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4104     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
4105     return Builder.CreateAdd(Ops[0], Ops[1]);
4106   case NEON::BI__builtin_neon_vst1q_lane_v:
4107     // Handle 64-bit integer elements as a special case.  Use a shuffle to get
4108     // a one-element vector and avoid poor code for i64 in the backend.
4109     if (VTy->getElementType()->isIntegerTy(64)) {
4110       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4111       Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
4112       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
4113       Ops[2] = getAlignmentValue32(PtrOp0);
4114       llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
4115       return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
4116                                                  Tys), Ops);
4117     }
4118     // fall through
4119   case NEON::BI__builtin_neon_vst1_lane_v: {
4120     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4121     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
4122     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4123     auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
4124     return St;
4125   }
4126   case NEON::BI__builtin_neon_vtbl1_v:
4127     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
4128                         Ops, "vtbl1");
4129   case NEON::BI__builtin_neon_vtbl2_v:
4130     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
4131                         Ops, "vtbl2");
4132   case NEON::BI__builtin_neon_vtbl3_v:
4133     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
4134                         Ops, "vtbl3");
4135   case NEON::BI__builtin_neon_vtbl4_v:
4136     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
4137                         Ops, "vtbl4");
4138   case NEON::BI__builtin_neon_vtbx1_v:
4139     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
4140                         Ops, "vtbx1");
4141   case NEON::BI__builtin_neon_vtbx2_v:
4142     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
4143                         Ops, "vtbx2");
4144   case NEON::BI__builtin_neon_vtbx3_v:
4145     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
4146                         Ops, "vtbx3");
4147   case NEON::BI__builtin_neon_vtbx4_v:
4148     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
4149                         Ops, "vtbx4");
4150   }
4151 }
4152 
4153 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
4154                                       const CallExpr *E,
4155                                       SmallVectorImpl<Value *> &Ops) {
4156   unsigned int Int = 0;
4157   const char *s = nullptr;
4158 
4159   switch (BuiltinID) {
4160   default:
4161     return nullptr;
4162   case NEON::BI__builtin_neon_vtbl1_v:
4163   case NEON::BI__builtin_neon_vqtbl1_v:
4164   case NEON::BI__builtin_neon_vqtbl1q_v:
4165   case NEON::BI__builtin_neon_vtbl2_v:
4166   case NEON::BI__builtin_neon_vqtbl2_v:
4167   case NEON::BI__builtin_neon_vqtbl2q_v:
4168   case NEON::BI__builtin_neon_vtbl3_v:
4169   case NEON::BI__builtin_neon_vqtbl3_v:
4170   case NEON::BI__builtin_neon_vqtbl3q_v:
4171   case NEON::BI__builtin_neon_vtbl4_v:
4172   case NEON::BI__builtin_neon_vqtbl4_v:
4173   case NEON::BI__builtin_neon_vqtbl4q_v:
4174     break;
4175   case NEON::BI__builtin_neon_vtbx1_v:
4176   case NEON::BI__builtin_neon_vqtbx1_v:
4177   case NEON::BI__builtin_neon_vqtbx1q_v:
4178   case NEON::BI__builtin_neon_vtbx2_v:
4179   case NEON::BI__builtin_neon_vqtbx2_v:
4180   case NEON::BI__builtin_neon_vqtbx2q_v:
4181   case NEON::BI__builtin_neon_vtbx3_v:
4182   case NEON::BI__builtin_neon_vqtbx3_v:
4183   case NEON::BI__builtin_neon_vqtbx3q_v:
4184   case NEON::BI__builtin_neon_vtbx4_v:
4185   case NEON::BI__builtin_neon_vqtbx4_v:
4186   case NEON::BI__builtin_neon_vqtbx4q_v:
4187     break;
4188   }
4189 
4190   assert(E->getNumArgs() >= 3);
4191 
4192   // Get the last argument, which specifies the vector type.
4193   llvm::APSInt Result;
4194   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
4195   if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
4196     return nullptr;
4197 
4198   // Determine the type of this overloaded NEON intrinsic.
4199   NeonTypeFlags Type(Result.getZExtValue());
4200   llvm::VectorType *Ty = GetNeonType(&CGF, Type);
4201   if (!Ty)
4202     return nullptr;
4203 
4204   CodeGen::CGBuilderTy &Builder = CGF.Builder;
4205 
4206   // AArch64 scalar builtins are not overloaded, they do not have an extra
4207   // argument that specifies the vector type, need to handle each case.
4208   switch (BuiltinID) {
4209   case NEON::BI__builtin_neon_vtbl1_v: {
4210     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
4211                               Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
4212                               "vtbl1");
4213   }
4214   case NEON::BI__builtin_neon_vtbl2_v: {
4215     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
4216                               Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
4217                               "vtbl1");
4218   }
4219   case NEON::BI__builtin_neon_vtbl3_v: {
4220     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
4221                               Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
4222                               "vtbl2");
4223   }
4224   case NEON::BI__builtin_neon_vtbl4_v: {
4225     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
4226                               Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
4227                               "vtbl2");
4228   }
4229   case NEON::BI__builtin_neon_vtbx1_v: {
4230     Value *TblRes =
4231         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
4232                            Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
4233 
4234     llvm::Constant *EightV = ConstantInt::get(Ty, 8);
4235     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
4236     CmpRes = Builder.CreateSExt(CmpRes, Ty);
4237 
4238     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
4239     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
4240     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
4241   }
4242   case NEON::BI__builtin_neon_vtbx2_v: {
4243     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
4244                               Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
4245                               "vtbx1");
4246   }
4247   case NEON::BI__builtin_neon_vtbx3_v: {
4248     Value *TblRes =
4249         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
4250                            Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
4251 
4252     llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
4253     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
4254                                            TwentyFourV);
4255     CmpRes = Builder.CreateSExt(CmpRes, Ty);
4256 
4257     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
4258     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
4259     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
4260   }
4261   case NEON::BI__builtin_neon_vtbx4_v: {
4262     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
4263                               Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
4264                               "vtbx2");
4265   }
4266   case NEON::BI__builtin_neon_vqtbl1_v:
4267   case NEON::BI__builtin_neon_vqtbl1q_v:
4268     Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
4269   case NEON::BI__builtin_neon_vqtbl2_v:
4270   case NEON::BI__builtin_neon_vqtbl2q_v: {
4271     Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
4272   case NEON::BI__builtin_neon_vqtbl3_v:
4273   case NEON::BI__builtin_neon_vqtbl3q_v:
4274     Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
4275   case NEON::BI__builtin_neon_vqtbl4_v:
4276   case NEON::BI__builtin_neon_vqtbl4q_v:
4277     Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
4278   case NEON::BI__builtin_neon_vqtbx1_v:
4279   case NEON::BI__builtin_neon_vqtbx1q_v:
4280     Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
4281   case NEON::BI__builtin_neon_vqtbx2_v:
4282   case NEON::BI__builtin_neon_vqtbx2q_v:
4283     Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
4284   case NEON::BI__builtin_neon_vqtbx3_v:
4285   case NEON::BI__builtin_neon_vqtbx3q_v:
4286     Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
4287   case NEON::BI__builtin_neon_vqtbx4_v:
4288   case NEON::BI__builtin_neon_vqtbx4q_v:
4289     Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
4290   }
4291   }
4292 
4293   if (!Int)
4294     return nullptr;
4295 
4296   Function *F = CGF.CGM.getIntrinsic(Int, Ty);
4297   return CGF.EmitNeonCall(F, Ops, s);
4298 }
4299 
4300 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
4301   llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
4302   Op = Builder.CreateBitCast(Op, Int16Ty);
4303   Value *V = UndefValue::get(VTy);
4304   llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
4305   Op = Builder.CreateInsertElement(V, Op, CI);
4306   return Op;
4307 }
4308 
4309 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
4310                                                const CallExpr *E) {
4311   unsigned HintID = static_cast<unsigned>(-1);
4312   switch (BuiltinID) {
4313   default: break;
4314   case AArch64::BI__builtin_arm_nop:
4315     HintID = 0;
4316     break;
4317   case AArch64::BI__builtin_arm_yield:
4318     HintID = 1;
4319     break;
4320   case AArch64::BI__builtin_arm_wfe:
4321     HintID = 2;
4322     break;
4323   case AArch64::BI__builtin_arm_wfi:
4324     HintID = 3;
4325     break;
4326   case AArch64::BI__builtin_arm_sev:
4327     HintID = 4;
4328     break;
4329   case AArch64::BI__builtin_arm_sevl:
4330     HintID = 5;
4331     break;
4332   }
4333 
4334   if (HintID != static_cast<unsigned>(-1)) {
4335     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
4336     return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
4337   }
4338 
4339   if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
4340     Value *Address         = EmitScalarExpr(E->getArg(0));
4341     Value *RW              = EmitScalarExpr(E->getArg(1));
4342     Value *CacheLevel      = EmitScalarExpr(E->getArg(2));
4343     Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
4344     Value *IsData          = EmitScalarExpr(E->getArg(4));
4345 
4346     Value *Locality = nullptr;
4347     if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
4348       // Temporal fetch, needs to convert cache level to locality.
4349       Locality = llvm::ConstantInt::get(Int32Ty,
4350         -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
4351     } else {
4352       // Streaming fetch.
4353       Locality = llvm::ConstantInt::get(Int32Ty, 0);
4354     }
4355 
4356     // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
4357     // PLDL3STRM or PLDL2STRM.
4358     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
4359     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
4360   }
4361 
4362   if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
4363     assert((getContext().getTypeSize(E->getType()) == 32) &&
4364            "rbit of unusual size!");
4365     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4366     return Builder.CreateCall(
4367         CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit");
4368   }
4369   if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
4370     assert((getContext().getTypeSize(E->getType()) == 64) &&
4371            "rbit of unusual size!");
4372     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4373     return Builder.CreateCall(
4374         CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit");
4375   }
4376 
4377   if (BuiltinID == AArch64::BI__clear_cache) {
4378     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4379     const FunctionDecl *FD = E->getDirectCallee();
4380     Value *Ops[2];
4381     for (unsigned i = 0; i < 2; i++)
4382       Ops[i] = EmitScalarExpr(E->getArg(i));
4383     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4384     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4385     StringRef Name = FD->getName();
4386     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4387   }
4388 
4389   if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
4390       BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
4391       getContext().getTypeSize(E->getType()) == 128) {
4392     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
4393                                        ? Intrinsic::aarch64_ldaxp
4394                                        : Intrinsic::aarch64_ldxp);
4395 
4396     Value *LdPtr = EmitScalarExpr(E->getArg(0));
4397     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4398                                     "ldxp");
4399 
4400     Value *Val0 = Builder.CreateExtractValue(Val, 1);
4401     Value *Val1 = Builder.CreateExtractValue(Val, 0);
4402     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
4403     Val0 = Builder.CreateZExt(Val0, Int128Ty);
4404     Val1 = Builder.CreateZExt(Val1, Int128Ty);
4405 
4406     Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
4407     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4408     Val = Builder.CreateOr(Val, Val1);
4409     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4410   } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
4411              BuiltinID == AArch64::BI__builtin_arm_ldaex) {
4412     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4413 
4414     QualType Ty = E->getType();
4415     llvm::Type *RealResTy = ConvertType(Ty);
4416     llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
4417                                                   getContext().getTypeSize(Ty));
4418     LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
4419 
4420     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
4421                                        ? Intrinsic::aarch64_ldaxr
4422                                        : Intrinsic::aarch64_ldxr,
4423                                    LoadAddr->getType());
4424     Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
4425 
4426     if (RealResTy->isPointerTy())
4427       return Builder.CreateIntToPtr(Val, RealResTy);
4428 
4429     Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4430     return Builder.CreateBitCast(Val, RealResTy);
4431   }
4432 
4433   if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
4434        BuiltinID == AArch64::BI__builtin_arm_stlex) &&
4435       getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
4436     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
4437                                        ? Intrinsic::aarch64_stlxp
4438                                        : Intrinsic::aarch64_stxp);
4439     llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, nullptr);
4440 
4441     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4442     EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
4443 
4444     Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
4445     llvm::Value *Val = Builder.CreateLoad(Tmp);
4446 
4447     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4448     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4449     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
4450                                          Int8PtrTy);
4451     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
4452   }
4453 
4454   if (BuiltinID == AArch64::BI__builtin_arm_strex ||
4455       BuiltinID == AArch64::BI__builtin_arm_stlex) {
4456     Value *StoreVal = EmitScalarExpr(E->getArg(0));
4457     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4458 
4459     QualType Ty = E->getArg(0)->getType();
4460     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4461                                                  getContext().getTypeSize(Ty));
4462     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4463 
4464     if (StoreVal->getType()->isPointerTy())
4465       StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
4466     else {
4467       StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
4468       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
4469     }
4470 
4471     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
4472                                        ? Intrinsic::aarch64_stlxr
4473                                        : Intrinsic::aarch64_stxr,
4474                                    StoreAddr->getType());
4475     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
4476   }
4477 
4478   if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
4479     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
4480     return Builder.CreateCall(F);
4481   }
4482 
4483   if (BuiltinID == AArch64::BI__builtin_thread_pointer) {
4484     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_thread_pointer);
4485     return Builder.CreateCall(F);
4486   }
4487 
4488   // CRC32
4489   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4490   switch (BuiltinID) {
4491   case AArch64::BI__builtin_arm_crc32b:
4492     CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
4493   case AArch64::BI__builtin_arm_crc32cb:
4494     CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
4495   case AArch64::BI__builtin_arm_crc32h:
4496     CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
4497   case AArch64::BI__builtin_arm_crc32ch:
4498     CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
4499   case AArch64::BI__builtin_arm_crc32w:
4500     CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
4501   case AArch64::BI__builtin_arm_crc32cw:
4502     CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
4503   case AArch64::BI__builtin_arm_crc32d:
4504     CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
4505   case AArch64::BI__builtin_arm_crc32cd:
4506     CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
4507   }
4508 
4509   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4510     Value *Arg0 = EmitScalarExpr(E->getArg(0));
4511     Value *Arg1 = EmitScalarExpr(E->getArg(1));
4512     Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4513 
4514     llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
4515     Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
4516 
4517     return Builder.CreateCall(F, {Arg0, Arg1});
4518   }
4519 
4520   if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
4521       BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
4522       BuiltinID == AArch64::BI__builtin_arm_rsrp ||
4523       BuiltinID == AArch64::BI__builtin_arm_wsr ||
4524       BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
4525       BuiltinID == AArch64::BI__builtin_arm_wsrp) {
4526 
4527     bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
4528                   BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
4529                   BuiltinID == AArch64::BI__builtin_arm_rsrp;
4530 
4531     bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
4532                             BuiltinID == AArch64::BI__builtin_arm_wsrp;
4533 
4534     bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
4535                    BuiltinID != AArch64::BI__builtin_arm_wsr;
4536 
4537     llvm::Type *ValueType;
4538     llvm::Type *RegisterType = Int64Ty;
4539     if (IsPointerBuiltin) {
4540       ValueType = VoidPtrTy;
4541     } else if (Is64Bit) {
4542       ValueType = Int64Ty;
4543     } else {
4544       ValueType = Int32Ty;
4545     }
4546 
4547     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
4548   }
4549 
4550   // Find out if any arguments are required to be integer constant
4551   // expressions.
4552   unsigned ICEArguments = 0;
4553   ASTContext::GetBuiltinTypeError Error;
4554   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4555   assert(Error == ASTContext::GE_None && "Should not codegen an error");
4556 
4557   llvm::SmallVector<Value*, 4> Ops;
4558   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
4559     if ((ICEArguments & (1 << i)) == 0) {
4560       Ops.push_back(EmitScalarExpr(E->getArg(i)));
4561     } else {
4562       // If this is required to be a constant, constant fold it so that we know
4563       // that the generated intrinsic gets a ConstantInt.
4564       llvm::APSInt Result;
4565       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
4566       assert(IsConst && "Constant arg isn't actually constant?");
4567       (void)IsConst;
4568       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
4569     }
4570   }
4571 
4572   auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
4573   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
4574       SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
4575 
4576   if (Builtin) {
4577     Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
4578     Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
4579     assert(Result && "SISD intrinsic should have been handled");
4580     return Result;
4581   }
4582 
4583   llvm::APSInt Result;
4584   const Expr *Arg = E->getArg(E->getNumArgs()-1);
4585   NeonTypeFlags Type(0);
4586   if (Arg->isIntegerConstantExpr(Result, getContext()))
4587     // Determine the type of this overloaded NEON intrinsic.
4588     Type = NeonTypeFlags(Result.getZExtValue());
4589 
4590   bool usgn = Type.isUnsigned();
4591   bool quad = Type.isQuad();
4592 
4593   // Handle non-overloaded intrinsics first.
4594   switch (BuiltinID) {
4595   default: break;
4596   case NEON::BI__builtin_neon_vldrq_p128: {
4597     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
4598     Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
4599     return Builder.CreateDefaultAlignedLoad(Ptr);
4600   }
4601   case NEON::BI__builtin_neon_vstrq_p128: {
4602     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
4603     Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
4604     return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
4605   }
4606   case NEON::BI__builtin_neon_vcvts_u32_f32:
4607   case NEON::BI__builtin_neon_vcvtd_u64_f64:
4608     usgn = true;
4609     // FALL THROUGH
4610   case NEON::BI__builtin_neon_vcvts_s32_f32:
4611   case NEON::BI__builtin_neon_vcvtd_s64_f64: {
4612     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4613     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
4614     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
4615     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
4616     Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
4617     if (usgn)
4618       return Builder.CreateFPToUI(Ops[0], InTy);
4619     return Builder.CreateFPToSI(Ops[0], InTy);
4620   }
4621   case NEON::BI__builtin_neon_vcvts_f32_u32:
4622   case NEON::BI__builtin_neon_vcvtd_f64_u64:
4623     usgn = true;
4624     // FALL THROUGH
4625   case NEON::BI__builtin_neon_vcvts_f32_s32:
4626   case NEON::BI__builtin_neon_vcvtd_f64_s64: {
4627     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4628     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
4629     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
4630     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
4631     Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
4632     if (usgn)
4633       return Builder.CreateUIToFP(Ops[0], FTy);
4634     return Builder.CreateSIToFP(Ops[0], FTy);
4635   }
4636   case NEON::BI__builtin_neon_vpaddd_s64: {
4637     llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
4638     Value *Vec = EmitScalarExpr(E->getArg(0));
4639     // The vector is v2f64, so make sure it's bitcast to that.
4640     Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
4641     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
4642     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
4643     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
4644     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
4645     // Pairwise addition of a v2f64 into a scalar f64.
4646     return Builder.CreateAdd(Op0, Op1, "vpaddd");
4647   }
4648   case NEON::BI__builtin_neon_vpaddd_f64: {
4649     llvm::Type *Ty =
4650       llvm::VectorType::get(DoubleTy, 2);
4651     Value *Vec = EmitScalarExpr(E->getArg(0));
4652     // The vector is v2f64, so make sure it's bitcast to that.
4653     Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
4654     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
4655     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
4656     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
4657     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
4658     // Pairwise addition of a v2f64 into a scalar f64.
4659     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
4660   }
4661   case NEON::BI__builtin_neon_vpadds_f32: {
4662     llvm::Type *Ty =
4663       llvm::VectorType::get(FloatTy, 2);
4664     Value *Vec = EmitScalarExpr(E->getArg(0));
4665     // The vector is v2f32, so make sure it's bitcast to that.
4666     Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
4667     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
4668     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
4669     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
4670     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
4671     // Pairwise addition of a v2f32 into a scalar f32.
4672     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
4673   }
4674   case NEON::BI__builtin_neon_vceqzd_s64:
4675   case NEON::BI__builtin_neon_vceqzd_f64:
4676   case NEON::BI__builtin_neon_vceqzs_f32:
4677     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4678     return EmitAArch64CompareBuiltinExpr(
4679         Ops[0], ConvertType(E->getCallReturnType(getContext())),
4680         ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
4681   case NEON::BI__builtin_neon_vcgezd_s64:
4682   case NEON::BI__builtin_neon_vcgezd_f64:
4683   case NEON::BI__builtin_neon_vcgezs_f32:
4684     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4685     return EmitAArch64CompareBuiltinExpr(
4686         Ops[0], ConvertType(E->getCallReturnType(getContext())),
4687         ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
4688   case NEON::BI__builtin_neon_vclezd_s64:
4689   case NEON::BI__builtin_neon_vclezd_f64:
4690   case NEON::BI__builtin_neon_vclezs_f32:
4691     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4692     return EmitAArch64CompareBuiltinExpr(
4693         Ops[0], ConvertType(E->getCallReturnType(getContext())),
4694         ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
4695   case NEON::BI__builtin_neon_vcgtzd_s64:
4696   case NEON::BI__builtin_neon_vcgtzd_f64:
4697   case NEON::BI__builtin_neon_vcgtzs_f32:
4698     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4699     return EmitAArch64CompareBuiltinExpr(
4700         Ops[0], ConvertType(E->getCallReturnType(getContext())),
4701         ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
4702   case NEON::BI__builtin_neon_vcltzd_s64:
4703   case NEON::BI__builtin_neon_vcltzd_f64:
4704   case NEON::BI__builtin_neon_vcltzs_f32:
4705     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4706     return EmitAArch64CompareBuiltinExpr(
4707         Ops[0], ConvertType(E->getCallReturnType(getContext())),
4708         ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
4709 
4710   case NEON::BI__builtin_neon_vceqzd_u64: {
4711     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4712     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
4713     Ops[0] =
4714         Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
4715     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
4716   }
4717   case NEON::BI__builtin_neon_vceqd_f64:
4718   case NEON::BI__builtin_neon_vcled_f64:
4719   case NEON::BI__builtin_neon_vcltd_f64:
4720   case NEON::BI__builtin_neon_vcged_f64:
4721   case NEON::BI__builtin_neon_vcgtd_f64: {
4722     llvm::CmpInst::Predicate P;
4723     switch (BuiltinID) {
4724     default: llvm_unreachable("missing builtin ID in switch!");
4725     case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
4726     case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
4727     case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
4728     case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
4729     case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
4730     }
4731     Ops.push_back(EmitScalarExpr(E->getArg(1)));
4732     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
4733     Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
4734     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
4735     return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
4736   }
4737   case NEON::BI__builtin_neon_vceqs_f32: // Scalar float compares: eq/le/lt/ge/gt share one lowering.
4738   case NEON::BI__builtin_neon_vcles_f32:
4739   case NEON::BI__builtin_neon_vclts_f32:
4740   case NEON::BI__builtin_neon_vcges_f32:
4741   case NEON::BI__builtin_neon_vcgts_f32: {
4742     llvm::CmpInst::Predicate P; // Assigned on every reachable path of the switch below.
4743     switch (BuiltinID) { // Pick the ordered fcmp predicate that matches the builtin.
4744     default: llvm_unreachable("missing builtin ID in switch!");
4745     case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
4746     case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
4747     case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
4748     case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
4749     case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
4750     }
4751     Ops.push_back(EmitScalarExpr(E->getArg(1))); // Right-hand operand; Ops[0] is presumably arg 0, collected before this switch - confirm.
4752     Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
4753     Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
4754     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
4755     return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd"); // i1 -> i32 mask. NOTE(review): the value name ends in "d" although the result here is 32-bit; this only affects the IR value name.
4756   }
4757   case NEON::BI__builtin_neon_vceqd_s64: // Scalar 64-bit integer compares, signed and unsigned, share one lowering.
4758   case NEON::BI__builtin_neon_vceqd_u64:
4759   case NEON::BI__builtin_neon_vcgtd_s64:
4760   case NEON::BI__builtin_neon_vcgtd_u64:
4761   case NEON::BI__builtin_neon_vcltd_s64:
4762   case NEON::BI__builtin_neon_vcltd_u64:
4763   case NEON::BI__builtin_neon_vcged_u64:
4764   case NEON::BI__builtin_neon_vcged_s64:
4765   case NEON::BI__builtin_neon_vcled_u64:
4766   case NEON::BI__builtin_neon_vcled_s64: {
4767     llvm::CmpInst::Predicate P; // Assigned on every reachable path of the switch below.
4768     switch (BuiltinID) { // The _s64/_u64 variants differ only in the signedness of the icmp predicate.
4769     default: llvm_unreachable("missing builtin ID in switch!");
4770     case NEON::BI__builtin_neon_vceqd_s64:
4771     case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
4772     case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
4773     case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
4774     case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
4775     case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
4776     case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
4777     case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
4778     case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
4779     case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
4780     }
4781     Ops.push_back(EmitScalarExpr(E->getArg(1))); // Right-hand operand; Ops[0] is presumably arg 0, collected before this switch - confirm.
4782     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
4783     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
4784     Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
4785     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd"); // i1 -> i64 mask; the same value name is used for every predicate in this group.
4786   }
4787   case NEON::BI__builtin_neon_vtstd_s64: // Scalar 64-bit bit test: do the two operands share any set bit?
4788   case NEON::BI__builtin_neon_vtstd_u64: {
4789     Ops.push_back(EmitScalarExpr(E->getArg(1))); // Second operand; Ops[0] is presumably arg 0, collected before this switch - confirm.
4790     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
4791     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
4792     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
4793     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
4794                                 llvm::Constant::getNullValue(Int64Ty));
4795     return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd"); // All-ones when (a & b) != 0, else 0.
4796   }
4797   case NEON::BI__builtin_neon_vset_lane_i8:
4798   case NEON::BI__builtin_neon_vset_lane_i16:
4799   case NEON::BI__builtin_neon_vset_lane_i32:
4800   case NEON::BI__builtin_neon_vset_lane_i64:
4801   case NEON::BI__builtin_neon_vset_lane_f32:
4802   case NEON::BI__builtin_neon_vsetq_lane_i8:
4803   case NEON::BI__builtin_neon_vsetq_lane_i16:
4804   case NEON::BI__builtin_neon_vsetq_lane_i32:
4805   case NEON::BI__builtin_neon_vsetq_lane_i64:
4806   case NEON::BI__builtin_neon_vsetq_lane_f32:
4807     Ops.push_back(EmitScalarExpr(E->getArg(2)));
4808     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4809   case NEON::BI__builtin_neon_vset_lane_f64:
4810     // The vector type needs a cast for the v1f64 variant.
4811     Ops[1] = Builder.CreateBitCast(Ops[1],
4812                                    llvm::VectorType::get(DoubleTy, 1));
4813     Ops.push_back(EmitScalarExpr(E->getArg(2)));
4814     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4815   case NEON::BI__builtin_neon_vsetq_lane_f64:
4816     // The vector type needs a cast for the v2f64 variant.
4817     Ops[1] = Builder.CreateBitCast(Ops[1],
4818         llvm::VectorType::get(DoubleTy, 2));
4819     Ops.push_back(EmitScalarExpr(E->getArg(2)));
4820     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4821 
4822   case NEON::BI__builtin_neon_vget_lane_i8:
4823   case NEON::BI__builtin_neon_vdupb_lane_i8:
4824     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
4825     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4826                                         "vget_lane");
4827   case NEON::BI__builtin_neon_vgetq_lane_i8:
4828   case NEON::BI__builtin_neon_vdupb_laneq_i8:
4829     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
4830     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4831                                         "vgetq_lane");
4832   case NEON::BI__builtin_neon_vget_lane_i16:
4833   case NEON::BI__builtin_neon_vduph_lane_i16:
4834     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
4835     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4836                                         "vget_lane");
4837   case NEON::BI__builtin_neon_vgetq_lane_i16:
4838   case NEON::BI__builtin_neon_vduph_laneq_i16:
4839     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
4840     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4841                                         "vgetq_lane");
4842   case NEON::BI__builtin_neon_vget_lane_i32:
4843   case NEON::BI__builtin_neon_vdups_lane_i32:
4844     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
4845     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4846                                         "vget_lane");
4847   case NEON::BI__builtin_neon_vdups_lane_f32:
4848     Ops[0] = Builder.CreateBitCast(Ops[0],
4849         llvm::VectorType::get(FloatTy, 2));
4850     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4851                                         "vdups_lane");
4852   case NEON::BI__builtin_neon_vgetq_lane_i32:
4853   case NEON::BI__builtin_neon_vdups_laneq_i32:
4854     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
4855     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4856                                         "vgetq_lane");
4857   case NEON::BI__builtin_neon_vget_lane_i64:
4858   case NEON::BI__builtin_neon_vdupd_lane_i64:
4859     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
4860     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4861                                         "vget_lane");
4862   case NEON::BI__builtin_neon_vdupd_lane_f64:
4863     Ops[0] = Builder.CreateBitCast(Ops[0],
4864         llvm::VectorType::get(DoubleTy, 1));
4865     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4866                                         "vdupd_lane");
4867   case NEON::BI__builtin_neon_vgetq_lane_i64:
4868   case NEON::BI__builtin_neon_vdupd_laneq_i64:
4869     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
4870     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4871                                         "vgetq_lane");
4872   case NEON::BI__builtin_neon_vget_lane_f32:
4873     Ops[0] = Builder.CreateBitCast(Ops[0],
4874         llvm::VectorType::get(FloatTy, 2));
4875     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4876                                         "vget_lane");
4877   case NEON::BI__builtin_neon_vget_lane_f64:
4878     Ops[0] = Builder.CreateBitCast(Ops[0],
4879         llvm::VectorType::get(DoubleTy, 1));
4880     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4881                                         "vget_lane");
4882   case NEON::BI__builtin_neon_vgetq_lane_f32:
4883   case NEON::BI__builtin_neon_vdups_laneq_f32:
4884     Ops[0] = Builder.CreateBitCast(Ops[0],
4885         llvm::VectorType::get(FloatTy, 4));
4886     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4887                                         "vgetq_lane");
4888   case NEON::BI__builtin_neon_vgetq_lane_f64:
4889   case NEON::BI__builtin_neon_vdupd_laneq_f64:
4890     Ops[0] = Builder.CreateBitCast(Ops[0],
4891         llvm::VectorType::get(DoubleTy, 2));
4892     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4893                                         "vgetq_lane");
4894   case NEON::BI__builtin_neon_vaddd_s64:
4895   case NEON::BI__builtin_neon_vaddd_u64:
4896     return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
4897   case NEON::BI__builtin_neon_vsubd_s64:
4898   case NEON::BI__builtin_neon_vsubd_u64:
4899     return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
4900   case NEON::BI__builtin_neon_vqdmlalh_s16:
4901   case NEON::BI__builtin_neon_vqdmlslh_s16: {
4902     SmallVector<Value *, 2> ProductOps;
4903     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
4904     ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
4905     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
4906     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
4907                           ProductOps, "vqdmlXl");
4908     Constant *CI = ConstantInt::get(SizeTy, 0);
4909     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
4910 
4911     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
4912                                         ? Intrinsic::aarch64_neon_sqadd
4913                                         : Intrinsic::aarch64_neon_sqsub;
4914     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
4915   }
4916   case NEON::BI__builtin_neon_vqshlud_n_s64: {
4917     Ops.push_back(EmitScalarExpr(E->getArg(1)));
4918     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
4919     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
4920                         Ops, "vqshlu_n");
4921   }
4922   case NEON::BI__builtin_neon_vqshld_n_u64:
4923   case NEON::BI__builtin_neon_vqshld_n_s64: {
4924     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
4925                                    ? Intrinsic::aarch64_neon_uqshl
4926                                    : Intrinsic::aarch64_neon_sqshl;
4927     Ops.push_back(EmitScalarExpr(E->getArg(1)));
4928     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
4929     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
4930   }
4931   case NEON::BI__builtin_neon_vrshrd_n_u64: // Scalar 64-bit rounding shift right by an immediate.
4932   case NEON::BI__builtin_neon_vrshrd_n_s64: {
4933     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
4934                                    ? Intrinsic::aarch64_neon_urshl
4935                                    : Intrinsic::aarch64_neon_srshl;
4936     Ops.push_back(EmitScalarExpr(E->getArg(1)));
4937     int SV = cast<ConstantInt>(Ops[1])->getSExtValue(); // cast<> requires the amount to already be a ConstantInt; the 64-bit value is narrowed to int here.
4938     Ops[1] = ConstantInt::get(Int64Ty, -SV); // The negated amount goes to the rounding shift-LEFT intrinsic; presumably a negative left shift acts as a right shift - confirm against the intrinsic definition.
4939     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
4940   }
4941   case NEON::BI__builtin_neon_vrsrad_n_u64:
4942   case NEON::BI__builtin_neon_vrsrad_n_s64: {
4943     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
4944                                    ? Intrinsic::aarch64_neon_urshl
4945                                    : Intrinsic::aarch64_neon_srshl;
4946     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
4947     Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
4948     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
4949                                 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
4950     return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
4951   }
4952   case NEON::BI__builtin_neon_vshld_n_s64:
4953   case NEON::BI__builtin_neon_vshld_n_u64: {
4954     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
4955     return Builder.CreateShl(
4956         Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
4957   }
4958   case NEON::BI__builtin_neon_vshrd_n_s64: { // Scalar signed 64-bit shift right by an immediate, emitted as a plain ashr.
4959     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); // cast<> requires the amount to be a ConstantInt.
4960     return Builder.CreateAShr(
4961         Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), // Clamp to 63 so the ashr amount stays below the 64-bit width.
4962                                                    Amt->getZExtValue())),
4963         "shrd_n"); // NOTE(review): presumably an amount of 64 is accepted by the builtin; ashr by 63 already fills the result with the sign bit - confirm.
4964   }
4965   case NEON::BI__builtin_neon_vshrd_n_u64: { // Scalar unsigned 64-bit shift right by an immediate, emitted as a plain lshr.
4966     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); // cast<> requires the amount to be a ConstantInt.
4967     uint64_t ShiftAmt = Amt->getZExtValue();
4968     // Right-shifting an unsigned value by its size yields 0.
4969     if (ShiftAmt == 64)
4970       return ConstantInt::get(Int64Ty, 0); // Return the constant directly; no lshr instruction is emitted for this amount.
4971     return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
4972                               "shrd_n");
4973   }
4974   case NEON::BI__builtin_neon_vsrad_n_s64: { // Scalar signed 64-bit shift right by an immediate, then add: Ops[0] + (Ops[1] >> n).
4975     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); // cast<> requires the amount to be a ConstantInt.
4976     Ops[1] = Builder.CreateAShr(
4977         Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), // Clamp to 63 so the ashr amount stays below the 64-bit width.
4978                                                    Amt->getZExtValue())),
4979         "shrd_n");
4980     return Builder.CreateAdd(Ops[0], Ops[1]);
4981   }
4982   case NEON::BI__builtin_neon_vsrad_n_u64: { // Scalar unsigned 64-bit shift right by an immediate, then add: Ops[0] + (Ops[1] >> n).
4983     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); // cast<> requires the amount to be a ConstantInt.
4984     uint64_t ShiftAmt = Amt->getZExtValue();
4985     // Right-shifting an unsigned value by its size yields 0.
4986     // As Op + 0 = Op, return Ops[0] directly.
4987     if (ShiftAmt == 64)
4988       return Ops[0]; // No shift and no add are emitted for this amount.
4989     Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
4990                                 "shrd_n");
4991     return Builder.CreateAdd(Ops[0], Ops[1]);
4992   }
4993   case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
4994   case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
4995   case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
4996   case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
4997     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
4998                                           "lane");
4999     SmallVector<Value *, 2> ProductOps;
5000     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5001     ProductOps.push_back(vectorWrapScalar16(Ops[2]));
5002     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5003     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5004                           ProductOps, "vqdmlXl");
5005     Constant *CI = ConstantInt::get(SizeTy, 0);
5006     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5007     Ops.pop_back();
5008 
5009     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
5010                        BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
5011                           ? Intrinsic::aarch64_neon_sqadd
5012                           : Intrinsic::aarch64_neon_sqsub;
5013     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
5014   }
5015   case NEON::BI__builtin_neon_vqdmlals_s32:
5016   case NEON::BI__builtin_neon_vqdmlsls_s32: {
5017     SmallVector<Value *, 2> ProductOps;
5018     ProductOps.push_back(Ops[1]);
5019     ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
5020     Ops[1] =
5021         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5022                      ProductOps, "vqdmlXl");
5023 
5024     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
5025                                         ? Intrinsic::aarch64_neon_sqadd
5026                                         : Intrinsic::aarch64_neon_sqsub;
5027     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
5028   }
5029   case NEON::BI__builtin_neon_vqdmlals_lane_s32:
5030   case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
5031   case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
5032   case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
5033     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
5034                                           "lane");
5035     SmallVector<Value *, 2> ProductOps;
5036     ProductOps.push_back(Ops[1]);
5037     ProductOps.push_back(Ops[2]);
5038     Ops[1] =
5039         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5040                      ProductOps, "vqdmlXl");
5041     Ops.pop_back();
5042 
5043     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
5044                        BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
5045                           ? Intrinsic::aarch64_neon_sqadd
5046                           : Intrinsic::aarch64_neon_sqsub;
5047     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
5048   }
5049   }
5050 
5051   llvm::VectorType *VTy = GetNeonType(this, Type);
5052   llvm::Type *Ty = VTy;
5053   if (!Ty)
5054     return nullptr;
5055 
5056   // Not all intrinsics handled by the common case work for AArch64 yet, so only
5057   // defer to common code if it's been added to our special map.
5058   Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
5059                                    AArch64SIMDIntrinsicsProvenSorted);
5060 
5061   if (Builtin)
5062     return EmitCommonNeonBuiltinExpr(
5063         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
5064         Builtin->NameHint, Builtin->TypeModifier, E, Ops,
5065         /*never use addresses*/ Address::invalid(), Address::invalid());
5066 
5067   if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops))
5068     return V;
5069 
5070   unsigned Int;
5071   switch (BuiltinID) {
5072   default: return nullptr;
5073   case NEON::BI__builtin_neon_vbsl_v:
5074   case NEON::BI__builtin_neon_vbslq_v: {
5075     llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
5076     Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
5077     Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
5078     Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
5079 
5080     Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
5081     Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
5082     Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
5083     return Builder.CreateBitCast(Ops[0], Ty);
5084   }
5085   case NEON::BI__builtin_neon_vfma_lane_v:
5086   case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
5087     // The ARM builtins (and instructions) have the addend as the first
5088     // operand, but the 'fma' intrinsics have it last. Swap it around here.
5089     Value *Addend = Ops[0];
5090     Value *Multiplicand = Ops[1];
5091     Value *LaneSource = Ops[2];
5092     Ops[0] = Multiplicand;
5093     Ops[1] = LaneSource;
5094     Ops[2] = Addend;
5095 
5096     // Now adjust things to handle the lane access.
5097     llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
5098       llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
5099       VTy;
5100     llvm::Constant *cst = cast<Constant>(Ops[3]);
5101     Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
5102     Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
5103     Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
5104 
5105     Ops.pop_back();
5106     Int = Intrinsic::fma;
5107     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
5108   }
5109   case NEON::BI__builtin_neon_vfma_laneq_v: { // Fused multiply-add whose multiplier is one lane of Ops[2]; the lane index is Ops[3].
5110     llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); // cast<> asserts on a type mismatch; it never yields null for the non-null Ty seen here.
5111     // v1f64 fma should be mapped to Neon scalar f64 fma
5112     if (VTy->getElementType() == DoubleTy) {
5113       Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5114       Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5115       llvm::Type *QuadF64Ty = GetNeonType(this, // Type of the lane source: Float64 elements with the quad flag set.
5116         NeonTypeFlags(NeonTypeFlags::Float64, false, true));
5117       Ops[2] = Builder.CreateBitCast(Ops[2], QuadF64Ty);
5118       Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); // Pull out the selected lane as a scalar double.
5119       Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
5120       Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); // fma(a, b, c) = a * b + c, so the addend Ops[0] goes last.
5121       return Builder.CreateBitCast(Result, Ty); // Back to the builtin's vector result type.
5122     }
5123     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5124     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5125     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5126 
5127     llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(), // Lane source has the same element type but twice as many elements.
5128                                             VTy->getNumElements() * 2);
5129     Ops[2] = Builder.CreateBitCast(Ops[2], STy);
5130     Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), // Shuffle mask: the lane index repeated once per result element.
5131                                                cast<ConstantInt>(Ops[3]));
5132     Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); // Broadcast the chosen lane into a vector of the result width.
5133 
5134     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); // Addend Ops[0] goes last, as in the scalar path.
5135   }
5136   case NEON::BI__builtin_neon_vfmaq_laneq_v: {
5137     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5138     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5139     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5140 
5141     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5142     Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
5143     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
5144   }
5145   case NEON::BI__builtin_neon_vfmas_lane_f32:
5146   case NEON::BI__builtin_neon_vfmas_laneq_f32:
5147   case NEON::BI__builtin_neon_vfmad_lane_f64:
5148   case NEON::BI__builtin_neon_vfmad_laneq_f64: {
5149     Ops.push_back(EmitScalarExpr(E->getArg(3)));
5150     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
5151     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5152     Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
5153     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
5154   }
5155   case NEON::BI__builtin_neon_vfms_v:
5156   case NEON::BI__builtin_neon_vfmsq_v: {  // Only used for FP types
5157     // FIXME: probably remove when we no longer support aarch64_simd.h
5158     // (arm_neon.h delegates to vfma).
5159 
5160     // The ARM builtins (and instructions) have the addend as the first
5161     // operand, but the 'fma' intrinsics have it last. Swap it around here.
5162     Value *Subtrahend = Ops[0];
5163     Value *Multiplicand = Ops[2];
5164     Ops[0] = Multiplicand;
5165     Ops[2] = Subtrahend;
5166     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
5167     Ops[1] = Builder.CreateFNeg(Ops[1]);
5168     Int = Intrinsic::fma;
5169     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmls");
5170   }
5171   case NEON::BI__builtin_neon_vmull_v:
5172     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5173     Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
5174     if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
5175     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
5176   case NEON::BI__builtin_neon_vmax_v:
5177   case NEON::BI__builtin_neon_vmaxq_v:
5178     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5179     Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
5180     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
5181     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
5182   case NEON::BI__builtin_neon_vmin_v:
5183   case NEON::BI__builtin_neon_vminq_v:
5184     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5185     Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
5186     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
5187     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
5188   case NEON::BI__builtin_neon_vabd_v:
5189   case NEON::BI__builtin_neon_vabdq_v:
5190     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5191     Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
5192     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
5193     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
5194   case NEON::BI__builtin_neon_vpadal_v: // Lowered as the [us]addlp intrinsic on Ops[1] followed by an ordinary add of Ops[0].
5195   case NEON::BI__builtin_neon_vpadalq_v: {
5196     unsigned ArgElts = VTy->getNumElements();
5197     llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType()); // cast<> asserts that the result element type is an integer type.
5198     unsigned BitWidth = EltTy->getBitWidth();
5199     llvm::Type *ArgTy = llvm::VectorType::get( // Source vector type: elements half as wide, twice as many of them.
5200         llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
5201     llvm::Type* Tys[2] = { VTy, ArgTy }; // The intrinsic is overloaded on both the result and the source type.
5202     Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
5203     SmallVector<llvm::Value*, 1> TmpOps;
5204     TmpOps.push_back(Ops[1]); // Only Ops[1] is passed to the intrinsic; Ops[0] is added afterwards.
5205     Function *F = CGM.getIntrinsic(Int, Tys);
5206     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
5207     llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType()); // Bring Ops[0] to the intrinsic's result type before adding.
5208     return Builder.CreateAdd(tmp, addend);
5209   }
5210   case NEON::BI__builtin_neon_vpmin_v:
5211   case NEON::BI__builtin_neon_vpminq_v:
5212     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5213     Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
5214     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
5215     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
5216   case NEON::BI__builtin_neon_vpmax_v:
5217   case NEON::BI__builtin_neon_vpmaxq_v:
5218     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5219     Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
5220     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
5221     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
5222   case NEON::BI__builtin_neon_vminnm_v:
5223   case NEON::BI__builtin_neon_vminnmq_v:
5224     Int = Intrinsic::aarch64_neon_fminnm;
5225     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
5226   case NEON::BI__builtin_neon_vmaxnm_v:
5227   case NEON::BI__builtin_neon_vmaxnmq_v:
5228     Int = Intrinsic::aarch64_neon_fmaxnm;
5229     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
5230   case NEON::BI__builtin_neon_vrecpss_f32: {
5231     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5232     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
5233                         Ops, "vrecps");
5234   }
5235   case NEON::BI__builtin_neon_vrecpsd_f64: {
5236     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5237     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
5238                         Ops, "vrecps");
5239   }
5240   case NEON::BI__builtin_neon_vqshrun_n_v:
5241     Int = Intrinsic::aarch64_neon_sqshrun;
5242     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
5243   case NEON::BI__builtin_neon_vqrshrun_n_v:
5244     Int = Intrinsic::aarch64_neon_sqrshrun;
5245     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
5246   case NEON::BI__builtin_neon_vqshrn_n_v:
5247     Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
5248     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
5249   case NEON::BI__builtin_neon_vrshrn_n_v:
5250     Int = Intrinsic::aarch64_neon_rshrn;
5251     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
5252   case NEON::BI__builtin_neon_vqrshrn_n_v:
5253     Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
5254     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
5255   case NEON::BI__builtin_neon_vrnda_v:
5256   case NEON::BI__builtin_neon_vrndaq_v: {
5257     Int = Intrinsic::round;
5258     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
5259   }
5260   case NEON::BI__builtin_neon_vrndi_v:
5261   case NEON::BI__builtin_neon_vrndiq_v: {
5262     Int = Intrinsic::nearbyint;
5263     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
5264   }
5265   case NEON::BI__builtin_neon_vrndm_v:
5266   case NEON::BI__builtin_neon_vrndmq_v: {
5267     Int = Intrinsic::floor;
5268     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
5269   }
5270   case NEON::BI__builtin_neon_vrndn_v:
5271   case NEON::BI__builtin_neon_vrndnq_v: {
5272     Int = Intrinsic::aarch64_neon_frintn;
5273     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
5274   }
5275   case NEON::BI__builtin_neon_vrndp_v:
5276   case NEON::BI__builtin_neon_vrndpq_v: {
5277     Int = Intrinsic::ceil;
5278     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
5279   }
5280   case NEON::BI__builtin_neon_vrndx_v:
5281   case NEON::BI__builtin_neon_vrndxq_v: {
5282     Int = Intrinsic::rint;
5283     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
5284   }
5285   case NEON::BI__builtin_neon_vrnd_v:
5286   case NEON::BI__builtin_neon_vrndq_v: {
5287     Int = Intrinsic::trunc;
5288     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
5289   }
5290   case NEON::BI__builtin_neon_vceqz_v:
5291   case NEON::BI__builtin_neon_vceqzq_v:
5292     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
5293                                          ICmpInst::ICMP_EQ, "vceqz");
5294   case NEON::BI__builtin_neon_vcgez_v:
5295   case NEON::BI__builtin_neon_vcgezq_v:
5296     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
5297                                          ICmpInst::ICMP_SGE, "vcgez");
5298   case NEON::BI__builtin_neon_vclez_v:
5299   case NEON::BI__builtin_neon_vclezq_v:
5300     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
5301                                          ICmpInst::ICMP_SLE, "vclez");
5302   case NEON::BI__builtin_neon_vcgtz_v:
5303   case NEON::BI__builtin_neon_vcgtzq_v:
5304     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
5305                                          ICmpInst::ICMP_SGT, "vcgtz");
5306   case NEON::BI__builtin_neon_vcltz_v:
5307   case NEON::BI__builtin_neon_vcltzq_v:
5308     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
5309                                          ICmpInst::ICMP_SLT, "vcltz");
5310   case NEON::BI__builtin_neon_vcvt_f64_v:
5311   case NEON::BI__builtin_neon_vcvtq_f64_v:
5312     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5313     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
5314     return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
5315                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
5316   case NEON::BI__builtin_neon_vcvt_f64_f32: {
5317     assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
5318            "unexpected vcvt_f64_f32 builtin");
5319     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
5320     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
5321 
5322     return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
5323   }
5324   case NEON::BI__builtin_neon_vcvt_f32_f64: {
5325     assert(Type.getEltType() == NeonTypeFlags::Float32 &&
5326            "unexpected vcvt_f32_f64 builtin");
5327     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
5328     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
5329 
5330     return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
5331   }
5332   case NEON::BI__builtin_neon_vcvt_s32_v:
5333   case NEON::BI__builtin_neon_vcvt_u32_v:
5334   case NEON::BI__builtin_neon_vcvt_s64_v:
5335   case NEON::BI__builtin_neon_vcvt_u64_v:
5336   case NEON::BI__builtin_neon_vcvtq_s32_v:
5337   case NEON::BI__builtin_neon_vcvtq_u32_v:
5338   case NEON::BI__builtin_neon_vcvtq_s64_v:
5339   case NEON::BI__builtin_neon_vcvtq_u64_v: {
5340     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
5341     if (usgn)
5342       return Builder.CreateFPToUI(Ops[0], Ty);
5343     return Builder.CreateFPToSI(Ops[0], Ty);
5344   }
5345   case NEON::BI__builtin_neon_vcvta_s32_v:
5346   case NEON::BI__builtin_neon_vcvtaq_s32_v:
5347   case NEON::BI__builtin_neon_vcvta_u32_v:
5348   case NEON::BI__builtin_neon_vcvtaq_u32_v:
5349   case NEON::BI__builtin_neon_vcvta_s64_v:
5350   case NEON::BI__builtin_neon_vcvtaq_s64_v:
5351   case NEON::BI__builtin_neon_vcvta_u64_v:
5352   case NEON::BI__builtin_neon_vcvtaq_u64_v: {
5353     Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
5354     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5355     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
5356   }
5357   case NEON::BI__builtin_neon_vcvtm_s32_v:
5358   case NEON::BI__builtin_neon_vcvtmq_s32_v:
5359   case NEON::BI__builtin_neon_vcvtm_u32_v:
5360   case NEON::BI__builtin_neon_vcvtmq_u32_v:
5361   case NEON::BI__builtin_neon_vcvtm_s64_v:
5362   case NEON::BI__builtin_neon_vcvtmq_s64_v:
5363   case NEON::BI__builtin_neon_vcvtm_u64_v:
5364   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
5365     Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
5366     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5367     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
5368   }
5369   case NEON::BI__builtin_neon_vcvtn_s32_v:
5370   case NEON::BI__builtin_neon_vcvtnq_s32_v:
5371   case NEON::BI__builtin_neon_vcvtn_u32_v:
5372   case NEON::BI__builtin_neon_vcvtnq_u32_v:
5373   case NEON::BI__builtin_neon_vcvtn_s64_v:
5374   case NEON::BI__builtin_neon_vcvtnq_s64_v:
5375   case NEON::BI__builtin_neon_vcvtn_u64_v:
5376   case NEON::BI__builtin_neon_vcvtnq_u64_v: {
5377     Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
5378     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5379     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
5380   }
5381   case NEON::BI__builtin_neon_vcvtp_s32_v:
5382   case NEON::BI__builtin_neon_vcvtpq_s32_v:
5383   case NEON::BI__builtin_neon_vcvtp_u32_v:
5384   case NEON::BI__builtin_neon_vcvtpq_u32_v:
5385   case NEON::BI__builtin_neon_vcvtp_s64_v:
5386   case NEON::BI__builtin_neon_vcvtpq_s64_v:
5387   case NEON::BI__builtin_neon_vcvtp_u64_v:
5388   case NEON::BI__builtin_neon_vcvtpq_u64_v: {
5389     Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
5390     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5391     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
5392   }
5393   case NEON::BI__builtin_neon_vmulx_v:
5394   case NEON::BI__builtin_neon_vmulxq_v: {
5395     Int = Intrinsic::aarch64_neon_fmulx;
5396     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
5397   }
5398   case NEON::BI__builtin_neon_vmul_lane_v:
5399   case NEON::BI__builtin_neon_vmul_laneq_v: {
5400     // v1f64 vmul_lane should be mapped to Neon scalar mul lane
5401     bool Quad = false;
5402     if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
5403       Quad = true;
5404     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5405     llvm::Type *VTy = GetNeonType(this,
5406       NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
5407     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
5408     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
5409     Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
5410     return Builder.CreateBitCast(Result, Ty);
5411   }
5412   case NEON::BI__builtin_neon_vnegd_s64:
5413     return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
5414   case NEON::BI__builtin_neon_vpmaxnm_v:
5415   case NEON::BI__builtin_neon_vpmaxnmq_v: {
5416     Int = Intrinsic::aarch64_neon_fmaxnmp;
5417     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
5418   }
5419   case NEON::BI__builtin_neon_vpminnm_v:
5420   case NEON::BI__builtin_neon_vpminnmq_v: {
5421     Int = Intrinsic::aarch64_neon_fminnmp;
5422     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
5423   }
5424   case NEON::BI__builtin_neon_vsqrt_v:
5425   case NEON::BI__builtin_neon_vsqrtq_v: {
5426     Int = Intrinsic::sqrt;
5427     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5428     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
5429   }
5430   case NEON::BI__builtin_neon_vrbit_v:
5431   case NEON::BI__builtin_neon_vrbitq_v: {
5432     Int = Intrinsic::aarch64_neon_rbit;
5433     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
5434   }
5435   case NEON::BI__builtin_neon_vaddv_u8:
5436     // FIXME: These are handled by the AArch64 scalar code.
5437     usgn = true;
5438     // FALLTHROUGH
5439   case NEON::BI__builtin_neon_vaddv_s8: {
5440     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5441     Ty = Int32Ty;
5442     VTy = llvm::VectorType::get(Int8Ty, 8);
5443     llvm::Type *Tys[2] = { Ty, VTy };
5444     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5445     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5446     return Builder.CreateTrunc(Ops[0], Int8Ty);
5447   }
5448   case NEON::BI__builtin_neon_vaddv_u16:
5449     usgn = true;
5450     // FALLTHROUGH
5451   case NEON::BI__builtin_neon_vaddv_s16: {
5452     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5453     Ty = Int32Ty;
5454     VTy = llvm::VectorType::get(Int16Ty, 4);
5455     llvm::Type *Tys[2] = { Ty, VTy };
5456     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5457     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5458     return Builder.CreateTrunc(Ops[0], Int16Ty);
5459   }
5460   case NEON::BI__builtin_neon_vaddvq_u8:
5461     usgn = true;
5462     // FALLTHROUGH
5463   case NEON::BI__builtin_neon_vaddvq_s8: {
5464     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5465     Ty = Int32Ty;
5466     VTy = llvm::VectorType::get(Int8Ty, 16);
5467     llvm::Type *Tys[2] = { Ty, VTy };
5468     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5469     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5470     return Builder.CreateTrunc(Ops[0], Int8Ty);
5471   }
5472   case NEON::BI__builtin_neon_vaddvq_u16:
5473     usgn = true;
5474     // FALLTHROUGH
5475   case NEON::BI__builtin_neon_vaddvq_s16: {
5476     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5477     Ty = Int32Ty;
5478     VTy = llvm::VectorType::get(Int16Ty, 8);
5479     llvm::Type *Tys[2] = { Ty, VTy };
5480     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5481     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5482     return Builder.CreateTrunc(Ops[0], Int16Ty);
5483   }
5484   case NEON::BI__builtin_neon_vmaxv_u8: {
5485     Int = Intrinsic::aarch64_neon_umaxv;
5486     Ty = Int32Ty;
5487     VTy = llvm::VectorType::get(Int8Ty, 8);
5488     llvm::Type *Tys[2] = { Ty, VTy };
5489     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5490     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5491     return Builder.CreateTrunc(Ops[0], Int8Ty);
5492   }
5493   case NEON::BI__builtin_neon_vmaxv_u16: {
5494     Int = Intrinsic::aarch64_neon_umaxv;
5495     Ty = Int32Ty;
5496     VTy = llvm::VectorType::get(Int16Ty, 4);
5497     llvm::Type *Tys[2] = { Ty, VTy };
5498     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5499     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5500     return Builder.CreateTrunc(Ops[0], Int16Ty);
5501   }
5502   case NEON::BI__builtin_neon_vmaxvq_u8: {
5503     Int = Intrinsic::aarch64_neon_umaxv;
5504     Ty = Int32Ty;
5505     VTy = llvm::VectorType::get(Int8Ty, 16);
5506     llvm::Type *Tys[2] = { Ty, VTy };
5507     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5508     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5509     return Builder.CreateTrunc(Ops[0], Int8Ty);
5510   }
5511   case NEON::BI__builtin_neon_vmaxvq_u16: {
5512     Int = Intrinsic::aarch64_neon_umaxv;
5513     Ty = Int32Ty;
5514     VTy = llvm::VectorType::get(Int16Ty, 8);
5515     llvm::Type *Tys[2] = { Ty, VTy };
5516     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5517     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5518     return Builder.CreateTrunc(Ops[0], Int16Ty);
5519   }
5520   case NEON::BI__builtin_neon_vmaxv_s8: {
5521     Int = Intrinsic::aarch64_neon_smaxv;
5522     Ty = Int32Ty;
5523     VTy = llvm::VectorType::get(Int8Ty, 8);
5524     llvm::Type *Tys[2] = { Ty, VTy };
5525     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5526     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5527     return Builder.CreateTrunc(Ops[0], Int8Ty);
5528   }
5529   case NEON::BI__builtin_neon_vmaxv_s16: {
5530     Int = Intrinsic::aarch64_neon_smaxv;
5531     Ty = Int32Ty;
5532     VTy = llvm::VectorType::get(Int16Ty, 4);
5533     llvm::Type *Tys[2] = { Ty, VTy };
5534     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5535     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5536     return Builder.CreateTrunc(Ops[0], Int16Ty);
5537   }
5538   case NEON::BI__builtin_neon_vmaxvq_s8: {
5539     Int = Intrinsic::aarch64_neon_smaxv;
5540     Ty = Int32Ty;
5541     VTy = llvm::VectorType::get(Int8Ty, 16);
5542     llvm::Type *Tys[2] = { Ty, VTy };
5543     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5544     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5545     return Builder.CreateTrunc(Ops[0], Int8Ty);
5546   }
5547   case NEON::BI__builtin_neon_vmaxvq_s16: {
5548     Int = Intrinsic::aarch64_neon_smaxv;
5549     Ty = Int32Ty;
5550     VTy = llvm::VectorType::get(Int16Ty, 8);
5551     llvm::Type *Tys[2] = { Ty, VTy };
5552     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5553     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5554     return Builder.CreateTrunc(Ops[0], Int16Ty);
5555   }
5556   case NEON::BI__builtin_neon_vminv_u8: {
5557     Int = Intrinsic::aarch64_neon_uminv;
5558     Ty = Int32Ty;
5559     VTy = llvm::VectorType::get(Int8Ty, 8);
5560     llvm::Type *Tys[2] = { Ty, VTy };
5561     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5562     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5563     return Builder.CreateTrunc(Ops[0], Int8Ty);
5564   }
5565   case NEON::BI__builtin_neon_vminv_u16: {
5566     Int = Intrinsic::aarch64_neon_uminv;
5567     Ty = Int32Ty;
5568     VTy = llvm::VectorType::get(Int16Ty, 4);
5569     llvm::Type *Tys[2] = { Ty, VTy };
5570     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5571     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5572     return Builder.CreateTrunc(Ops[0], Int16Ty);
5573   }
5574   case NEON::BI__builtin_neon_vminvq_u8: {
5575     Int = Intrinsic::aarch64_neon_uminv;
5576     Ty = Int32Ty;
5577     VTy = llvm::VectorType::get(Int8Ty, 16);
5578     llvm::Type *Tys[2] = { Ty, VTy };
5579     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5580     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5581     return Builder.CreateTrunc(Ops[0], Int8Ty);
5582   }
5583   case NEON::BI__builtin_neon_vminvq_u16: {
5584     Int = Intrinsic::aarch64_neon_uminv;
5585     Ty = Int32Ty;
5586     VTy = llvm::VectorType::get(Int16Ty, 8);
5587     llvm::Type *Tys[2] = { Ty, VTy };
5588     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5589     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5590     return Builder.CreateTrunc(Ops[0], Int16Ty);
5591   }
5592   case NEON::BI__builtin_neon_vminv_s8: {
5593     Int = Intrinsic::aarch64_neon_sminv;
5594     Ty = Int32Ty;
5595     VTy = llvm::VectorType::get(Int8Ty, 8);
5596     llvm::Type *Tys[2] = { Ty, VTy };
5597     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5598     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5599     return Builder.CreateTrunc(Ops[0], Int8Ty);
5600   }
5601   case NEON::BI__builtin_neon_vminv_s16: {
5602     Int = Intrinsic::aarch64_neon_sminv;
5603     Ty = Int32Ty;
5604     VTy = llvm::VectorType::get(Int16Ty, 4);
5605     llvm::Type *Tys[2] = { Ty, VTy };
5606     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5607     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5608     return Builder.CreateTrunc(Ops[0], Int16Ty);
5609   }
5610   case NEON::BI__builtin_neon_vminvq_s8: {
5611     Int = Intrinsic::aarch64_neon_sminv;
5612     Ty = Int32Ty;
5613     VTy = llvm::VectorType::get(Int8Ty, 16);
5614     llvm::Type *Tys[2] = { Ty, VTy };
5615     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5616     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5617     return Builder.CreateTrunc(Ops[0], Int8Ty);
5618   }
5619   case NEON::BI__builtin_neon_vminvq_s16: {
5620     Int = Intrinsic::aarch64_neon_sminv;
5621     Ty = Int32Ty;
5622     VTy = llvm::VectorType::get(Int16Ty, 8);
5623     llvm::Type *Tys[2] = { Ty, VTy };
5624     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5625     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5626     return Builder.CreateTrunc(Ops[0], Int16Ty);
5627   }
5628   case NEON::BI__builtin_neon_vmul_n_f64: {
5629     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5630     Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
5631     return Builder.CreateFMul(Ops[0], RHS);
5632   }
5633   case NEON::BI__builtin_neon_vaddlv_u8: {
5634     Int = Intrinsic::aarch64_neon_uaddlv;
5635     Ty = Int32Ty;
5636     VTy = llvm::VectorType::get(Int8Ty, 8);
5637     llvm::Type *Tys[2] = { Ty, VTy };
5638     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5639     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5640     return Builder.CreateTrunc(Ops[0], Int16Ty);
5641   }
5642   case NEON::BI__builtin_neon_vaddlv_u16: {
5643     Int = Intrinsic::aarch64_neon_uaddlv;
5644     Ty = Int32Ty;
5645     VTy = llvm::VectorType::get(Int16Ty, 4);
5646     llvm::Type *Tys[2] = { Ty, VTy };
5647     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5648     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5649   }
5650   case NEON::BI__builtin_neon_vaddlvq_u8: {
5651     Int = Intrinsic::aarch64_neon_uaddlv;
5652     Ty = Int32Ty;
5653     VTy = llvm::VectorType::get(Int8Ty, 16);
5654     llvm::Type *Tys[2] = { Ty, VTy };
5655     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5656     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5657     return Builder.CreateTrunc(Ops[0], Int16Ty);
5658   }
5659   case NEON::BI__builtin_neon_vaddlvq_u16: {
5660     Int = Intrinsic::aarch64_neon_uaddlv;
5661     Ty = Int32Ty;
5662     VTy = llvm::VectorType::get(Int16Ty, 8);
5663     llvm::Type *Tys[2] = { Ty, VTy };
5664     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5665     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5666   }
5667   case NEON::BI__builtin_neon_vaddlv_s8: {
5668     Int = Intrinsic::aarch64_neon_saddlv;
5669     Ty = Int32Ty;
5670     VTy = llvm::VectorType::get(Int8Ty, 8);
5671     llvm::Type *Tys[2] = { Ty, VTy };
5672     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5673     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5674     return Builder.CreateTrunc(Ops[0], Int16Ty);
5675   }
5676   case NEON::BI__builtin_neon_vaddlv_s16: {
5677     Int = Intrinsic::aarch64_neon_saddlv;
5678     Ty = Int32Ty;
5679     VTy = llvm::VectorType::get(Int16Ty, 4);
5680     llvm::Type *Tys[2] = { Ty, VTy };
5681     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5682     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5683   }
5684   case NEON::BI__builtin_neon_vaddlvq_s8: {
5685     Int = Intrinsic::aarch64_neon_saddlv;
5686     Ty = Int32Ty;
5687     VTy = llvm::VectorType::get(Int8Ty, 16);
5688     llvm::Type *Tys[2] = { Ty, VTy };
5689     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5690     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5691     return Builder.CreateTrunc(Ops[0], Int16Ty);
5692   }
5693   case NEON::BI__builtin_neon_vaddlvq_s16: {
5694     Int = Intrinsic::aarch64_neon_saddlv;
5695     Ty = Int32Ty;
5696     VTy = llvm::VectorType::get(Int16Ty, 8);
5697     llvm::Type *Tys[2] = { Ty, VTy };
5698     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5699     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5700   }
5701   case NEON::BI__builtin_neon_vsri_n_v:
5702   case NEON::BI__builtin_neon_vsriq_n_v: {
5703     Int = Intrinsic::aarch64_neon_vsri;
5704     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
5705     return EmitNeonCall(Intrin, Ops, "vsri_n");
5706   }
5707   case NEON::BI__builtin_neon_vsli_n_v:
5708   case NEON::BI__builtin_neon_vsliq_n_v: {
5709     Int = Intrinsic::aarch64_neon_vsli;
5710     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
5711     return EmitNeonCall(Intrin, Ops, "vsli_n");
5712   }
5713   case NEON::BI__builtin_neon_vsra_n_v:
5714   case NEON::BI__builtin_neon_vsraq_n_v:
5715     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5716     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
5717     return Builder.CreateAdd(Ops[0], Ops[1]);
5718   case NEON::BI__builtin_neon_vrsra_n_v:
5719   case NEON::BI__builtin_neon_vrsraq_n_v: {
5720     Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
5721     SmallVector<llvm::Value*,2> TmpOps;
5722     TmpOps.push_back(Ops[1]);
5723     TmpOps.push_back(Ops[2]);
5724     Function* F = CGM.getIntrinsic(Int, Ty);
5725     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
5726     Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
5727     return Builder.CreateAdd(Ops[0], tmp);
5728   }
5729     // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
5730     // of an Align parameter here.
5731   case NEON::BI__builtin_neon_vld1_x2_v:
5732   case NEON::BI__builtin_neon_vld1q_x2_v:
5733   case NEON::BI__builtin_neon_vld1_x3_v:
5734   case NEON::BI__builtin_neon_vld1q_x3_v:
5735   case NEON::BI__builtin_neon_vld1_x4_v:
5736   case NEON::BI__builtin_neon_vld1q_x4_v: {
5737     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
5738     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5739     llvm::Type *Tys[2] = { VTy, PTy };
5740     unsigned Int;
5741     switch (BuiltinID) {
5742     case NEON::BI__builtin_neon_vld1_x2_v:
5743     case NEON::BI__builtin_neon_vld1q_x2_v:
5744       Int = Intrinsic::aarch64_neon_ld1x2;
5745       break;
5746     case NEON::BI__builtin_neon_vld1_x3_v:
5747     case NEON::BI__builtin_neon_vld1q_x3_v:
5748       Int = Intrinsic::aarch64_neon_ld1x3;
5749       break;
5750     case NEON::BI__builtin_neon_vld1_x4_v:
5751     case NEON::BI__builtin_neon_vld1q_x4_v:
5752       Int = Intrinsic::aarch64_neon_ld1x4;
5753       break;
5754     }
5755     Function *F = CGM.getIntrinsic(Int, Tys);
5756     Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
5757     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5758     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5759     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5760   }
5761   case NEON::BI__builtin_neon_vst1_x2_v:
5762   case NEON::BI__builtin_neon_vst1q_x2_v:
5763   case NEON::BI__builtin_neon_vst1_x3_v:
5764   case NEON::BI__builtin_neon_vst1q_x3_v:
5765   case NEON::BI__builtin_neon_vst1_x4_v:
5766   case NEON::BI__builtin_neon_vst1q_x4_v: {
5767     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
5768     llvm::Type *Tys[2] = { VTy, PTy };
5769     unsigned Int;
5770     switch (BuiltinID) {
5771     case NEON::BI__builtin_neon_vst1_x2_v:
5772     case NEON::BI__builtin_neon_vst1q_x2_v:
5773       Int = Intrinsic::aarch64_neon_st1x2;
5774       break;
5775     case NEON::BI__builtin_neon_vst1_x3_v:
5776     case NEON::BI__builtin_neon_vst1q_x3_v:
5777       Int = Intrinsic::aarch64_neon_st1x3;
5778       break;
5779     case NEON::BI__builtin_neon_vst1_x4_v:
5780     case NEON::BI__builtin_neon_vst1q_x4_v:
5781       Int = Intrinsic::aarch64_neon_st1x4;
5782       break;
5783     }
5784     std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
5785     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
5786   }
5787   case NEON::BI__builtin_neon_vld1_v:
5788   case NEON::BI__builtin_neon_vld1q_v:
5789     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
5790     return Builder.CreateDefaultAlignedLoad(Ops[0]);
5791   case NEON::BI__builtin_neon_vst1_v:
5792   case NEON::BI__builtin_neon_vst1q_v:
5793     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
5794     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
5795     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5796   case NEON::BI__builtin_neon_vld1_lane_v:
5797   case NEON::BI__builtin_neon_vld1q_lane_v:
5798     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5799     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
5800     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5801     Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]);
5802     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
5803   case NEON::BI__builtin_neon_vld1_dup_v:
5804   case NEON::BI__builtin_neon_vld1q_dup_v: {
5805     Value *V = UndefValue::get(Ty);
5806     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
5807     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5808     Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]);
5809     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
5810     Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
5811     return EmitNeonSplat(Ops[0], CI);
5812   }
5813   case NEON::BI__builtin_neon_vst1_lane_v:
5814   case NEON::BI__builtin_neon_vst1q_lane_v:
5815     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5816     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
5817     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5818     return Builder.CreateDefaultAlignedStore(Ops[1],
5819                                              Builder.CreateBitCast(Ops[0], Ty));
5820   case NEON::BI__builtin_neon_vld2_v:
5821   case NEON::BI__builtin_neon_vld2q_v: {
5822     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
5823     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5824     llvm::Type *Tys[2] = { VTy, PTy };
5825     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
5826     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
5827     Ops[0] = Builder.CreateBitCast(Ops[0],
5828                 llvm::PointerType::getUnqual(Ops[1]->getType()));
5829     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5830   }
5831   case NEON::BI__builtin_neon_vld3_v:
5832   case NEON::BI__builtin_neon_vld3q_v: {
5833     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
5834     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5835     llvm::Type *Tys[2] = { VTy, PTy };
5836     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
5837     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
5838     Ops[0] = Builder.CreateBitCast(Ops[0],
5839                 llvm::PointerType::getUnqual(Ops[1]->getType()));
5840     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5841   }
5842   case NEON::BI__builtin_neon_vld4_v:
5843   case NEON::BI__builtin_neon_vld4q_v: {
5844     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
5845     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5846     llvm::Type *Tys[2] = { VTy, PTy };
5847     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
5848     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
5849     Ops[0] = Builder.CreateBitCast(Ops[0],
5850                 llvm::PointerType::getUnqual(Ops[1]->getType()));
5851     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5852   }
5853   case NEON::BI__builtin_neon_vld2_dup_v:
5854   case NEON::BI__builtin_neon_vld2q_dup_v: {
5855     llvm::Type *PTy =
5856       llvm::PointerType::getUnqual(VTy->getElementType());
5857     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5858     llvm::Type *Tys[2] = { VTy, PTy };
5859     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
5860     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
5861     Ops[0] = Builder.CreateBitCast(Ops[0],
5862                 llvm::PointerType::getUnqual(Ops[1]->getType()));
5863     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5864   }
5865   case NEON::BI__builtin_neon_vld3_dup_v:
5866   case NEON::BI__builtin_neon_vld3q_dup_v: {
5867     llvm::Type *PTy =
5868       llvm::PointerType::getUnqual(VTy->getElementType());
5869     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5870     llvm::Type *Tys[2] = { VTy, PTy };
5871     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
5872     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
5873     Ops[0] = Builder.CreateBitCast(Ops[0],
5874                 llvm::PointerType::getUnqual(Ops[1]->getType()));
5875     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5876   }
5877   case NEON::BI__builtin_neon_vld4_dup_v:
5878   case NEON::BI__builtin_neon_vld4q_dup_v: {
5879     llvm::Type *PTy =
5880       llvm::PointerType::getUnqual(VTy->getElementType());
5881     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5882     llvm::Type *Tys[2] = { VTy, PTy };
5883     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
5884     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
5885     Ops[0] = Builder.CreateBitCast(Ops[0],
5886                 llvm::PointerType::getUnqual(Ops[1]->getType()));
5887     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5888   }
5889   case NEON::BI__builtin_neon_vld2_lane_v:
5890   case NEON::BI__builtin_neon_vld2q_lane_v: {
5891     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
5892     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
5893     Ops.push_back(Ops[1]);
5894     Ops.erase(Ops.begin()+1);
5895     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5896     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5897     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
5898     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
5899     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5900     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5901     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5902   }
5903   case NEON::BI__builtin_neon_vld3_lane_v:
5904   case NEON::BI__builtin_neon_vld3q_lane_v: {
5905     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
5906     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
5907     Ops.push_back(Ops[1]);
5908     Ops.erase(Ops.begin()+1);
5909     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5910     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5911     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
5912     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
5913     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
5914     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5915     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5916     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5917   }
5918   case NEON::BI__builtin_neon_vld4_lane_v:
5919   case NEON::BI__builtin_neon_vld4q_lane_v: {
5920     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
5921     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
5922     Ops.push_back(Ops[1]);
5923     Ops.erase(Ops.begin()+1);
5924     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5925     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5926     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
5927     Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
5928     Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
5929     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
5930     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5931     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5932     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5933   }
5934   case NEON::BI__builtin_neon_vst2_v:
5935   case NEON::BI__builtin_neon_vst2q_v: {
5936     Ops.push_back(Ops[0]);
5937     Ops.erase(Ops.begin());
5938     llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
5939     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
5940                         Ops, "");
5941   }
5942   case NEON::BI__builtin_neon_vst2_lane_v:
5943   case NEON::BI__builtin_neon_vst2q_lane_v: {
5944     Ops.push_back(Ops[0]);
5945     Ops.erase(Ops.begin());
5946     Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
5947     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
5948     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
5949                         Ops, "");
5950   }
5951   case NEON::BI__builtin_neon_vst3_v:
5952   case NEON::BI__builtin_neon_vst3q_v: {
5953     Ops.push_back(Ops[0]);
5954     Ops.erase(Ops.begin());
5955     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
5956     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
5957                         Ops, "");
5958   }
5959   case NEON::BI__builtin_neon_vst3_lane_v:
5960   case NEON::BI__builtin_neon_vst3q_lane_v: {
5961     Ops.push_back(Ops[0]);
5962     Ops.erase(Ops.begin());
5963     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
5964     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
5965     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
5966                         Ops, "");
5967   }
5968   case NEON::BI__builtin_neon_vst4_v:
5969   case NEON::BI__builtin_neon_vst4q_v: {
5970     Ops.push_back(Ops[0]);
5971     Ops.erase(Ops.begin());
5972     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
5973     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
5974                         Ops, "");
5975   }
5976   case NEON::BI__builtin_neon_vst4_lane_v:
5977   case NEON::BI__builtin_neon_vst4q_lane_v: {
5978     Ops.push_back(Ops[0]);
5979     Ops.erase(Ops.begin());
5980     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
5981     llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
5982     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
5983                         Ops, "");
5984   }
5985   case NEON::BI__builtin_neon_vtrn_v:
5986   case NEON::BI__builtin_neon_vtrnq_v: {
5987     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
5988     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5989     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5990     Value *SV = nullptr;
5991 
5992     for (unsigned vi = 0; vi != 2; ++vi) {
5993       SmallVector<Constant*, 16> Indices;
5994       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
5995         Indices.push_back(ConstantInt::get(Int32Ty, i+vi));
5996         Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi));
5997       }
5998       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
5999       SV = llvm::ConstantVector::get(Indices);
6000       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
6001       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6002     }
6003     return SV;
6004   }
6005   case NEON::BI__builtin_neon_vuzp_v:
6006   case NEON::BI__builtin_neon_vuzpq_v: {
6007     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6008     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6009     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6010     Value *SV = nullptr;
6011 
6012     for (unsigned vi = 0; vi != 2; ++vi) {
6013       SmallVector<Constant*, 16> Indices;
6014       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
6015         Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
6016 
6017       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6018       SV = llvm::ConstantVector::get(Indices);
6019       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
6020       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6021     }
6022     return SV;
6023   }
6024   case NEON::BI__builtin_neon_vzip_v:
6025   case NEON::BI__builtin_neon_vzipq_v: {
6026     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6027     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6028     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6029     Value *SV = nullptr;
6030 
6031     for (unsigned vi = 0; vi != 2; ++vi) {
6032       SmallVector<Constant*, 16> Indices;
6033       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6034         Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
6035         Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
6036       }
6037       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6038       SV = llvm::ConstantVector::get(Indices);
6039       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
6040       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6041     }
6042     return SV;
6043   }
6044   case NEON::BI__builtin_neon_vqtbl1q_v: {
6045     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
6046                         Ops, "vtbl1");
6047   }
6048   case NEON::BI__builtin_neon_vqtbl2q_v: {
6049     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
6050                         Ops, "vtbl2");
6051   }
6052   case NEON::BI__builtin_neon_vqtbl3q_v: {
6053     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
6054                         Ops, "vtbl3");
6055   }
6056   case NEON::BI__builtin_neon_vqtbl4q_v: {
6057     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
6058                         Ops, "vtbl4");
6059   }
6060   case NEON::BI__builtin_neon_vqtbx1q_v: {
6061     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
6062                         Ops, "vtbx1");
6063   }
6064   case NEON::BI__builtin_neon_vqtbx2q_v: {
6065     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
6066                         Ops, "vtbx2");
6067   }
6068   case NEON::BI__builtin_neon_vqtbx3q_v: {
6069     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
6070                         Ops, "vtbx3");
6071   }
6072   case NEON::BI__builtin_neon_vqtbx4q_v: {
6073     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
6074                         Ops, "vtbx4");
6075   }
6076   case NEON::BI__builtin_neon_vsqadd_v:
6077   case NEON::BI__builtin_neon_vsqaddq_v: {
6078     Int = Intrinsic::aarch64_neon_usqadd;
6079     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
6080   }
6081   case NEON::BI__builtin_neon_vuqadd_v:
6082   case NEON::BI__builtin_neon_vuqaddq_v: {
6083     Int = Intrinsic::aarch64_neon_suqadd;
6084     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
6085   }
6086   }
6087 }
6088 
6089 llvm::Value *CodeGenFunction::
6090 BuildVector(ArrayRef<llvm::Value*> Ops) {
6091   assert((Ops.size() & (Ops.size() - 1)) == 0 &&
6092          "Not a power-of-two sized vector!");
6093   bool AllConstants = true;
6094   for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
6095     AllConstants &= isa<Constant>(Ops[i]);
6096 
6097   // If this is a constant vector, create a ConstantVector.
6098   if (AllConstants) {
6099     SmallVector<llvm::Constant*, 16> CstOps;
6100     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
6101       CstOps.push_back(cast<Constant>(Ops[i]));
6102     return llvm::ConstantVector::get(CstOps);
6103   }
6104 
6105   // Otherwise, insertelement the values to build the vector.
6106   Value *Result =
6107     llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
6108 
6109   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
6110     Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
6111 
6112   return Result;
6113 }
6114 
6115 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
6116                                            const CallExpr *E) {
6117   if (BuiltinID == X86::BI__builtin_ms_va_start ||
6118       BuiltinID == X86::BI__builtin_ms_va_end)
6119     return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
6120                           BuiltinID == X86::BI__builtin_ms_va_start);
6121   if (BuiltinID == X86::BI__builtin_ms_va_copy) {
6122     // Lower this manually. We can't reliably determine whether or not any
6123     // given va_copy() is for a Win64 va_list from the calling convention
6124     // alone, because it's legal to do this from a System V ABI function.
6125     // With opaque pointer types, we won't have enough information in LLVM
6126     // IR to determine this from the argument types, either. Best to do it
6127     // now, while we have enough information.
6128     Address DestAddr = EmitMSVAListRef(E->getArg(0));
6129     Address SrcAddr = EmitMSVAListRef(E->getArg(1));
6130 
6131     llvm::Type *BPP = Int8PtrPtrTy;
6132 
6133     DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
6134                        DestAddr.getAlignment());
6135     SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
6136                       SrcAddr.getAlignment());
6137 
6138     Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
6139     return Builder.CreateStore(ArgPtr, DestAddr);
6140   }
6141 
6142   SmallVector<Value*, 4> Ops;
6143 
6144   // Find out if any arguments are required to be integer constant expressions.
6145   unsigned ICEArguments = 0;
6146   ASTContext::GetBuiltinTypeError Error;
6147   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
6148   assert(Error == ASTContext::GE_None && "Should not codegen an error");
6149 
6150   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
6151     // If this is a normal argument, just emit it as a scalar.
6152     if ((ICEArguments & (1 << i)) == 0) {
6153       Ops.push_back(EmitScalarExpr(E->getArg(i)));
6154       continue;
6155     }
6156 
6157     // If this is required to be a constant, constant fold it so that we know
6158     // that the generated intrinsic gets a ConstantInt.
6159     llvm::APSInt Result;
6160     bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
6161     assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
6162     Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
6163   }
6164 
6165   switch (BuiltinID) {
6166   default: return nullptr;
6167   case X86::BI__builtin_cpu_supports: {
6168     const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
6169     StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
6170 
6171     // TODO: When/if this becomes more than x86 specific then use a TargetInfo
6172     // based mapping.
6173     // Processor features and mapping to processor feature value.
6174     enum X86Features {
6175       CMOV = 0,
6176       MMX,
6177       POPCNT,
6178       SSE,
6179       SSE2,
6180       SSE3,
6181       SSSE3,
6182       SSE4_1,
6183       SSE4_2,
6184       AVX,
6185       AVX2,
6186       SSE4_A,
6187       FMA4,
6188       XOP,
6189       FMA,
6190       AVX512F,
6191       BMI,
6192       BMI2,
6193       MAX
6194     };
6195 
6196     X86Features Feature = StringSwitch<X86Features>(FeatureStr)
6197                               .Case("cmov", X86Features::CMOV)
6198                               .Case("mmx", X86Features::MMX)
6199                               .Case("popcnt", X86Features::POPCNT)
6200                               .Case("sse", X86Features::SSE)
6201                               .Case("sse2", X86Features::SSE2)
6202                               .Case("sse3", X86Features::SSE3)
6203                               .Case("sse4.1", X86Features::SSE4_1)
6204                               .Case("sse4.2", X86Features::SSE4_2)
6205                               .Case("avx", X86Features::AVX)
6206                               .Case("avx2", X86Features::AVX2)
6207                               .Case("sse4a", X86Features::SSE4_A)
6208                               .Case("fma4", X86Features::FMA4)
6209                               .Case("xop", X86Features::XOP)
6210                               .Case("fma", X86Features::FMA)
6211                               .Case("avx512f", X86Features::AVX512F)
6212                               .Case("bmi", X86Features::BMI)
6213                               .Case("bmi2", X86Features::BMI2)
6214                               .Default(X86Features::MAX);
6215     assert(Feature != X86Features::MAX && "Invalid feature!");
6216 
6217     // Matching the struct layout from the compiler-rt/libgcc structure that is
6218     // filled in:
6219     // unsigned int __cpu_vendor;
6220     // unsigned int __cpu_type;
6221     // unsigned int __cpu_subtype;
6222     // unsigned int __cpu_features[1];
6223     llvm::Type *STy = llvm::StructType::get(
6224         Int32Ty, Int32Ty, Int32Ty, llvm::ArrayType::get(Int32Ty, 1), nullptr);
6225 
6226     // Grab the global __cpu_model.
6227     llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
6228 
6229     // Grab the first (0th) element from the field __cpu_features off of the
6230     // global in the struct STy.
6231     Value *Idxs[] = {
6232       ConstantInt::get(Int32Ty, 0),
6233       ConstantInt::get(Int32Ty, 3),
6234       ConstantInt::get(Int32Ty, 0)
6235     };
6236     Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
6237     Value *Features = Builder.CreateAlignedLoad(CpuFeatures,
6238                                                 CharUnits::fromQuantity(4));
6239 
6240     // Check the value of the bit corresponding to the feature requested.
6241     Value *Bitset = Builder.CreateAnd(
6242         Features, llvm::ConstantInt::get(Int32Ty, 1 << Feature));
6243     return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
6244   }
6245   case X86::BI_mm_prefetch: {
6246     Value *Address = Ops[0];
6247     Value *RW = ConstantInt::get(Int32Ty, 0);
6248     Value *Locality = Ops[1];
6249     Value *Data = ConstantInt::get(Int32Ty, 1);
6250     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
6251     return Builder.CreateCall(F, {Address, RW, Locality, Data});
6252   }
6253   case X86::BI__builtin_ia32_undef128:
6254   case X86::BI__builtin_ia32_undef256:
6255   case X86::BI__builtin_ia32_undef512:
6256     return UndefValue::get(ConvertType(E->getType()));
6257   case X86::BI__builtin_ia32_vec_init_v8qi:
6258   case X86::BI__builtin_ia32_vec_init_v4hi:
6259   case X86::BI__builtin_ia32_vec_init_v2si:
6260     return Builder.CreateBitCast(BuildVector(Ops),
6261                                  llvm::Type::getX86_MMXTy(getLLVMContext()));
6262   case X86::BI__builtin_ia32_vec_ext_v2si:
6263     return Builder.CreateExtractElement(Ops[0],
6264                                   llvm::ConstantInt::get(Ops[1]->getType(), 0));
6265   case X86::BI__builtin_ia32_ldmxcsr: {
6266     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
6267     Builder.CreateStore(Ops[0], Tmp);
6268     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
6269                           Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
6270   }
6271   case X86::BI__builtin_ia32_stmxcsr: {
6272     Address Tmp = CreateMemTemp(E->getType());
6273     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
6274                        Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
6275     return Builder.CreateLoad(Tmp, "stmxcsr");
6276   }
6277   case X86::BI__builtin_ia32_xsave:
6278   case X86::BI__builtin_ia32_xsave64:
6279   case X86::BI__builtin_ia32_xrstor:
6280   case X86::BI__builtin_ia32_xrstor64:
6281   case X86::BI__builtin_ia32_xsaveopt:
6282   case X86::BI__builtin_ia32_xsaveopt64:
6283   case X86::BI__builtin_ia32_xrstors:
6284   case X86::BI__builtin_ia32_xrstors64:
6285   case X86::BI__builtin_ia32_xsavec:
6286   case X86::BI__builtin_ia32_xsavec64:
6287   case X86::BI__builtin_ia32_xsaves:
6288   case X86::BI__builtin_ia32_xsaves64: {
6289     Intrinsic::ID ID;
6290 #define INTRINSIC_X86_XSAVE_ID(NAME) \
6291     case X86::BI__builtin_ia32_##NAME: \
6292       ID = Intrinsic::x86_##NAME; \
6293       break
6294     switch (BuiltinID) {
6295     default: llvm_unreachable("Unsupported intrinsic!");
6296     INTRINSIC_X86_XSAVE_ID(xsave);
6297     INTRINSIC_X86_XSAVE_ID(xsave64);
6298     INTRINSIC_X86_XSAVE_ID(xrstor);
6299     INTRINSIC_X86_XSAVE_ID(xrstor64);
6300     INTRINSIC_X86_XSAVE_ID(xsaveopt);
6301     INTRINSIC_X86_XSAVE_ID(xsaveopt64);
6302     INTRINSIC_X86_XSAVE_ID(xrstors);
6303     INTRINSIC_X86_XSAVE_ID(xrstors64);
6304     INTRINSIC_X86_XSAVE_ID(xsavec);
6305     INTRINSIC_X86_XSAVE_ID(xsavec64);
6306     INTRINSIC_X86_XSAVE_ID(xsaves);
6307     INTRINSIC_X86_XSAVE_ID(xsaves64);
6308     }
6309 #undef INTRINSIC_X86_XSAVE_ID
6310     Value *Mhi = Builder.CreateTrunc(
6311       Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
6312     Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
6313     Ops[1] = Mhi;
6314     Ops.push_back(Mlo);
6315     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
6316   }
6317   case X86::BI__builtin_ia32_storehps:
6318   case X86::BI__builtin_ia32_storelps: {
6319     llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
6320     llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
6321 
6322     // cast val v2i64
6323     Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
6324 
6325     // extract (0, 1)
6326     unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
6327     llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
6328     Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
6329 
6330     // cast pointer to i64 & store
6331     Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
6332     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6333   }
6334   case X86::BI__builtin_ia32_palignr128:
6335   case X86::BI__builtin_ia32_palignr256: {
6336     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
6337 
6338     unsigned NumElts =
6339       cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
6340     assert(NumElts % 16 == 0);
6341     unsigned NumLanes = NumElts / 16;
6342     unsigned NumLaneElts = NumElts / NumLanes;
6343 
6344     // If palignr is shifting the pair of vectors more than the size of two
6345     // lanes, emit zero.
6346     if (ShiftVal >= (2 * NumLaneElts))
6347       return llvm::Constant::getNullValue(ConvertType(E->getType()));
6348 
6349     // If palignr is shifting the pair of input vectors more than one lane,
6350     // but less than two lanes, convert to shifting in zeroes.
6351     if (ShiftVal > NumLaneElts) {
6352       ShiftVal -= NumLaneElts;
6353       Ops[1] = Ops[0];
6354       Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
6355     }
6356 
6357     uint32_t Indices[32];
6358     // 256-bit palignr operates on 128-bit lanes so we need to handle that
6359     for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
6360       for (unsigned i = 0; i != NumLaneElts; ++i) {
6361         unsigned Idx = ShiftVal + i;
6362         if (Idx >= NumLaneElts)
6363           Idx += NumElts - NumLaneElts; // End of lane, switch operand.
6364         Indices[l + i] = Idx + l;
6365       }
6366     }
6367 
6368     Value *SV = llvm::ConstantDataVector::get(getLLVMContext(),
6369                                               makeArrayRef(Indices, NumElts));
6370     return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
6371   }
6372   case X86::BI__builtin_ia32_pslldqi256: {
6373     // Shift value is in bits so divide by 8.
6374     unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3;
6375 
6376     // If pslldq is shifting the vector more than 15 bytes, emit zero.
6377     if (shiftVal >= 16)
6378       return llvm::Constant::getNullValue(ConvertType(E->getType()));
6379 
6380     uint32_t Indices[32];
6381     // 256-bit pslldq operates on 128-bit lanes so we need to handle that
6382     for (unsigned l = 0; l != 32; l += 16) {
6383       for (unsigned i = 0; i != 16; ++i) {
6384         unsigned Idx = 32 + i - shiftVal;
6385         if (Idx < 32) Idx -= 16; // end of lane, switch operand.
6386         Indices[l + i] = Idx + l;
6387       }
6388     }
6389 
6390     llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32);
6391     Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
6392     Value *Zero = llvm::Constant::getNullValue(VecTy);
6393 
6394     Value *SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
6395     SV = Builder.CreateShuffleVector(Zero, Ops[0], SV, "pslldq");
6396     llvm::Type *ResultType = ConvertType(E->getType());
6397     return Builder.CreateBitCast(SV, ResultType, "cast");
6398   }
6399   case X86::BI__builtin_ia32_psrldqi256: {
6400     // Shift value is in bits so divide by 8.
6401     unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3;
6402 
6403     // If psrldq is shifting the vector more than 15 bytes, emit zero.
6404     if (shiftVal >= 16)
6405       return llvm::Constant::getNullValue(ConvertType(E->getType()));
6406 
6407     uint32_t Indices[32];
6408     // 256-bit psrldq operates on 128-bit lanes so we need to handle that
6409     for (unsigned l = 0; l != 32; l += 16) {
6410       for (unsigned i = 0; i != 16; ++i) {
6411         unsigned Idx = i + shiftVal;
6412         if (Idx >= 16) Idx += 16; // end of lane, switch operand.
6413         Indices[l + i] = Idx + l;
6414       }
6415     }
6416 
6417     llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32);
6418     Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
6419     Value *Zero = llvm::Constant::getNullValue(VecTy);
6420 
6421     Value *SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
6422     SV = Builder.CreateShuffleVector(Ops[0], Zero, SV, "psrldq");
6423     llvm::Type *ResultType = ConvertType(E->getType());
6424     return Builder.CreateBitCast(SV, ResultType, "cast");
6425   }
6426   case X86::BI__builtin_ia32_movntps:
6427   case X86::BI__builtin_ia32_movntps256:
6428   case X86::BI__builtin_ia32_movntpd:
6429   case X86::BI__builtin_ia32_movntpd256:
6430   case X86::BI__builtin_ia32_movntdq:
6431   case X86::BI__builtin_ia32_movntdq256:
6432   case X86::BI__builtin_ia32_movnti:
6433   case X86::BI__builtin_ia32_movnti64: {
6434     llvm::MDNode *Node = llvm::MDNode::get(
6435         getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
6436 
6437     // Convert the type of the pointer to a pointer to the stored type.
6438     Value *BC = Builder.CreateBitCast(Ops[0],
6439                                 llvm::PointerType::getUnqual(Ops[1]->getType()),
6440                                       "cast");
6441     StoreInst *SI = Builder.CreateDefaultAlignedStore(Ops[1], BC);
6442     SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
6443 
6444     // If the operand is an integer, we can't assume alignment. Otherwise,
6445     // assume natural alignment.
6446     QualType ArgTy = E->getArg(1)->getType();
6447     unsigned Align;
6448     if (ArgTy->isIntegerType())
6449       Align = 1;
6450     else
6451       Align = getContext().getTypeSizeInChars(ArgTy).getQuantity();
6452     SI->setAlignment(Align);
6453     return SI;
6454   }
6455   // 3DNow!
6456   case X86::BI__builtin_ia32_pswapdsf:
6457   case X86::BI__builtin_ia32_pswapdsi: {
6458     llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
6459     Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
6460     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
6461     return Builder.CreateCall(F, Ops, "pswapd");
6462   }
6463   case X86::BI__builtin_ia32_rdrand16_step:
6464   case X86::BI__builtin_ia32_rdrand32_step:
6465   case X86::BI__builtin_ia32_rdrand64_step:
6466   case X86::BI__builtin_ia32_rdseed16_step:
6467   case X86::BI__builtin_ia32_rdseed32_step:
6468   case X86::BI__builtin_ia32_rdseed64_step: {
6469     Intrinsic::ID ID;
6470     switch (BuiltinID) {
6471     default: llvm_unreachable("Unsupported intrinsic!");
6472     case X86::BI__builtin_ia32_rdrand16_step:
6473       ID = Intrinsic::x86_rdrand_16;
6474       break;
6475     case X86::BI__builtin_ia32_rdrand32_step:
6476       ID = Intrinsic::x86_rdrand_32;
6477       break;
6478     case X86::BI__builtin_ia32_rdrand64_step:
6479       ID = Intrinsic::x86_rdrand_64;
6480       break;
6481     case X86::BI__builtin_ia32_rdseed16_step:
6482       ID = Intrinsic::x86_rdseed_16;
6483       break;
6484     case X86::BI__builtin_ia32_rdseed32_step:
6485       ID = Intrinsic::x86_rdseed_32;
6486       break;
6487     case X86::BI__builtin_ia32_rdseed64_step:
6488       ID = Intrinsic::x86_rdseed_64;
6489       break;
6490     }
6491 
6492     Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
6493     Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
6494                                       Ops[0]);
6495     return Builder.CreateExtractValue(Call, 1);
6496   }
6497   // SSE comparison intrisics
6498   case X86::BI__builtin_ia32_cmpeqps:
6499   case X86::BI__builtin_ia32_cmpltps:
6500   case X86::BI__builtin_ia32_cmpleps:
6501   case X86::BI__builtin_ia32_cmpunordps:
6502   case X86::BI__builtin_ia32_cmpneqps:
6503   case X86::BI__builtin_ia32_cmpnltps:
6504   case X86::BI__builtin_ia32_cmpnleps:
6505   case X86::BI__builtin_ia32_cmpordps:
6506   case X86::BI__builtin_ia32_cmpeqss:
6507   case X86::BI__builtin_ia32_cmpltss:
6508   case X86::BI__builtin_ia32_cmpless:
6509   case X86::BI__builtin_ia32_cmpunordss:
6510   case X86::BI__builtin_ia32_cmpneqss:
6511   case X86::BI__builtin_ia32_cmpnltss:
6512   case X86::BI__builtin_ia32_cmpnless:
6513   case X86::BI__builtin_ia32_cmpordss:
6514   case X86::BI__builtin_ia32_cmpeqpd:
6515   case X86::BI__builtin_ia32_cmpltpd:
6516   case X86::BI__builtin_ia32_cmplepd:
6517   case X86::BI__builtin_ia32_cmpunordpd:
6518   case X86::BI__builtin_ia32_cmpneqpd:
6519   case X86::BI__builtin_ia32_cmpnltpd:
6520   case X86::BI__builtin_ia32_cmpnlepd:
6521   case X86::BI__builtin_ia32_cmpordpd:
6522   case X86::BI__builtin_ia32_cmpeqsd:
6523   case X86::BI__builtin_ia32_cmpltsd:
6524   case X86::BI__builtin_ia32_cmplesd:
6525   case X86::BI__builtin_ia32_cmpunordsd:
6526   case X86::BI__builtin_ia32_cmpneqsd:
6527   case X86::BI__builtin_ia32_cmpnltsd:
6528   case X86::BI__builtin_ia32_cmpnlesd:
6529   case X86::BI__builtin_ia32_cmpordsd:
6530     // These exist so that the builtin that takes an immediate can be bounds
6531     // checked by clang to avoid passing bad immediates to the backend. Since
6532     // AVX has a larger immediate than SSE we would need separate builtins to
6533     // do the different bounds checking. Rather than create a clang specific
6534     // SSE only builtin, this implements eight separate builtins to match gcc
6535     // implementation.
6536 
6537     // Choose the immediate.
6538     unsigned Imm;
6539     switch (BuiltinID) {
6540     default: llvm_unreachable("Unsupported intrinsic!");
6541     case X86::BI__builtin_ia32_cmpeqps:
6542     case X86::BI__builtin_ia32_cmpeqss:
6543     case X86::BI__builtin_ia32_cmpeqpd:
6544     case X86::BI__builtin_ia32_cmpeqsd:
6545       Imm = 0;
6546       break;
6547     case X86::BI__builtin_ia32_cmpltps:
6548     case X86::BI__builtin_ia32_cmpltss:
6549     case X86::BI__builtin_ia32_cmpltpd:
6550     case X86::BI__builtin_ia32_cmpltsd:
6551       Imm = 1;
6552       break;
6553     case X86::BI__builtin_ia32_cmpleps:
6554     case X86::BI__builtin_ia32_cmpless:
6555     case X86::BI__builtin_ia32_cmplepd:
6556     case X86::BI__builtin_ia32_cmplesd:
6557       Imm = 2;
6558       break;
6559     case X86::BI__builtin_ia32_cmpunordps:
6560     case X86::BI__builtin_ia32_cmpunordss:
6561     case X86::BI__builtin_ia32_cmpunordpd:
6562     case X86::BI__builtin_ia32_cmpunordsd:
6563       Imm = 3;
6564       break;
6565     case X86::BI__builtin_ia32_cmpneqps:
6566     case X86::BI__builtin_ia32_cmpneqss:
6567     case X86::BI__builtin_ia32_cmpneqpd:
6568     case X86::BI__builtin_ia32_cmpneqsd:
6569       Imm = 4;
6570       break;
6571     case X86::BI__builtin_ia32_cmpnltps:
6572     case X86::BI__builtin_ia32_cmpnltss:
6573     case X86::BI__builtin_ia32_cmpnltpd:
6574     case X86::BI__builtin_ia32_cmpnltsd:
6575       Imm = 5;
6576       break;
6577     case X86::BI__builtin_ia32_cmpnleps:
6578     case X86::BI__builtin_ia32_cmpnless:
6579     case X86::BI__builtin_ia32_cmpnlepd:
6580     case X86::BI__builtin_ia32_cmpnlesd:
6581       Imm = 6;
6582       break;
6583     case X86::BI__builtin_ia32_cmpordps:
6584     case X86::BI__builtin_ia32_cmpordss:
6585     case X86::BI__builtin_ia32_cmpordpd:
6586     case X86::BI__builtin_ia32_cmpordsd:
6587       Imm = 7;
6588       break;
6589     }
6590 
6591     // Choose the intrinsic ID.
6592     const char *name;
6593     Intrinsic::ID ID;
6594     switch (BuiltinID) {
6595     default: llvm_unreachable("Unsupported intrinsic!");
6596     case X86::BI__builtin_ia32_cmpeqps:
6597     case X86::BI__builtin_ia32_cmpltps:
6598     case X86::BI__builtin_ia32_cmpleps:
6599     case X86::BI__builtin_ia32_cmpunordps:
6600     case X86::BI__builtin_ia32_cmpneqps:
6601     case X86::BI__builtin_ia32_cmpnltps:
6602     case X86::BI__builtin_ia32_cmpnleps:
6603     case X86::BI__builtin_ia32_cmpordps:
6604       name = "cmpps";
6605       ID = Intrinsic::x86_sse_cmp_ps;
6606       break;
6607     case X86::BI__builtin_ia32_cmpeqss:
6608     case X86::BI__builtin_ia32_cmpltss:
6609     case X86::BI__builtin_ia32_cmpless:
6610     case X86::BI__builtin_ia32_cmpunordss:
6611     case X86::BI__builtin_ia32_cmpneqss:
6612     case X86::BI__builtin_ia32_cmpnltss:
6613     case X86::BI__builtin_ia32_cmpnless:
6614     case X86::BI__builtin_ia32_cmpordss:
6615       name = "cmpss";
6616       ID = Intrinsic::x86_sse_cmp_ss;
6617       break;
6618     case X86::BI__builtin_ia32_cmpeqpd:
6619     case X86::BI__builtin_ia32_cmpltpd:
6620     case X86::BI__builtin_ia32_cmplepd:
6621     case X86::BI__builtin_ia32_cmpunordpd:
6622     case X86::BI__builtin_ia32_cmpneqpd:
6623     case X86::BI__builtin_ia32_cmpnltpd:
6624     case X86::BI__builtin_ia32_cmpnlepd:
6625     case X86::BI__builtin_ia32_cmpordpd:
6626       name = "cmppd";
6627       ID = Intrinsic::x86_sse2_cmp_pd;
6628       break;
6629     case X86::BI__builtin_ia32_cmpeqsd:
6630     case X86::BI__builtin_ia32_cmpltsd:
6631     case X86::BI__builtin_ia32_cmplesd:
6632     case X86::BI__builtin_ia32_cmpunordsd:
6633     case X86::BI__builtin_ia32_cmpneqsd:
6634     case X86::BI__builtin_ia32_cmpnltsd:
6635     case X86::BI__builtin_ia32_cmpnlesd:
6636     case X86::BI__builtin_ia32_cmpordsd:
6637       name = "cmpsd";
6638       ID = Intrinsic::x86_sse2_cmp_sd;
6639       break;
6640     }
6641 
6642     Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
6643     llvm::Function *F = CGM.getIntrinsic(ID);
6644     return Builder.CreateCall(F, Ops, name);
6645   }
6646 }
6647 
6648 
6649 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
6650                                            const CallExpr *E) {
6651   SmallVector<Value*, 4> Ops;
6652 
6653   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
6654     Ops.push_back(EmitScalarExpr(E->getArg(i)));
6655 
6656   Intrinsic::ID ID = Intrinsic::not_intrinsic;
6657 
6658   switch (BuiltinID) {
6659   default: return nullptr;
6660 
6661   // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
6662   // call __builtin_readcyclecounter.
6663   case PPC::BI__builtin_ppc_get_timebase:
6664     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
6665 
6666   // vec_ld, vec_lvsl, vec_lvsr
6667   case PPC::BI__builtin_altivec_lvx:
6668   case PPC::BI__builtin_altivec_lvxl:
6669   case PPC::BI__builtin_altivec_lvebx:
6670   case PPC::BI__builtin_altivec_lvehx:
6671   case PPC::BI__builtin_altivec_lvewx:
6672   case PPC::BI__builtin_altivec_lvsl:
6673   case PPC::BI__builtin_altivec_lvsr:
6674   case PPC::BI__builtin_vsx_lxvd2x:
6675   case PPC::BI__builtin_vsx_lxvw4x:
6676   {
6677     Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
6678 
6679     Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
6680     Ops.pop_back();
6681 
6682     switch (BuiltinID) {
6683     default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
6684     case PPC::BI__builtin_altivec_lvx:
6685       ID = Intrinsic::ppc_altivec_lvx;
6686       break;
6687     case PPC::BI__builtin_altivec_lvxl:
6688       ID = Intrinsic::ppc_altivec_lvxl;
6689       break;
6690     case PPC::BI__builtin_altivec_lvebx:
6691       ID = Intrinsic::ppc_altivec_lvebx;
6692       break;
6693     case PPC::BI__builtin_altivec_lvehx:
6694       ID = Intrinsic::ppc_altivec_lvehx;
6695       break;
6696     case PPC::BI__builtin_altivec_lvewx:
6697       ID = Intrinsic::ppc_altivec_lvewx;
6698       break;
6699     case PPC::BI__builtin_altivec_lvsl:
6700       ID = Intrinsic::ppc_altivec_lvsl;
6701       break;
6702     case PPC::BI__builtin_altivec_lvsr:
6703       ID = Intrinsic::ppc_altivec_lvsr;
6704       break;
6705     case PPC::BI__builtin_vsx_lxvd2x:
6706       ID = Intrinsic::ppc_vsx_lxvd2x;
6707       break;
6708     case PPC::BI__builtin_vsx_lxvw4x:
6709       ID = Intrinsic::ppc_vsx_lxvw4x;
6710       break;
6711     }
6712     llvm::Function *F = CGM.getIntrinsic(ID);
6713     return Builder.CreateCall(F, Ops, "");
6714   }
6715 
6716   // vec_st
6717   case PPC::BI__builtin_altivec_stvx:
6718   case PPC::BI__builtin_altivec_stvxl:
6719   case PPC::BI__builtin_altivec_stvebx:
6720   case PPC::BI__builtin_altivec_stvehx:
6721   case PPC::BI__builtin_altivec_stvewx:
6722   case PPC::BI__builtin_vsx_stxvd2x:
6723   case PPC::BI__builtin_vsx_stxvw4x:
6724   {
6725     Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
6726     Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
6727     Ops.pop_back();
6728 
6729     switch (BuiltinID) {
6730     default: llvm_unreachable("Unsupported st intrinsic!");
6731     case PPC::BI__builtin_altivec_stvx:
6732       ID = Intrinsic::ppc_altivec_stvx;
6733       break;
6734     case PPC::BI__builtin_altivec_stvxl:
6735       ID = Intrinsic::ppc_altivec_stvxl;
6736       break;
6737     case PPC::BI__builtin_altivec_stvebx:
6738       ID = Intrinsic::ppc_altivec_stvebx;
6739       break;
6740     case PPC::BI__builtin_altivec_stvehx:
6741       ID = Intrinsic::ppc_altivec_stvehx;
6742       break;
6743     case PPC::BI__builtin_altivec_stvewx:
6744       ID = Intrinsic::ppc_altivec_stvewx;
6745       break;
6746     case PPC::BI__builtin_vsx_stxvd2x:
6747       ID = Intrinsic::ppc_vsx_stxvd2x;
6748       break;
6749     case PPC::BI__builtin_vsx_stxvw4x:
6750       ID = Intrinsic::ppc_vsx_stxvw4x;
6751       break;
6752     }
6753     llvm::Function *F = CGM.getIntrinsic(ID);
6754     return Builder.CreateCall(F, Ops, "");
6755   }
6756   // Square root
6757   case PPC::BI__builtin_vsx_xvsqrtsp:
6758   case PPC::BI__builtin_vsx_xvsqrtdp: {
6759     llvm::Type *ResultType = ConvertType(E->getType());
6760     Value *X = EmitScalarExpr(E->getArg(0));
6761     ID = Intrinsic::sqrt;
6762     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
6763     return Builder.CreateCall(F, X);
6764   }
6765   // Count leading zeros
6766   case PPC::BI__builtin_altivec_vclzb:
6767   case PPC::BI__builtin_altivec_vclzh:
6768   case PPC::BI__builtin_altivec_vclzw:
6769   case PPC::BI__builtin_altivec_vclzd: {
6770     llvm::Type *ResultType = ConvertType(E->getType());
6771     Value *X = EmitScalarExpr(E->getArg(0));
6772     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
6773     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
6774     return Builder.CreateCall(F, {X, Undef});
6775   }
6776   // Copy sign
6777   case PPC::BI__builtin_vsx_xvcpsgnsp:
6778   case PPC::BI__builtin_vsx_xvcpsgndp: {
6779     llvm::Type *ResultType = ConvertType(E->getType());
6780     Value *X = EmitScalarExpr(E->getArg(0));
6781     Value *Y = EmitScalarExpr(E->getArg(1));
6782     ID = Intrinsic::copysign;
6783     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
6784     return Builder.CreateCall(F, {X, Y});
6785   }
6786   // Rounding/truncation
6787   case PPC::BI__builtin_vsx_xvrspip:
6788   case PPC::BI__builtin_vsx_xvrdpip:
6789   case PPC::BI__builtin_vsx_xvrdpim:
6790   case PPC::BI__builtin_vsx_xvrspim:
6791   case PPC::BI__builtin_vsx_xvrdpi:
6792   case PPC::BI__builtin_vsx_xvrspi:
6793   case PPC::BI__builtin_vsx_xvrdpic:
6794   case PPC::BI__builtin_vsx_xvrspic:
6795   case PPC::BI__builtin_vsx_xvrdpiz:
6796   case PPC::BI__builtin_vsx_xvrspiz: {
6797     llvm::Type *ResultType = ConvertType(E->getType());
6798     Value *X = EmitScalarExpr(E->getArg(0));
6799     if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
6800         BuiltinID == PPC::BI__builtin_vsx_xvrspim)
6801       ID = Intrinsic::floor;
6802     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
6803              BuiltinID == PPC::BI__builtin_vsx_xvrspi)
6804       ID = Intrinsic::round;
6805     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
6806              BuiltinID == PPC::BI__builtin_vsx_xvrspic)
6807       ID = Intrinsic::nearbyint;
6808     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
6809              BuiltinID == PPC::BI__builtin_vsx_xvrspip)
6810       ID = Intrinsic::ceil;
6811     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
6812              BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
6813       ID = Intrinsic::trunc;
6814     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
6815     return Builder.CreateCall(F, X);
6816   }
6817   // FMA variations
6818   case PPC::BI__builtin_vsx_xvmaddadp:
6819   case PPC::BI__builtin_vsx_xvmaddasp:
6820   case PPC::BI__builtin_vsx_xvnmaddadp:
6821   case PPC::BI__builtin_vsx_xvnmaddasp:
6822   case PPC::BI__builtin_vsx_xvmsubadp:
6823   case PPC::BI__builtin_vsx_xvmsubasp:
6824   case PPC::BI__builtin_vsx_xvnmsubadp:
6825   case PPC::BI__builtin_vsx_xvnmsubasp: {
6826     llvm::Type *ResultType = ConvertType(E->getType());
6827     Value *X = EmitScalarExpr(E->getArg(0));
6828     Value *Y = EmitScalarExpr(E->getArg(1));
6829     Value *Z = EmitScalarExpr(E->getArg(2));
6830     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
6831     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
6832     switch (BuiltinID) {
6833       case PPC::BI__builtin_vsx_xvmaddadp:
6834       case PPC::BI__builtin_vsx_xvmaddasp:
6835         return Builder.CreateCall(F, {X, Y, Z});
6836       case PPC::BI__builtin_vsx_xvnmaddadp:
6837       case PPC::BI__builtin_vsx_xvnmaddasp:
6838         return Builder.CreateFSub(Zero,
6839                                   Builder.CreateCall(F, {X, Y, Z}), "sub");
6840       case PPC::BI__builtin_vsx_xvmsubadp:
6841       case PPC::BI__builtin_vsx_xvmsubasp:
6842         return Builder.CreateCall(F,
6843                                   {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
6844       case PPC::BI__builtin_vsx_xvnmsubadp:
6845       case PPC::BI__builtin_vsx_xvnmsubasp:
6846         Value *FsubRes =
6847           Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
6848         return Builder.CreateFSub(Zero, FsubRes, "sub");
6849     }
6850     llvm_unreachable("Unknown FMA operation");
6851     return nullptr; // Suppress no-return warning
6852   }
6853   }
6854 }
6855 
6856 // Emit an intrinsic that has 1 float or double.
6857 static Value *emitUnaryFPBuiltin(CodeGenFunction &CGF,
6858                                  const CallExpr *E,
6859                                  unsigned IntrinsicID) {
6860   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
6861 
6862   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
6863   return CGF.Builder.CreateCall(F, Src0);
6864 }
6865 
6866 // Emit an intrinsic that has 3 float or double operands.
6867 static Value *emitTernaryFPBuiltin(CodeGenFunction &CGF,
6868                                    const CallExpr *E,
6869                                    unsigned IntrinsicID) {
6870   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
6871   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
6872   llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
6873 
6874   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
6875   return CGF.Builder.CreateCall(F, {Src0, Src1, Src2});
6876 }
6877 
6878 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
6879 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
6880                                const CallExpr *E,
6881                                unsigned IntrinsicID) {
6882   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
6883   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
6884 
6885   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
6886   return CGF.Builder.CreateCall(F, {Src0, Src1});
6887 }
6888 
6889 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
6890                                               const CallExpr *E) {
6891   switch (BuiltinID) {
6892   case AMDGPU::BI__builtin_amdgpu_div_scale:
6893   case AMDGPU::BI__builtin_amdgpu_div_scalef: {
6894     // Translate from the intrinsics's struct return to the builtin's out
6895     // argument.
6896 
6897     Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
6898 
6899     llvm::Value *X = EmitScalarExpr(E->getArg(0));
6900     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
6901     llvm::Value *Z = EmitScalarExpr(E->getArg(2));
6902 
6903     llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::AMDGPU_div_scale,
6904                                            X->getType());
6905 
6906     llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
6907 
6908     llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
6909     llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
6910 
6911     llvm::Type *RealFlagType
6912       = FlagOutPtr.getPointer()->getType()->getPointerElementType();
6913 
6914     llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
6915     Builder.CreateStore(FlagExt, FlagOutPtr);
6916     return Result;
6917   }
6918   case AMDGPU::BI__builtin_amdgpu_div_fmas:
6919   case AMDGPU::BI__builtin_amdgpu_div_fmasf: {
6920     llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
6921     llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
6922     llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
6923     llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
6924 
6925     llvm::Value *F = CGM.getIntrinsic(Intrinsic::AMDGPU_div_fmas,
6926                                       Src0->getType());
6927     llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
6928     return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
6929   }
6930   case AMDGPU::BI__builtin_amdgpu_div_fixup:
6931   case AMDGPU::BI__builtin_amdgpu_div_fixupf:
6932     return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fixup);
6933   case AMDGPU::BI__builtin_amdgpu_trig_preop:
6934   case AMDGPU::BI__builtin_amdgpu_trig_preopf:
6935     return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_trig_preop);
6936   case AMDGPU::BI__builtin_amdgpu_rcp:
6937   case AMDGPU::BI__builtin_amdgpu_rcpf:
6938     return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rcp);
6939   case AMDGPU::BI__builtin_amdgpu_rsq:
6940   case AMDGPU::BI__builtin_amdgpu_rsqf:
6941     return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq);
6942   case AMDGPU::BI__builtin_amdgpu_rsq_clamped:
6943   case AMDGPU::BI__builtin_amdgpu_rsq_clampedf:
6944     return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq_clamped);
6945   case AMDGPU::BI__builtin_amdgpu_ldexp:
6946   case AMDGPU::BI__builtin_amdgpu_ldexpf:
6947     return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_ldexp);
6948   case AMDGPU::BI__builtin_amdgpu_class:
6949   case AMDGPU::BI__builtin_amdgpu_classf:
6950     return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_class);
6951    default:
6952     return nullptr;
6953   }
6954 }
6955 
6956 /// Handle a SystemZ function in which the final argument is a pointer
6957 /// to an int that receives the post-instruction CC value.  At the LLVM level
6958 /// this is represented as a function that returns a {result, cc} pair.
6959 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
6960                                          unsigned IntrinsicID,
6961                                          const CallExpr *E) {
6962   unsigned NumArgs = E->getNumArgs() - 1;
6963   SmallVector<Value *, 8> Args(NumArgs);
6964   for (unsigned I = 0; I < NumArgs; ++I)
6965     Args[I] = CGF.EmitScalarExpr(E->getArg(I));
6966   Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
6967   Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
6968   Value *Call = CGF.Builder.CreateCall(F, Args);
6969   Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
6970   CGF.Builder.CreateStore(CC, CCPtr);
6971   return CGF.Builder.CreateExtractValue(Call, 0);
6972 }
6973 
6974 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
6975                                                const CallExpr *E) {
6976   switch (BuiltinID) {
6977   case SystemZ::BI__builtin_tbegin: {
6978     Value *TDB = EmitScalarExpr(E->getArg(0));
6979     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
6980     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
6981     return Builder.CreateCall(F, {TDB, Control});
6982   }
6983   case SystemZ::BI__builtin_tbegin_nofloat: {
6984     Value *TDB = EmitScalarExpr(E->getArg(0));
6985     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
6986     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
6987     return Builder.CreateCall(F, {TDB, Control});
6988   }
6989   case SystemZ::BI__builtin_tbeginc: {
6990     Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
6991     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
6992     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
6993     return Builder.CreateCall(F, {TDB, Control});
6994   }
6995   case SystemZ::BI__builtin_tabort: {
6996     Value *Data = EmitScalarExpr(E->getArg(0));
6997     Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
6998     return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
6999   }
7000   case SystemZ::BI__builtin_non_tx_store: {
7001     Value *Address = EmitScalarExpr(E->getArg(0));
7002     Value *Data = EmitScalarExpr(E->getArg(1));
7003     Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
7004     return Builder.CreateCall(F, {Data, Address});
7005   }
7006 
7007   // Vector builtins.  Note that most vector builtins are mapped automatically
7008   // to target-specific LLVM intrinsics.  The ones handled specially here can
7009   // be represented via standard LLVM IR, which is preferable to enable common
7010   // LLVM optimizations.
7011 
7012   case SystemZ::BI__builtin_s390_vpopctb:
7013   case SystemZ::BI__builtin_s390_vpopcth:
7014   case SystemZ::BI__builtin_s390_vpopctf:
7015   case SystemZ::BI__builtin_s390_vpopctg: {
7016     llvm::Type *ResultType = ConvertType(E->getType());
7017     Value *X = EmitScalarExpr(E->getArg(0));
7018     Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
7019     return Builder.CreateCall(F, X);
7020   }
7021 
7022   case SystemZ::BI__builtin_s390_vclzb:
7023   case SystemZ::BI__builtin_s390_vclzh:
7024   case SystemZ::BI__builtin_s390_vclzf:
7025   case SystemZ::BI__builtin_s390_vclzg: {
7026     llvm::Type *ResultType = ConvertType(E->getType());
7027     Value *X = EmitScalarExpr(E->getArg(0));
7028     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
7029     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
7030     return Builder.CreateCall(F, {X, Undef});
7031   }
7032 
7033   case SystemZ::BI__builtin_s390_vctzb:
7034   case SystemZ::BI__builtin_s390_vctzh:
7035   case SystemZ::BI__builtin_s390_vctzf:
7036   case SystemZ::BI__builtin_s390_vctzg: {
7037     llvm::Type *ResultType = ConvertType(E->getType());
7038     Value *X = EmitScalarExpr(E->getArg(0));
7039     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
7040     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
7041     return Builder.CreateCall(F, {X, Undef});
7042   }
7043 
7044   case SystemZ::BI__builtin_s390_vfsqdb: {
7045     llvm::Type *ResultType = ConvertType(E->getType());
7046     Value *X = EmitScalarExpr(E->getArg(0));
7047     Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
7048     return Builder.CreateCall(F, X);
7049   }
7050   case SystemZ::BI__builtin_s390_vfmadb: {
7051     llvm::Type *ResultType = ConvertType(E->getType());
7052     Value *X = EmitScalarExpr(E->getArg(0));
7053     Value *Y = EmitScalarExpr(E->getArg(1));
7054     Value *Z = EmitScalarExpr(E->getArg(2));
7055     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
7056     return Builder.CreateCall(F, {X, Y, Z});
7057   }
7058   case SystemZ::BI__builtin_s390_vfmsdb: {
7059     llvm::Type *ResultType = ConvertType(E->getType());
7060     Value *X = EmitScalarExpr(E->getArg(0));
7061     Value *Y = EmitScalarExpr(E->getArg(1));
7062     Value *Z = EmitScalarExpr(E->getArg(2));
7063     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
7064     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
7065     return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
7066   }
7067   case SystemZ::BI__builtin_s390_vflpdb: {
7068     llvm::Type *ResultType = ConvertType(E->getType());
7069     Value *X = EmitScalarExpr(E->getArg(0));
7070     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
7071     return Builder.CreateCall(F, X);
7072   }
7073   case SystemZ::BI__builtin_s390_vflndb: {
7074     llvm::Type *ResultType = ConvertType(E->getType());
7075     Value *X = EmitScalarExpr(E->getArg(0));
7076     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
7077     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
7078     return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
7079   }
7080   case SystemZ::BI__builtin_s390_vfidb: {
7081     llvm::Type *ResultType = ConvertType(E->getType());
7082     Value *X = EmitScalarExpr(E->getArg(0));
7083     // Constant-fold the M4 and M5 mask arguments.
7084     llvm::APSInt M4, M5;
7085     bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
7086     bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
7087     assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
7088     (void)IsConstM4; (void)IsConstM5;
7089     // Check whether this instance of vfidb can be represented via a LLVM
7090     // standard intrinsic.  We only support some combinations of M4 and M5.
7091     Intrinsic::ID ID = Intrinsic::not_intrinsic;
7092     switch (M4.getZExtValue()) {
7093     default: break;
7094     case 0:  // IEEE-inexact exception allowed
7095       switch (M5.getZExtValue()) {
7096       default: break;
7097       case 0: ID = Intrinsic::rint; break;
7098       }
7099       break;
7100     case 4:  // IEEE-inexact exception suppressed
7101       switch (M5.getZExtValue()) {
7102       default: break;
7103       case 0: ID = Intrinsic::nearbyint; break;
7104       case 1: ID = Intrinsic::round; break;
7105       case 5: ID = Intrinsic::trunc; break;
7106       case 6: ID = Intrinsic::ceil; break;
7107       case 7: ID = Intrinsic::floor; break;
7108       }
7109       break;
7110     }
7111     if (ID != Intrinsic::not_intrinsic) {
7112       Function *F = CGM.getIntrinsic(ID, ResultType);
7113       return Builder.CreateCall(F, X);
7114     }
7115     Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb);
7116     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
7117     Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
7118     return Builder.CreateCall(F, {X, M4Value, M5Value});
7119   }
7120 
7121   // Vector intrisincs that output the post-instruction CC value.
7122 
7123 #define INTRINSIC_WITH_CC(NAME) \
7124     case SystemZ::BI__builtin_##NAME: \
7125       return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
7126 
7127   INTRINSIC_WITH_CC(s390_vpkshs);
7128   INTRINSIC_WITH_CC(s390_vpksfs);
7129   INTRINSIC_WITH_CC(s390_vpksgs);
7130 
7131   INTRINSIC_WITH_CC(s390_vpklshs);
7132   INTRINSIC_WITH_CC(s390_vpklsfs);
7133   INTRINSIC_WITH_CC(s390_vpklsgs);
7134 
7135   INTRINSIC_WITH_CC(s390_vceqbs);
7136   INTRINSIC_WITH_CC(s390_vceqhs);
7137   INTRINSIC_WITH_CC(s390_vceqfs);
7138   INTRINSIC_WITH_CC(s390_vceqgs);
7139 
7140   INTRINSIC_WITH_CC(s390_vchbs);
7141   INTRINSIC_WITH_CC(s390_vchhs);
7142   INTRINSIC_WITH_CC(s390_vchfs);
7143   INTRINSIC_WITH_CC(s390_vchgs);
7144 
7145   INTRINSIC_WITH_CC(s390_vchlbs);
7146   INTRINSIC_WITH_CC(s390_vchlhs);
7147   INTRINSIC_WITH_CC(s390_vchlfs);
7148   INTRINSIC_WITH_CC(s390_vchlgs);
7149 
7150   INTRINSIC_WITH_CC(s390_vfaebs);
7151   INTRINSIC_WITH_CC(s390_vfaehs);
7152   INTRINSIC_WITH_CC(s390_vfaefs);
7153 
7154   INTRINSIC_WITH_CC(s390_vfaezbs);
7155   INTRINSIC_WITH_CC(s390_vfaezhs);
7156   INTRINSIC_WITH_CC(s390_vfaezfs);
7157 
7158   INTRINSIC_WITH_CC(s390_vfeebs);
7159   INTRINSIC_WITH_CC(s390_vfeehs);
7160   INTRINSIC_WITH_CC(s390_vfeefs);
7161 
7162   INTRINSIC_WITH_CC(s390_vfeezbs);
7163   INTRINSIC_WITH_CC(s390_vfeezhs);
7164   INTRINSIC_WITH_CC(s390_vfeezfs);
7165 
7166   INTRINSIC_WITH_CC(s390_vfenebs);
7167   INTRINSIC_WITH_CC(s390_vfenehs);
7168   INTRINSIC_WITH_CC(s390_vfenefs);
7169 
7170   INTRINSIC_WITH_CC(s390_vfenezbs);
7171   INTRINSIC_WITH_CC(s390_vfenezhs);
7172   INTRINSIC_WITH_CC(s390_vfenezfs);
7173 
7174   INTRINSIC_WITH_CC(s390_vistrbs);
7175   INTRINSIC_WITH_CC(s390_vistrhs);
7176   INTRINSIC_WITH_CC(s390_vistrfs);
7177 
7178   INTRINSIC_WITH_CC(s390_vstrcbs);
7179   INTRINSIC_WITH_CC(s390_vstrchs);
7180   INTRINSIC_WITH_CC(s390_vstrcfs);
7181 
7182   INTRINSIC_WITH_CC(s390_vstrczbs);
7183   INTRINSIC_WITH_CC(s390_vstrczhs);
7184   INTRINSIC_WITH_CC(s390_vstrczfs);
7185 
7186   INTRINSIC_WITH_CC(s390_vfcedbs);
7187   INTRINSIC_WITH_CC(s390_vfchdbs);
7188   INTRINSIC_WITH_CC(s390_vfchedbs);
7189 
7190   INTRINSIC_WITH_CC(s390_vftcidb);
7191 
7192 #undef INTRINSIC_WITH_CC
7193 
7194   default:
7195     return nullptr;
7196   }
7197 }
7198 
7199 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
7200                                              const CallExpr *E) {
7201   switch (BuiltinID) {
7202   case NVPTX::BI__nvvm_atom_add_gen_i:
7203   case NVPTX::BI__nvvm_atom_add_gen_l:
7204   case NVPTX::BI__nvvm_atom_add_gen_ll:
7205     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
7206 
7207   case NVPTX::BI__nvvm_atom_sub_gen_i:
7208   case NVPTX::BI__nvvm_atom_sub_gen_l:
7209   case NVPTX::BI__nvvm_atom_sub_gen_ll:
7210     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
7211 
7212   case NVPTX::BI__nvvm_atom_and_gen_i:
7213   case NVPTX::BI__nvvm_atom_and_gen_l:
7214   case NVPTX::BI__nvvm_atom_and_gen_ll:
7215     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
7216 
7217   case NVPTX::BI__nvvm_atom_or_gen_i:
7218   case NVPTX::BI__nvvm_atom_or_gen_l:
7219   case NVPTX::BI__nvvm_atom_or_gen_ll:
7220     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
7221 
7222   case NVPTX::BI__nvvm_atom_xor_gen_i:
7223   case NVPTX::BI__nvvm_atom_xor_gen_l:
7224   case NVPTX::BI__nvvm_atom_xor_gen_ll:
7225     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
7226 
7227   case NVPTX::BI__nvvm_atom_xchg_gen_i:
7228   case NVPTX::BI__nvvm_atom_xchg_gen_l:
7229   case NVPTX::BI__nvvm_atom_xchg_gen_ll:
7230     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
7231 
7232   case NVPTX::BI__nvvm_atom_max_gen_i:
7233   case NVPTX::BI__nvvm_atom_max_gen_l:
7234   case NVPTX::BI__nvvm_atom_max_gen_ll:
7235     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
7236 
7237   case NVPTX::BI__nvvm_atom_max_gen_ui:
7238   case NVPTX::BI__nvvm_atom_max_gen_ul:
7239   case NVPTX::BI__nvvm_atom_max_gen_ull:
7240     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
7241 
7242   case NVPTX::BI__nvvm_atom_min_gen_i:
7243   case NVPTX::BI__nvvm_atom_min_gen_l:
7244   case NVPTX::BI__nvvm_atom_min_gen_ll:
7245     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
7246 
7247   case NVPTX::BI__nvvm_atom_min_gen_ui:
7248   case NVPTX::BI__nvvm_atom_min_gen_ul:
7249   case NVPTX::BI__nvvm_atom_min_gen_ull:
7250     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
7251 
7252   case NVPTX::BI__nvvm_atom_cas_gen_i:
7253   case NVPTX::BI__nvvm_atom_cas_gen_l:
7254   case NVPTX::BI__nvvm_atom_cas_gen_ll:
7255     // __nvvm_atom_cas_gen_* should return the old value rather than the
7256     // success flag.
7257     return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
7258 
7259   case NVPTX::BI__nvvm_atom_add_gen_f: {
7260     Value *Ptr = EmitScalarExpr(E->getArg(0));
7261     Value *Val = EmitScalarExpr(E->getArg(1));
7262     // atomicrmw only deals with integer arguments so we need to use
7263     // LLVM's nvvm_atomic_load_add_f32 intrinsic for that.
7264     Value *FnALAF32 =
7265         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
7266     return Builder.CreateCall(FnALAF32, {Ptr, Val});
7267   }
7268 
7269   default:
7270     return nullptr;
7271   }
7272 }
7273 
7274 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
7275                                                    const CallExpr *E) {
7276   switch (BuiltinID) {
7277   case WebAssembly::BI__builtin_wasm_page_size: {
7278     llvm::Type *ResultType = ConvertType(E->getType());
7279     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_page_size, ResultType);
7280     return Builder.CreateCall(Callee);
7281   }
7282   case WebAssembly::BI__builtin_wasm_memory_size: {
7283     llvm::Type *ResultType = ConvertType(E->getType());
7284     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
7285     return Builder.CreateCall(Callee);
7286   }
7287   case WebAssembly::BI__builtin_wasm_resize_memory: {
7288     Value *X = EmitScalarExpr(E->getArg(0));
7289     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_resize_memory, X->getType());
7290     return Builder.CreateCall(Callee, X);
7291   }
7292 
7293   default:
7294     return nullptr;
7295   }
7296 }
7297