1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "TargetInfo.h"
15 #include "CodeGenFunction.h"
16 #include "CodeGenModule.h"
17 #include "CGObjCRuntime.h"
18 #include "clang/Basic/TargetInfo.h"
19 #include "clang/AST/APValue.h"
20 #include "clang/AST/ASTContext.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/Basic/TargetBuiltins.h"
23 #include "llvm/Intrinsics.h"
24 #include "llvm/Target/TargetData.h"
25 
26 using namespace clang;
27 using namespace CodeGen;
28 using namespace llvm;
29 
30 /// getBuiltinLibFunction - Given a builtin id for a function like
31 /// "__builtin_fabsf", return a Function* for "fabsf".
32 llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
33                                                   unsigned BuiltinID) {
34   assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
35 
36   // Get the name, skip over the __builtin_ prefix (if necessary).
37   StringRef Name;
38   GlobalDecl D(FD);
39 
40   // If the builtin has been declared explicitly with an assembler label,
41   // use the mangled name. This differs from the plain label on platforms
42   // that prefix labels.
43   if (FD->hasAttr<AsmLabelAttr>())
44     Name = getMangledName(D);
45   else
46     Name = Context.BuiltinInfo.GetName(BuiltinID) + 10;
47 
48   llvm::FunctionType *Ty =
49     cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
50 
51   return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
52 }
53 
54 /// Emit the conversions required to turn the given value into an
55 /// integer of the given size.
56 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
57                         QualType T, llvm::IntegerType *IntType) {
58   V = CGF.EmitToMemory(V, T);
59 
60   if (V->getType()->isPointerTy())
61     return CGF.Builder.CreatePtrToInt(V, IntType);
62 
63   assert(V->getType() == IntType);
64   return V;
65 }
66 
67 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
68                           QualType T, llvm::Type *ResultType) {
69   V = CGF.EmitFromMemory(V, T);
70 
71   if (ResultType->isPointerTy())
72     return CGF.Builder.CreateIntToPtr(V, ResultType);
73 
74   assert(V->getType() == ResultType);
75   return V;
76 }
77 
78 /// Utility to insert an atomic instruction based on Instrinsic::ID
79 /// and the expression node.
80 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
81                                llvm::AtomicRMWInst::BinOp Kind,
82                                const CallExpr *E) {
83   QualType T = E->getType();
84   assert(E->getArg(0)->getType()->isPointerType());
85   assert(CGF.getContext().hasSameUnqualifiedType(T,
86                                   E->getArg(0)->getType()->getPointeeType()));
87   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
88 
89   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
90   unsigned AddrSpace =
91     cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();
92 
93   llvm::IntegerType *IntType =
94     llvm::IntegerType::get(CGF.getLLVMContext(),
95                            CGF.getContext().getTypeSize(T));
96   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
97 
98   llvm::Value *Args[2];
99   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
100   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
101   llvm::Type *ValueType = Args[1]->getType();
102   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
103 
104   llvm::Value *Result =
105       CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
106                                   llvm::SequentiallyConsistent);
107   Result = EmitFromInt(CGF, Result, T, ValueType);
108   return RValue::get(Result);
109 }
110 
111 /// Utility to insert an atomic instruction based Instrinsic::ID and
112 /// the expression node, where the return value is the result of the
113 /// operation.
114 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
115                                    llvm::AtomicRMWInst::BinOp Kind,
116                                    const CallExpr *E,
117                                    Instruction::BinaryOps Op) {
118   QualType T = E->getType();
119   assert(E->getArg(0)->getType()->isPointerType());
120   assert(CGF.getContext().hasSameUnqualifiedType(T,
121                                   E->getArg(0)->getType()->getPointeeType()));
122   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
123 
124   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
125   unsigned AddrSpace =
126     cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();
127 
128   llvm::IntegerType *IntType =
129     llvm::IntegerType::get(CGF.getLLVMContext(),
130                            CGF.getContext().getTypeSize(T));
131   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
132 
133   llvm::Value *Args[2];
134   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
135   llvm::Type *ValueType = Args[1]->getType();
136   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
137   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
138 
139   llvm::Value *Result =
140       CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
141                                   llvm::SequentiallyConsistent);
142   Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
143   Result = EmitFromInt(CGF, Result, T, ValueType);
144   return RValue::get(Result);
145 }
146 
147 /// EmitFAbs - Emit a call to fabs/fabsf/fabsl, depending on the type of ValTy,
148 /// which must be a scalar floating point type.
149 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V, QualType ValTy) {
150   const BuiltinType *ValTyP = ValTy->getAs<BuiltinType>();
151   assert(ValTyP && "isn't scalar fp type!");
152 
153   StringRef FnName;
154   switch (ValTyP->getKind()) {
155   default: llvm_unreachable("Isn't a scalar fp type!");
156   case BuiltinType::Float:      FnName = "fabsf"; break;
157   case BuiltinType::Double:     FnName = "fabs"; break;
158   case BuiltinType::LongDouble: FnName = "fabsl"; break;
159   }
160 
161   // The prototype is something that takes and returns whatever V's type is.
162   llvm::FunctionType *FT = llvm::FunctionType::get(V->getType(), V->getType(),
163                                                    false);
164   llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction(FT, FnName);
165 
166   return CGF.Builder.CreateCall(Fn, V, "abs");
167 }
168 
169 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn,
170                               const CallExpr *E, llvm::Value *calleeValue) {
171   return CGF.EmitCall(E->getCallee()->getType(), calleeValue,
172                       ReturnValueSlot(), E->arg_begin(), E->arg_end(), Fn);
173 }
174 
175 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
176                                         unsigned BuiltinID, const CallExpr *E) {
177   // See if we can constant fold this builtin.  If so, don't emit it at all.
178   Expr::EvalResult Result;
179   if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
180       !Result.hasSideEffects()) {
181     if (Result.Val.isInt())
182       return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
183                                                 Result.Val.getInt()));
184     if (Result.Val.isFloat())
185       return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
186                                                Result.Val.getFloat()));
187   }
188 
189   switch (BuiltinID) {
190   default: break;  // Handle intrinsics and libm functions below.
191   case Builtin::BI__builtin___CFStringMakeConstantString:
192   case Builtin::BI__builtin___NSStringMakeConstantString:
193     return RValue::get(CGM.EmitConstantExpr(E, E->getType(), 0));
194   case Builtin::BI__builtin_stdarg_start:
195   case Builtin::BI__builtin_va_start:
196   case Builtin::BI__builtin_va_end: {
197     Value *ArgValue = EmitVAListRef(E->getArg(0));
198     llvm::Type *DestType = Int8PtrTy;
199     if (ArgValue->getType() != DestType)
200       ArgValue = Builder.CreateBitCast(ArgValue, DestType,
201                                        ArgValue->getName().data());
202 
203     Intrinsic::ID inst = (BuiltinID == Builtin::BI__builtin_va_end) ?
204       Intrinsic::vaend : Intrinsic::vastart;
205     return RValue::get(Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue));
206   }
207   case Builtin::BI__builtin_va_copy: {
208     Value *DstPtr = EmitVAListRef(E->getArg(0));
209     Value *SrcPtr = EmitVAListRef(E->getArg(1));
210 
211     llvm::Type *Type = Int8PtrTy;
212 
213     DstPtr = Builder.CreateBitCast(DstPtr, Type);
214     SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
215     return RValue::get(Builder.CreateCall2(CGM.getIntrinsic(Intrinsic::vacopy),
216                                            DstPtr, SrcPtr));
217   }
218   case Builtin::BI__builtin_abs: {
219     Value *ArgValue = EmitScalarExpr(E->getArg(0));
220 
221     Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
222     Value *CmpResult =
223     Builder.CreateICmpSGE(ArgValue,
224                           llvm::Constant::getNullValue(ArgValue->getType()),
225                                                             "abscond");
226     Value *Result =
227       Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
228 
229     return RValue::get(Result);
230   }
231   case Builtin::BI__builtin_ctz:
232   case Builtin::BI__builtin_ctzl:
233   case Builtin::BI__builtin_ctzll: {
234     Value *ArgValue = EmitScalarExpr(E->getArg(0));
235 
236     llvm::Type *ArgType = ArgValue->getType();
237     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
238 
239     llvm::Type *ResultType = ConvertType(E->getType());
240     Value *Result = Builder.CreateCall(F, ArgValue);
241     if (Result->getType() != ResultType)
242       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
243                                      "cast");
244     return RValue::get(Result);
245   }
246   case Builtin::BI__builtin_clz:
247   case Builtin::BI__builtin_clzl:
248   case Builtin::BI__builtin_clzll: {
249     Value *ArgValue = EmitScalarExpr(E->getArg(0));
250 
251     llvm::Type *ArgType = ArgValue->getType();
252     Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
253 
254     llvm::Type *ResultType = ConvertType(E->getType());
255     Value *Result = Builder.CreateCall(F, ArgValue);
256     if (Result->getType() != ResultType)
257       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
258                                      "cast");
259     return RValue::get(Result);
260   }
261   case Builtin::BI__builtin_ffs:
262   case Builtin::BI__builtin_ffsl:
263   case Builtin::BI__builtin_ffsll: {
264     // ffs(x) -> x ? cttz(x) + 1 : 0
265     Value *ArgValue = EmitScalarExpr(E->getArg(0));
266 
267     llvm::Type *ArgType = ArgValue->getType();
268     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
269 
270     llvm::Type *ResultType = ConvertType(E->getType());
271     Value *Tmp = Builder.CreateAdd(Builder.CreateCall(F, ArgValue),
272                                    llvm::ConstantInt::get(ArgType, 1));
273     Value *Zero = llvm::Constant::getNullValue(ArgType);
274     Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
275     Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
276     if (Result->getType() != ResultType)
277       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
278                                      "cast");
279     return RValue::get(Result);
280   }
281   case Builtin::BI__builtin_parity:
282   case Builtin::BI__builtin_parityl:
283   case Builtin::BI__builtin_parityll: {
284     // parity(x) -> ctpop(x) & 1
285     Value *ArgValue = EmitScalarExpr(E->getArg(0));
286 
287     llvm::Type *ArgType = ArgValue->getType();
288     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
289 
290     llvm::Type *ResultType = ConvertType(E->getType());
291     Value *Tmp = Builder.CreateCall(F, ArgValue);
292     Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
293     if (Result->getType() != ResultType)
294       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
295                                      "cast");
296     return RValue::get(Result);
297   }
298   case Builtin::BI__builtin_popcount:
299   case Builtin::BI__builtin_popcountl:
300   case Builtin::BI__builtin_popcountll: {
301     Value *ArgValue = EmitScalarExpr(E->getArg(0));
302 
303     llvm::Type *ArgType = ArgValue->getType();
304     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
305 
306     llvm::Type *ResultType = ConvertType(E->getType());
307     Value *Result = Builder.CreateCall(F, ArgValue);
308     if (Result->getType() != ResultType)
309       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
310                                      "cast");
311     return RValue::get(Result);
312   }
313   case Builtin::BI__builtin_expect: {
314     Value *ArgValue = EmitScalarExpr(E->getArg(0));
315     llvm::Type *ArgType = ArgValue->getType();
316 
317     Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
318     Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
319 
320     Value *Result = Builder.CreateCall2(FnExpect, ArgValue, ExpectedValue,
321                                         "expval");
322     return RValue::get(Result);
323   }
324   case Builtin::BI__builtin_bswap32:
325   case Builtin::BI__builtin_bswap64: {
326     Value *ArgValue = EmitScalarExpr(E->getArg(0));
327     llvm::Type *ArgType = ArgValue->getType();
328     Value *F = CGM.getIntrinsic(Intrinsic::bswap, ArgType);
329     return RValue::get(Builder.CreateCall(F, ArgValue));
330   }
331   case Builtin::BI__builtin_object_size: {
332     // We pass this builtin onto the optimizer so that it can
333     // figure out the object size in more complex cases.
334     llvm::Type *ResType = ConvertType(E->getType());
335 
336     // LLVM only supports 0 and 2, make sure that we pass along that
337     // as a boolean.
338     Value *Ty = EmitScalarExpr(E->getArg(1));
339     ConstantInt *CI = dyn_cast<ConstantInt>(Ty);
340     assert(CI);
341     uint64_t val = CI->getZExtValue();
342     CI = ConstantInt::get(Builder.getInt1Ty(), (val & 0x2) >> 1);
343 
344     Value *F = CGM.getIntrinsic(Intrinsic::objectsize, ResType);
345     return RValue::get(Builder.CreateCall2(F,
346                                            EmitScalarExpr(E->getArg(0)),
347                                            CI));
348   }
349   case Builtin::BI__builtin_prefetch: {
350     Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
    // FIXME: Technically these constants should be of type 'int', yes?
352     RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
353       llvm::ConstantInt::get(Int32Ty, 0);
354     Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
355       llvm::ConstantInt::get(Int32Ty, 3);
356     Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
357     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
358     return RValue::get(Builder.CreateCall4(F, Address, RW, Locality, Data));
359   }
360   case Builtin::BI__builtin_trap: {
361     Value *F = CGM.getIntrinsic(Intrinsic::trap);
362     return RValue::get(Builder.CreateCall(F));
363   }
364   case Builtin::BI__builtin_unreachable: {
365     if (CatchUndefined)
366       EmitBranch(getTrapBB());
367     else
368       Builder.CreateUnreachable();
369 
370     // We do need to preserve an insertion point.
371     EmitBlock(createBasicBlock("unreachable.cont"));
372 
373     return RValue::get(0);
374   }
375 
376   case Builtin::BI__builtin_powi:
377   case Builtin::BI__builtin_powif:
378   case Builtin::BI__builtin_powil: {
379     Value *Base = EmitScalarExpr(E->getArg(0));
380     Value *Exponent = EmitScalarExpr(E->getArg(1));
381     llvm::Type *ArgType = Base->getType();
382     Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
383     return RValue::get(Builder.CreateCall2(F, Base, Exponent));
384   }
385 
386   case Builtin::BI__builtin_isgreater:
387   case Builtin::BI__builtin_isgreaterequal:
388   case Builtin::BI__builtin_isless:
389   case Builtin::BI__builtin_islessequal:
390   case Builtin::BI__builtin_islessgreater:
391   case Builtin::BI__builtin_isunordered: {
392     // Ordered comparisons: we know the arguments to these are matching scalar
393     // floating point values.
394     Value *LHS = EmitScalarExpr(E->getArg(0));
395     Value *RHS = EmitScalarExpr(E->getArg(1));
396 
397     switch (BuiltinID) {
398     default: llvm_unreachable("Unknown ordered comparison");
399     case Builtin::BI__builtin_isgreater:
400       LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
401       break;
402     case Builtin::BI__builtin_isgreaterequal:
403       LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
404       break;
405     case Builtin::BI__builtin_isless:
406       LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
407       break;
408     case Builtin::BI__builtin_islessequal:
409       LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
410       break;
411     case Builtin::BI__builtin_islessgreater:
412       LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
413       break;
414     case Builtin::BI__builtin_isunordered:
415       LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
416       break;
417     }
418     // ZExt bool to int type.
419     return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
420   }
421   case Builtin::BI__builtin_isnan: {
422     Value *V = EmitScalarExpr(E->getArg(0));
423     V = Builder.CreateFCmpUNO(V, V, "cmp");
424     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
425   }
426 
427   case Builtin::BI__builtin_isinf: {
428     // isinf(x) --> fabs(x) == infinity
429     Value *V = EmitScalarExpr(E->getArg(0));
430     V = EmitFAbs(*this, V, E->getArg(0)->getType());
431 
432     V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf");
433     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
434   }
435 
436   // TODO: BI__builtin_isinf_sign
437   //   isinf_sign(x) -> isinf(x) ? (signbit(x) ? -1 : 1) : 0
438 
439   case Builtin::BI__builtin_isnormal: {
440     // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
441     Value *V = EmitScalarExpr(E->getArg(0));
442     Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
443 
444     Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
445     Value *IsLessThanInf =
446       Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
447     APFloat Smallest = APFloat::getSmallestNormalized(
448                    getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
449     Value *IsNormal =
450       Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
451                             "isnormal");
452     V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
453     V = Builder.CreateAnd(V, IsNormal, "and");
454     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
455   }
456 
457   case Builtin::BI__builtin_isfinite: {
458     // isfinite(x) --> x == x && fabs(x) != infinity;
459     Value *V = EmitScalarExpr(E->getArg(0));
460     Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
461 
462     Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
463     Value *IsNotInf =
464       Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
465 
466     V = Builder.CreateAnd(Eq, IsNotInf, "and");
467     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
468   }
469 
470   case Builtin::BI__builtin_fpclassify: {
471     Value *V = EmitScalarExpr(E->getArg(5));
472     llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
473 
474     // Create Result
475     BasicBlock *Begin = Builder.GetInsertBlock();
476     BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
477     Builder.SetInsertPoint(End);
478     PHINode *Result =
479       Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
480                         "fpclassify_result");
481 
482     // if (V==0) return FP_ZERO
483     Builder.SetInsertPoint(Begin);
484     Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
485                                           "iszero");
486     Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
487     BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
488     Builder.CreateCondBr(IsZero, End, NotZero);
489     Result->addIncoming(ZeroLiteral, Begin);
490 
491     // if (V != V) return FP_NAN
492     Builder.SetInsertPoint(NotZero);
493     Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
494     Value *NanLiteral = EmitScalarExpr(E->getArg(0));
495     BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
496     Builder.CreateCondBr(IsNan, End, NotNan);
497     Result->addIncoming(NanLiteral, NotZero);
498 
499     // if (fabs(V) == infinity) return FP_INFINITY
500     Builder.SetInsertPoint(NotNan);
501     Value *VAbs = EmitFAbs(*this, V, E->getArg(5)->getType());
502     Value *IsInf =
503       Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
504                             "isinf");
505     Value *InfLiteral = EmitScalarExpr(E->getArg(1));
506     BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
507     Builder.CreateCondBr(IsInf, End, NotInf);
508     Result->addIncoming(InfLiteral, NotNan);
509 
510     // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
511     Builder.SetInsertPoint(NotInf);
512     APFloat Smallest = APFloat::getSmallestNormalized(
513         getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
514     Value *IsNormal =
515       Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
516                             "isnormal");
517     Value *NormalResult =
518       Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
519                            EmitScalarExpr(E->getArg(3)));
520     Builder.CreateBr(End);
521     Result->addIncoming(NormalResult, NotInf);
522 
523     // return Result
524     Builder.SetInsertPoint(End);
525     return RValue::get(Result);
526   }
527 
528   case Builtin::BIalloca:
529   case Builtin::BI__builtin_alloca: {
530     Value *Size = EmitScalarExpr(E->getArg(0));
531     return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size));
532   }
533   case Builtin::BIbzero:
534   case Builtin::BI__builtin_bzero: {
535     Value *Address = EmitScalarExpr(E->getArg(0));
536     Value *SizeVal = EmitScalarExpr(E->getArg(1));
537     Builder.CreateMemSet(Address, Builder.getInt8(0), SizeVal, 1, false);
538     return RValue::get(Address);
539   }
540   case Builtin::BImemcpy:
541   case Builtin::BI__builtin_memcpy: {
542     Value *Address = EmitScalarExpr(E->getArg(0));
543     Value *SrcAddr = EmitScalarExpr(E->getArg(1));
544     Value *SizeVal = EmitScalarExpr(E->getArg(2));
545     Builder.CreateMemCpy(Address, SrcAddr, SizeVal, 1, false);
546     return RValue::get(Address);
547   }
548 
549   case Builtin::BI__builtin___memcpy_chk: {
    // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
551     llvm::APSInt Size, DstSize;
552     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
553         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
554       break;
555     if (Size.ugt(DstSize))
556       break;
557     Value *Dest = EmitScalarExpr(E->getArg(0));
558     Value *Src = EmitScalarExpr(E->getArg(1));
559     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
560     Builder.CreateMemCpy(Dest, Src, SizeVal, 1, false);
561     return RValue::get(Dest);
562   }
563 
564   case Builtin::BI__builtin_objc_memmove_collectable: {
565     Value *Address = EmitScalarExpr(E->getArg(0));
566     Value *SrcAddr = EmitScalarExpr(E->getArg(1));
567     Value *SizeVal = EmitScalarExpr(E->getArg(2));
568     CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
569                                                   Address, SrcAddr, SizeVal);
570     return RValue::get(Address);
571   }
572 
573   case Builtin::BI__builtin___memmove_chk: {
    // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
575     llvm::APSInt Size, DstSize;
576     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
577         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
578       break;
579     if (Size.ugt(DstSize))
580       break;
581     Value *Dest = EmitScalarExpr(E->getArg(0));
582     Value *Src = EmitScalarExpr(E->getArg(1));
583     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
584     Builder.CreateMemMove(Dest, Src, SizeVal, 1, false);
585     return RValue::get(Dest);
586   }
587 
588   case Builtin::BImemmove:
589   case Builtin::BI__builtin_memmove: {
590     Value *Address = EmitScalarExpr(E->getArg(0));
591     Value *SrcAddr = EmitScalarExpr(E->getArg(1));
592     Value *SizeVal = EmitScalarExpr(E->getArg(2));
593     Builder.CreateMemMove(Address, SrcAddr, SizeVal, 1, false);
594     return RValue::get(Address);
595   }
596   case Builtin::BImemset:
597   case Builtin::BI__builtin_memset: {
598     Value *Address = EmitScalarExpr(E->getArg(0));
599     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
600                                          Builder.getInt8Ty());
601     Value *SizeVal = EmitScalarExpr(E->getArg(2));
602     Builder.CreateMemSet(Address, ByteVal, SizeVal, 1, false);
603     return RValue::get(Address);
604   }
605   case Builtin::BI__builtin___memset_chk: {
606     // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
607     llvm::APSInt Size, DstSize;
608     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
609         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
610       break;
611     if (Size.ugt(DstSize))
612       break;
613     Value *Address = EmitScalarExpr(E->getArg(0));
614     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
615                                          Builder.getInt8Ty());
616     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
617     Builder.CreateMemSet(Address, ByteVal, SizeVal, 1, false);
618 
619     return RValue::get(Address);
620   }
621   case Builtin::BI__builtin_dwarf_cfa: {
622     // The offset in bytes from the first argument to the CFA.
623     //
624     // Why on earth is this in the frontend?  Is there any reason at
625     // all that the backend can't reasonably determine this while
626     // lowering llvm.eh.dwarf.cfa()?
627     //
628     // TODO: If there's a satisfactory reason, add a target hook for
629     // this instead of hard-coding 0, which is correct for most targets.
630     int32_t Offset = 0;
631 
632     Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
633     return RValue::get(Builder.CreateCall(F,
634                                       llvm::ConstantInt::get(Int32Ty, Offset)));
635   }
636   case Builtin::BI__builtin_return_address: {
637     Value *Depth = EmitScalarExpr(E->getArg(0));
638     Depth = Builder.CreateIntCast(Depth, Int32Ty, false);
639     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
640     return RValue::get(Builder.CreateCall(F, Depth));
641   }
642   case Builtin::BI__builtin_frame_address: {
643     Value *Depth = EmitScalarExpr(E->getArg(0));
644     Depth = Builder.CreateIntCast(Depth, Int32Ty, false);
645     Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
646     return RValue::get(Builder.CreateCall(F, Depth));
647   }
648   case Builtin::BI__builtin_extract_return_addr: {
649     Value *Address = EmitScalarExpr(E->getArg(0));
650     Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
651     return RValue::get(Result);
652   }
653   case Builtin::BI__builtin_frob_return_addr: {
654     Value *Address = EmitScalarExpr(E->getArg(0));
655     Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
656     return RValue::get(Result);
657   }
658   case Builtin::BI__builtin_dwarf_sp_column: {
659     llvm::IntegerType *Ty
660       = cast<llvm::IntegerType>(ConvertType(E->getType()));
661     int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
662     if (Column == -1) {
663       CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
664       return RValue::get(llvm::UndefValue::get(Ty));
665     }
666     return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
667   }
668   case Builtin::BI__builtin_init_dwarf_reg_size_table: {
669     Value *Address = EmitScalarExpr(E->getArg(0));
670     if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
671       CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
672     return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
673   }
674   case Builtin::BI__builtin_eh_return: {
675     Value *Int = EmitScalarExpr(E->getArg(0));
676     Value *Ptr = EmitScalarExpr(E->getArg(1));
677 
678     llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
679     assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
680            "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
681     Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
682                                   ? Intrinsic::eh_return_i32
683                                   : Intrinsic::eh_return_i64);
684     Builder.CreateCall2(F, Int, Ptr);
685     Builder.CreateUnreachable();
686 
687     // We do need to preserve an insertion point.
688     EmitBlock(createBasicBlock("builtin_eh_return.cont"));
689 
690     return RValue::get(0);
691   }
692   case Builtin::BI__builtin_unwind_init: {
693     Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
694     return RValue::get(Builder.CreateCall(F));
695   }
696   case Builtin::BI__builtin_extend_pointer: {
697     // Extends a pointer to the size of an _Unwind_Word, which is
698     // uint64_t on all platforms.  Generally this gets poked into a
699     // register and eventually used as an address, so if the
700     // addressing registers are wider than pointers and the platform
701     // doesn't implicitly ignore high-order bits when doing
702     // addressing, we need to make sure we zext / sext based on
703     // the platform's expectations.
704     //
705     // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
706 
707     // Cast the pointer to intptr_t.
708     Value *Ptr = EmitScalarExpr(E->getArg(0));
709     Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
710 
711     // If that's 64 bits, we're done.
712     if (IntPtrTy->getBitWidth() == 64)
713       return RValue::get(Result);
714 
715     // Otherwise, ask the codegen data what to do.
716     if (getTargetHooks().extendPointerWithSExt())
717       return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
718     else
719       return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
720   }
721   case Builtin::BI__builtin_setjmp: {
722     // Buffer is a void**.
723     Value *Buf = EmitScalarExpr(E->getArg(0));
724 
725     // Store the frame pointer to the setjmp buffer.
726     Value *FrameAddr =
727       Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
728                          ConstantInt::get(Int32Ty, 0));
729     Builder.CreateStore(FrameAddr, Buf);
730 
731     // Store the stack pointer to the setjmp buffer.
732     Value *StackAddr =
733       Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
734     Value *StackSaveSlot =
735       Builder.CreateGEP(Buf, ConstantInt::get(Int32Ty, 2));
736     Builder.CreateStore(StackAddr, StackSaveSlot);
737 
738     // Call LLVM's EH setjmp, which is lightweight.
739     Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
740     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
741     return RValue::get(Builder.CreateCall(F, Buf));
742   }
743   case Builtin::BI__builtin_longjmp: {
744     Value *Buf = EmitScalarExpr(E->getArg(0));
745     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
746 
747     // Call LLVM's EH longjmp, which is lightweight.
748     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
749 
750     // longjmp doesn't return; mark this as unreachable.
751     Builder.CreateUnreachable();
752 
753     // We do need to preserve an insertion point.
754     EmitBlock(createBasicBlock("longjmp.cont"));
755 
756     return RValue::get(0);
757   }
758   case Builtin::BI__sync_fetch_and_add:
759   case Builtin::BI__sync_fetch_and_sub:
760   case Builtin::BI__sync_fetch_and_or:
761   case Builtin::BI__sync_fetch_and_and:
762   case Builtin::BI__sync_fetch_and_xor:
763   case Builtin::BI__sync_add_and_fetch:
764   case Builtin::BI__sync_sub_and_fetch:
765   case Builtin::BI__sync_and_and_fetch:
766   case Builtin::BI__sync_or_and_fetch:
767   case Builtin::BI__sync_xor_and_fetch:
768   case Builtin::BI__sync_val_compare_and_swap:
769   case Builtin::BI__sync_bool_compare_and_swap:
770   case Builtin::BI__sync_lock_test_and_set:
771   case Builtin::BI__sync_lock_release:
772   case Builtin::BI__sync_swap:
773     llvm_unreachable("Shouldn't make it through sema");
774   case Builtin::BI__sync_fetch_and_add_1:
775   case Builtin::BI__sync_fetch_and_add_2:
776   case Builtin::BI__sync_fetch_and_add_4:
777   case Builtin::BI__sync_fetch_and_add_8:
778   case Builtin::BI__sync_fetch_and_add_16:
779     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
780   case Builtin::BI__sync_fetch_and_sub_1:
781   case Builtin::BI__sync_fetch_and_sub_2:
782   case Builtin::BI__sync_fetch_and_sub_4:
783   case Builtin::BI__sync_fetch_and_sub_8:
784   case Builtin::BI__sync_fetch_and_sub_16:
785     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
786   case Builtin::BI__sync_fetch_and_or_1:
787   case Builtin::BI__sync_fetch_and_or_2:
788   case Builtin::BI__sync_fetch_and_or_4:
789   case Builtin::BI__sync_fetch_and_or_8:
790   case Builtin::BI__sync_fetch_and_or_16:
791     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
792   case Builtin::BI__sync_fetch_and_and_1:
793   case Builtin::BI__sync_fetch_and_and_2:
794   case Builtin::BI__sync_fetch_and_and_4:
795   case Builtin::BI__sync_fetch_and_and_8:
796   case Builtin::BI__sync_fetch_and_and_16:
797     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
798   case Builtin::BI__sync_fetch_and_xor_1:
799   case Builtin::BI__sync_fetch_and_xor_2:
800   case Builtin::BI__sync_fetch_and_xor_4:
801   case Builtin::BI__sync_fetch_and_xor_8:
802   case Builtin::BI__sync_fetch_and_xor_16:
803     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
804 
805   // Clang extensions: not overloaded yet.
806   case Builtin::BI__sync_fetch_and_min:
807     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
808   case Builtin::BI__sync_fetch_and_max:
809     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
810   case Builtin::BI__sync_fetch_and_umin:
811     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
812   case Builtin::BI__sync_fetch_and_umax:
813     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
814 
815   case Builtin::BI__sync_add_and_fetch_1:
816   case Builtin::BI__sync_add_and_fetch_2:
817   case Builtin::BI__sync_add_and_fetch_4:
818   case Builtin::BI__sync_add_and_fetch_8:
819   case Builtin::BI__sync_add_and_fetch_16:
820     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
821                                 llvm::Instruction::Add);
822   case Builtin::BI__sync_sub_and_fetch_1:
823   case Builtin::BI__sync_sub_and_fetch_2:
824   case Builtin::BI__sync_sub_and_fetch_4:
825   case Builtin::BI__sync_sub_and_fetch_8:
826   case Builtin::BI__sync_sub_and_fetch_16:
827     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
828                                 llvm::Instruction::Sub);
829   case Builtin::BI__sync_and_and_fetch_1:
830   case Builtin::BI__sync_and_and_fetch_2:
831   case Builtin::BI__sync_and_and_fetch_4:
832   case Builtin::BI__sync_and_and_fetch_8:
833   case Builtin::BI__sync_and_and_fetch_16:
834     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
835                                 llvm::Instruction::And);
836   case Builtin::BI__sync_or_and_fetch_1:
837   case Builtin::BI__sync_or_and_fetch_2:
838   case Builtin::BI__sync_or_and_fetch_4:
839   case Builtin::BI__sync_or_and_fetch_8:
840   case Builtin::BI__sync_or_and_fetch_16:
841     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
842                                 llvm::Instruction::Or);
843   case Builtin::BI__sync_xor_and_fetch_1:
844   case Builtin::BI__sync_xor_and_fetch_2:
845   case Builtin::BI__sync_xor_and_fetch_4:
846   case Builtin::BI__sync_xor_and_fetch_8:
847   case Builtin::BI__sync_xor_and_fetch_16:
848     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
849                                 llvm::Instruction::Xor);
850 
851   case Builtin::BI__sync_val_compare_and_swap_1:
852   case Builtin::BI__sync_val_compare_and_swap_2:
853   case Builtin::BI__sync_val_compare_and_swap_4:
854   case Builtin::BI__sync_val_compare_and_swap_8:
855   case Builtin::BI__sync_val_compare_and_swap_16: {
856     QualType T = E->getType();
857     llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
858     unsigned AddrSpace =
859       cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();
860 
861     llvm::IntegerType *IntType =
862       llvm::IntegerType::get(getLLVMContext(),
863                              getContext().getTypeSize(T));
864     llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
865 
866     Value *Args[3];
867     Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
868     Args[1] = EmitScalarExpr(E->getArg(1));
869     llvm::Type *ValueType = Args[1]->getType();
870     Args[1] = EmitToInt(*this, Args[1], T, IntType);
871     Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);
872 
873     Value *Result = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
874                                                 llvm::SequentiallyConsistent);
875     Result = EmitFromInt(*this, Result, T, ValueType);
876     return RValue::get(Result);
877   }
878 
879   case Builtin::BI__sync_bool_compare_and_swap_1:
880   case Builtin::BI__sync_bool_compare_and_swap_2:
881   case Builtin::BI__sync_bool_compare_and_swap_4:
882   case Builtin::BI__sync_bool_compare_and_swap_8:
883   case Builtin::BI__sync_bool_compare_and_swap_16: {
884     QualType T = E->getArg(1)->getType();
885     llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
886     unsigned AddrSpace =
887       cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();
888 
889     llvm::IntegerType *IntType =
890       llvm::IntegerType::get(getLLVMContext(),
891                              getContext().getTypeSize(T));
892     llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
893 
894     Value *Args[3];
895     Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
896     Args[1] = EmitToInt(*this, EmitScalarExpr(E->getArg(1)), T, IntType);
897     Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);
898 
899     Value *OldVal = Args[1];
900     Value *PrevVal = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
901                                                  llvm::SequentiallyConsistent);
902     Value *Result = Builder.CreateICmpEQ(PrevVal, OldVal);
903     // zext bool to int.
904     Result = Builder.CreateZExt(Result, ConvertType(E->getType()));
905     return RValue::get(Result);
906   }
907 
908   case Builtin::BI__sync_swap_1:
909   case Builtin::BI__sync_swap_2:
910   case Builtin::BI__sync_swap_4:
911   case Builtin::BI__sync_swap_8:
912   case Builtin::BI__sync_swap_16:
913     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
914 
915   case Builtin::BI__sync_lock_test_and_set_1:
916   case Builtin::BI__sync_lock_test_and_set_2:
917   case Builtin::BI__sync_lock_test_and_set_4:
918   case Builtin::BI__sync_lock_test_and_set_8:
919   case Builtin::BI__sync_lock_test_and_set_16:
920     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
921 
922   case Builtin::BI__sync_lock_release_1:
923   case Builtin::BI__sync_lock_release_2:
924   case Builtin::BI__sync_lock_release_4:
925   case Builtin::BI__sync_lock_release_8:
926   case Builtin::BI__sync_lock_release_16: {
927     Value *Ptr = EmitScalarExpr(E->getArg(0));
928     llvm::Type *ElLLVMTy =
929       cast<llvm::PointerType>(Ptr->getType())->getElementType();
930     llvm::StoreInst *Store =
931       Builder.CreateStore(llvm::Constant::getNullValue(ElLLVMTy), Ptr);
932     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
933     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
934     Store->setAlignment(StoreSize.getQuantity());
935     Store->setAtomic(llvm::Release);
936     return RValue::get(0);
937   }
938 
939   case Builtin::BI__sync_synchronize: {
940     // We assume this is supposed to correspond to a C++0x-style
941     // sequentially-consistent fence (i.e. this is only usable for
    // synchronization, not device I/O or anything like that). This intrinsic
943     // is really badly designed in the sense that in theory, there isn't
944     // any way to safely use it... but in practice, it mostly works
945     // to use it with non-atomic loads and stores to get acquire/release
946     // semantics.
947     Builder.CreateFence(llvm::SequentiallyConsistent);
948     return RValue::get(0);
949   }
950 
951   case Builtin::BI__atomic_thread_fence:
952   case Builtin::BI__atomic_signal_fence: {
953     llvm::SynchronizationScope Scope;
954     if (BuiltinID == Builtin::BI__atomic_signal_fence)
955       Scope = llvm::SingleThread;
956     else
957       Scope = llvm::CrossThread;
958     Value *Order = EmitScalarExpr(E->getArg(0));
959     if (isa<llvm::ConstantInt>(Order)) {
960       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
961       switch (ord) {
962       case 0:  // memory_order_relaxed
963       default: // invalid order
964         break;
965       case 1:  // memory_order_consume
966       case 2:  // memory_order_acquire
967         Builder.CreateFence(llvm::Acquire, Scope);
968         break;
969       case 3:  // memory_order_release
970         Builder.CreateFence(llvm::Release, Scope);
971         break;
972       case 4:  // memory_order_acq_rel
973         Builder.CreateFence(llvm::AcquireRelease, Scope);
974         break;
975       case 5:  // memory_order_seq_cst
976         Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
977         break;
978       }
979       return RValue::get(0);
980     }
981 
982     llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
983     AcquireBB = createBasicBlock("acquire", CurFn);
984     ReleaseBB = createBasicBlock("release", CurFn);
985     AcqRelBB = createBasicBlock("acqrel", CurFn);
986     SeqCstBB = createBasicBlock("seqcst", CurFn);
987     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
988 
989     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
990     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
991 
992     Builder.SetInsertPoint(AcquireBB);
993     Builder.CreateFence(llvm::Acquire, Scope);
994     Builder.CreateBr(ContBB);
995     SI->addCase(Builder.getInt32(1), AcquireBB);
996     SI->addCase(Builder.getInt32(2), AcquireBB);
997 
998     Builder.SetInsertPoint(ReleaseBB);
999     Builder.CreateFence(llvm::Release, Scope);
1000     Builder.CreateBr(ContBB);
1001     SI->addCase(Builder.getInt32(3), ReleaseBB);
1002 
1003     Builder.SetInsertPoint(AcqRelBB);
1004     Builder.CreateFence(llvm::AcquireRelease, Scope);
1005     Builder.CreateBr(ContBB);
1006     SI->addCase(Builder.getInt32(4), AcqRelBB);
1007 
1008     Builder.SetInsertPoint(SeqCstBB);
1009     Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
1010     Builder.CreateBr(ContBB);
1011     SI->addCase(Builder.getInt32(5), SeqCstBB);
1012 
1013     Builder.SetInsertPoint(ContBB);
1014     return RValue::get(0);
1015   }
1016 
1017     // Library functions with special handling.
1018   case Builtin::BIsqrt:
1019   case Builtin::BIsqrtf:
1020   case Builtin::BIsqrtl: {
1021     // TODO: there is currently no set of optimizer flags
1022     // sufficient for us to rewrite sqrt to @llvm.sqrt.
1023     // -fmath-errno=0 is not good enough; we need finiteness.
1024     // We could probably precondition the call with an ult
1025     // against 0, but is that worth the complexity?
1026     break;
1027   }
1028 
1029   case Builtin::BIpow:
1030   case Builtin::BIpowf:
1031   case Builtin::BIpowl: {
    // Rewrite pow to intrinsic if allowed.
1033     if (!FD->hasAttr<ConstAttr>())
1034       break;
1035     Value *Base = EmitScalarExpr(E->getArg(0));
1036     Value *Exponent = EmitScalarExpr(E->getArg(1));
1037     llvm::Type *ArgType = Base->getType();
1038     Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
1039     return RValue::get(Builder.CreateCall2(F, Base, Exponent));
1040   }
1041 
1042   case Builtin::BIfma:
1043   case Builtin::BIfmaf:
1044   case Builtin::BIfmal:
1045   case Builtin::BI__builtin_fma:
1046   case Builtin::BI__builtin_fmaf:
1047   case Builtin::BI__builtin_fmal: {
1048     // Rewrite fma to intrinsic.
1049     Value *FirstArg = EmitScalarExpr(E->getArg(0));
1050     llvm::Type *ArgType = FirstArg->getType();
1051     Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
1052     return RValue::get(Builder.CreateCall3(F, FirstArg,
1053                                               EmitScalarExpr(E->getArg(1)),
1054                                               EmitScalarExpr(E->getArg(2))));
1055   }
1056 
1057   case Builtin::BI__builtin_signbit:
1058   case Builtin::BI__builtin_signbitf:
1059   case Builtin::BI__builtin_signbitl: {
1060     LLVMContext &C = CGM.getLLVMContext();
1061 
1062     Value *Arg = EmitScalarExpr(E->getArg(0));
1063     llvm::Type *ArgTy = Arg->getType();
1064     if (ArgTy->isPPC_FP128Ty())
1065       break; // FIXME: I'm not sure what the right implementation is here.
1066     int ArgWidth = ArgTy->getPrimitiveSizeInBits();
1067     llvm::Type *ArgIntTy = llvm::IntegerType::get(C, ArgWidth);
1068     Value *BCArg = Builder.CreateBitCast(Arg, ArgIntTy);
1069     Value *ZeroCmp = llvm::Constant::getNullValue(ArgIntTy);
1070     Value *Result = Builder.CreateICmpSLT(BCArg, ZeroCmp);
1071     return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType())));
1072   }
1073   case Builtin::BI__builtin_annotation: {
1074     llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
1075     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
1076                                       AnnVal->getType());
1077 
1078     // Get the annotation string, go through casts. Sema requires this to be a
1079     // non-wide string literal, potentially casted, so the cast<> is safe.
1080     const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
1081     llvm::StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
1082     return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
1083   }
1084   }
1085 
1086   // If this is an alias for a lib function (e.g. __builtin_sin), emit
1087   // the call using the normal call path, but using the unmangled
1088   // version of the function name.
1089   if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
1090     return emitLibraryCall(*this, FD, E,
1091                            CGM.getBuiltinLibFunction(FD, BuiltinID));
1092 
1093   // If this is a predefined lib function (e.g. malloc), emit the call
1094   // using exactly the normal call path.
1095   if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
1096     return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee()));
1097 
1098   // See if we have a target specific intrinsic.
1099   const char *Name = getContext().BuiltinInfo.GetName(BuiltinID);
1100   Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
1101   if (const char *Prefix =
1102       llvm::Triple::getArchTypePrefix(Target.getTriple().getArch()))
1103     IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);
1104 
1105   if (IntrinsicID != Intrinsic::not_intrinsic) {
1106     SmallVector<Value*, 16> Args;
1107 
1108     // Find out if any arguments are required to be integer constant
1109     // expressions.
1110     unsigned ICEArguments = 0;
1111     ASTContext::GetBuiltinTypeError Error;
1112     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
1113     assert(Error == ASTContext::GE_None && "Should not codegen an error");
1114 
1115     Function *F = CGM.getIntrinsic(IntrinsicID);
1116     llvm::FunctionType *FTy = F->getFunctionType();
1117 
1118     for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
1119       Value *ArgValue;
1120       // If this is a normal argument, just emit it as a scalar.
1121       if ((ICEArguments & (1 << i)) == 0) {
1122         ArgValue = EmitScalarExpr(E->getArg(i));
1123       } else {
1124         // If this is required to be a constant, constant fold it so that we
1125         // know that the generated intrinsic gets a ConstantInt.
1126         llvm::APSInt Result;
1127         bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
1128         assert(IsConst && "Constant arg isn't actually constant?");
1129         (void)IsConst;
1130         ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
1131       }
1132 
1133       // If the intrinsic arg type is different from the builtin arg type
1134       // we need to do a bit cast.
1135       llvm::Type *PTy = FTy->getParamType(i);
1136       if (PTy != ArgValue->getType()) {
1137         assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
1138                "Must be able to losslessly bit cast to param");
1139         ArgValue = Builder.CreateBitCast(ArgValue, PTy);
1140       }
1141 
1142       Args.push_back(ArgValue);
1143     }
1144 
1145     Value *V = Builder.CreateCall(F, Args);
1146     QualType BuiltinRetType = E->getType();
1147 
1148     llvm::Type *RetTy = llvm::Type::getVoidTy(getLLVMContext());
1149     if (!BuiltinRetType->isVoidType()) RetTy = ConvertType(BuiltinRetType);
1150 
1151     if (RetTy != V->getType()) {
1152       assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
1153              "Must be able to losslessly bit cast result type");
1154       V = Builder.CreateBitCast(V, RetTy);
1155     }
1156 
1157     return RValue::get(V);
1158   }
1159 
1160   // See if we have a target specific builtin that needs to be lowered.
1161   if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
1162     return RValue::get(V);
1163 
1164   ErrorUnsupported(E, "builtin function");
1165 
1166   // Unknown builtin, for now just dump it out and return undef.
1167   if (hasAggregateLLVMType(E->getType()))
1168     return RValue::getAggregate(CreateMemTemp(E->getType()));
1169   return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1170 }
1171 
1172 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
1173                                               const CallExpr *E) {
1174   switch (Target.getTriple().getArch()) {
1175   case llvm::Triple::arm:
1176   case llvm::Triple::thumb:
1177     return EmitARMBuiltinExpr(BuiltinID, E);
1178   case llvm::Triple::x86:
1179   case llvm::Triple::x86_64:
1180     return EmitX86BuiltinExpr(BuiltinID, E);
1181   case llvm::Triple::ppc:
1182   case llvm::Triple::ppc64:
1183     return EmitPPCBuiltinExpr(BuiltinID, E);
1184   default:
1185     return 0;
1186   }
1187 }
1188 
1189 static llvm::VectorType *GetNeonType(LLVMContext &C, NeonTypeFlags TypeFlags) {
1190   int IsQuad = TypeFlags.isQuad();
1191   switch (TypeFlags.getEltType()) {
1192   default: break;
1193   case NeonTypeFlags::Int8:
1194   case NeonTypeFlags::Poly8:
1195     return llvm::VectorType::get(llvm::Type::getInt8Ty(C), 8 << IsQuad);
1196   case NeonTypeFlags::Int16:
1197   case NeonTypeFlags::Poly16:
1198   case NeonTypeFlags::Float16:
1199     return llvm::VectorType::get(llvm::Type::getInt16Ty(C), 4 << IsQuad);
1200   case NeonTypeFlags::Int32:
1201     return llvm::VectorType::get(llvm::Type::getInt32Ty(C), 2 << IsQuad);
1202   case NeonTypeFlags::Int64:
1203     return llvm::VectorType::get(llvm::Type::getInt64Ty(C), 1 << IsQuad);
1204   case NeonTypeFlags::Float32:
1205     return llvm::VectorType::get(llvm::Type::getFloatTy(C), 2 << IsQuad);
1206   };
1207   return 0;
1208 }
1209 
1210 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
1211   unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
1212   SmallVector<Constant*, 16> Indices(nElts, C);
1213   Value* SV = llvm::ConstantVector::get(Indices);
1214   return Builder.CreateShuffleVector(V, V, SV, "lane");
1215 }
1216 
1217 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
1218                                      const char *name,
1219                                      unsigned shift, bool rightshift) {
1220   unsigned j = 0;
1221   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
1222        ai != ae; ++ai, ++j)
1223     if (shift > 0 && shift == j)
1224       Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
1225     else
1226       Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
1227 
1228   return Builder.CreateCall(F, Ops, name);
1229 }
1230 
1231 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
1232                                             bool neg) {
1233   ConstantInt *CI = cast<ConstantInt>(V);
1234   int SV = CI->getSExtValue();
1235 
1236   llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
1237   llvm::Constant *C = ConstantInt::get(VTy->getElementType(), neg ? -SV : SV);
1238   SmallVector<llvm::Constant*, 16> CV(VTy->getNumElements(), C);
1239   return llvm::ConstantVector::get(CV);
1240 }
1241 
1242 /// GetPointeeAlignment - Given an expression with a pointer type, find the
1243 /// alignment of the type referenced by the pointer.  Skip over implicit
1244 /// casts.
1245 static Value *GetPointeeAlignment(CodeGenFunction &CGF, const Expr *Addr) {
1246   unsigned Align = 1;
1247   // Check if the type is a pointer.  The implicit cast operand might not be.
1248   while (Addr->getType()->isPointerType()) {
1249     QualType PtTy = Addr->getType()->getPointeeType();
1250     unsigned NewA = CGF.getContext().getTypeAlignInChars(PtTy).getQuantity();
1251     if (NewA > Align)
1252       Align = NewA;
1253 
1254     // If the address is an implicit cast, repeat with the cast operand.
1255     if (const ImplicitCastExpr *CastAddr = dyn_cast<ImplicitCastExpr>(Addr)) {
1256       Addr = CastAddr->getSubExpr();
1257       continue;
1258     }
1259     break;
1260   }
1261   return llvm::ConstantInt::get(CGF.Int32Ty, Align);
1262 }
1263 
1264 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
1265                                            const CallExpr *E) {
1266   if (BuiltinID == ARM::BI__clear_cache) {
1267     const FunctionDecl *FD = E->getDirectCallee();
1268     // Oddly people write this call without args on occasion and gcc accepts
1269     // it - it's also marked as varargs in the description file.
1270     SmallVector<Value*, 2> Ops;
1271     for (unsigned i = 0; i < E->getNumArgs(); i++)
1272       Ops.push_back(EmitScalarExpr(E->getArg(i)));
1273     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
1274     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
1275     StringRef Name = FD->getName();
1276     return Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
1277   }
1278 
1279   if (BuiltinID == ARM::BI__builtin_arm_ldrexd) {
1280     Function *F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
1281 
1282     Value *LdPtr = EmitScalarExpr(E->getArg(0));
1283     Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
1284 
1285     Value *Val0 = Builder.CreateExtractValue(Val, 1);
1286     Value *Val1 = Builder.CreateExtractValue(Val, 0);
1287     Val0 = Builder.CreateZExt(Val0, Int64Ty);
1288     Val1 = Builder.CreateZExt(Val1, Int64Ty);
1289 
1290     Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
1291     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
1292     return Builder.CreateOr(Val, Val1);
1293   }
1294 
1295   if (BuiltinID == ARM::BI__builtin_arm_strexd) {
1296     Function *F = CGM.getIntrinsic(Intrinsic::arm_strexd);
1297     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, NULL);
1298 
1299     Value *One = llvm::ConstantInt::get(Int32Ty, 1);
1300     Value *Tmp = Builder.CreateAlloca(Int64Ty, One);
1301     Value *Val = EmitScalarExpr(E->getArg(0));
1302     Builder.CreateStore(Val, Tmp);
1303 
1304     Value *LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
1305     Val = Builder.CreateLoad(LdPtr);
1306 
1307     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
1308     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
1309     Value *StPtr = EmitScalarExpr(E->getArg(1));
1310     return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "strexd");
1311   }
1312 
1313   SmallVector<Value*, 4> Ops;
1314   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++)
1315     Ops.push_back(EmitScalarExpr(E->getArg(i)));
1316 
1317   // vget_lane and vset_lane are not overloaded and do not have an extra
1318   // argument that specifies the vector type.
1319   switch (BuiltinID) {
1320   default: break;
1321   case ARM::BI__builtin_neon_vget_lane_i8:
1322   case ARM::BI__builtin_neon_vget_lane_i16:
1323   case ARM::BI__builtin_neon_vget_lane_i32:
1324   case ARM::BI__builtin_neon_vget_lane_i64:
1325   case ARM::BI__builtin_neon_vget_lane_f32:
1326   case ARM::BI__builtin_neon_vgetq_lane_i8:
1327   case ARM::BI__builtin_neon_vgetq_lane_i16:
1328   case ARM::BI__builtin_neon_vgetq_lane_i32:
1329   case ARM::BI__builtin_neon_vgetq_lane_i64:
1330   case ARM::BI__builtin_neon_vgetq_lane_f32:
1331     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
1332                                         "vget_lane");
1333   case ARM::BI__builtin_neon_vset_lane_i8:
1334   case ARM::BI__builtin_neon_vset_lane_i16:
1335   case ARM::BI__builtin_neon_vset_lane_i32:
1336   case ARM::BI__builtin_neon_vset_lane_i64:
1337   case ARM::BI__builtin_neon_vset_lane_f32:
1338   case ARM::BI__builtin_neon_vsetq_lane_i8:
1339   case ARM::BI__builtin_neon_vsetq_lane_i16:
1340   case ARM::BI__builtin_neon_vsetq_lane_i32:
1341   case ARM::BI__builtin_neon_vsetq_lane_i64:
1342   case ARM::BI__builtin_neon_vsetq_lane_f32:
1343     Ops.push_back(EmitScalarExpr(E->getArg(2)));
1344     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
1345   }
1346 
1347   // Get the last argument, which specifies the vector type.
1348   llvm::APSInt Result;
1349   const Expr *Arg = E->getArg(E->getNumArgs()-1);
1350   if (!Arg->isIntegerConstantExpr(Result, getContext()))
1351     return 0;
1352 
1353   if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
1354       BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
1355     // Determine the overloaded type of this builtin.
1356     llvm::Type *Ty;
1357     if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
1358       Ty = llvm::Type::getFloatTy(getLLVMContext());
1359     else
1360       Ty = llvm::Type::getDoubleTy(getLLVMContext());
1361 
1362     // Determine whether this is an unsigned conversion or not.
1363     bool usgn = Result.getZExtValue() == 1;
1364     unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
1365 
1366     // Call the appropriate intrinsic.
1367     Function *F = CGM.getIntrinsic(Int, Ty);
1368     return Builder.CreateCall(F, Ops, "vcvtr");
1369   }
1370 
1371   // Determine the type of this overloaded NEON intrinsic.
1372   NeonTypeFlags Type(Result.getZExtValue());
1373   bool usgn = Type.isUnsigned();
1374   bool quad = Type.isQuad();
1375   bool rightShift = false;
1376 
1377   llvm::VectorType *VTy = GetNeonType(getLLVMContext(), Type);
1378   llvm::Type *Ty = VTy;
1379   if (!Ty)
1380     return 0;
1381 
1382   unsigned Int;
1383   switch (BuiltinID) {
1384   default: return 0;
1385   case ARM::BI__builtin_neon_vabd_v:
1386   case ARM::BI__builtin_neon_vabdq_v:
1387     Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds;
1388     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
1389   case ARM::BI__builtin_neon_vabs_v:
1390   case ARM::BI__builtin_neon_vabsq_v:
1391     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vabs, Ty),
1392                         Ops, "vabs");
1393   case ARM::BI__builtin_neon_vaddhn_v:
1394     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vaddhn, Ty),
1395                         Ops, "vaddhn");
1396   case ARM::BI__builtin_neon_vcale_v:
1397     std::swap(Ops[0], Ops[1]);
1398   case ARM::BI__builtin_neon_vcage_v: {
1399     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacged);
1400     return EmitNeonCall(F, Ops, "vcage");
1401   }
1402   case ARM::BI__builtin_neon_vcaleq_v:
1403     std::swap(Ops[0], Ops[1]);
1404   case ARM::BI__builtin_neon_vcageq_v: {
1405     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq);
1406     return EmitNeonCall(F, Ops, "vcage");
1407   }
1408   case ARM::BI__builtin_neon_vcalt_v:
1409     std::swap(Ops[0], Ops[1]);
1410   case ARM::BI__builtin_neon_vcagt_v: {
1411     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtd);
1412     return EmitNeonCall(F, Ops, "vcagt");
1413   }
1414   case ARM::BI__builtin_neon_vcaltq_v:
1415     std::swap(Ops[0], Ops[1]);
1416   case ARM::BI__builtin_neon_vcagtq_v: {
1417     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq);
1418     return EmitNeonCall(F, Ops, "vcagt");
1419   }
1420   case ARM::BI__builtin_neon_vcls_v:
1421   case ARM::BI__builtin_neon_vclsq_v: {
1422     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, Ty);
1423     return EmitNeonCall(F, Ops, "vcls");
1424   }
1425   case ARM::BI__builtin_neon_vclz_v:
1426   case ARM::BI__builtin_neon_vclzq_v: {
1427     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vclz, Ty);
1428     return EmitNeonCall(F, Ops, "vclz");
1429   }
1430   case ARM::BI__builtin_neon_vcnt_v:
1431   case ARM::BI__builtin_neon_vcntq_v: {
1432     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcnt, Ty);
1433     return EmitNeonCall(F, Ops, "vcnt");
1434   }
1435   case ARM::BI__builtin_neon_vcvt_f16_v: {
1436     assert(Type.getEltType() == NeonTypeFlags::Float16 && !quad &&
1437            "unexpected vcvt_f16_v builtin");
1438     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvtfp2hf);
1439     return EmitNeonCall(F, Ops, "vcvt");
1440   }
1441   case ARM::BI__builtin_neon_vcvt_f32_f16: {
1442     assert(Type.getEltType() == NeonTypeFlags::Float16 && !quad &&
1443            "unexpected vcvt_f32_f16 builtin");
1444     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvthf2fp);
1445     return EmitNeonCall(F, Ops, "vcvt");
1446   }
1447   case ARM::BI__builtin_neon_vcvt_f32_v:
1448   case ARM::BI__builtin_neon_vcvtq_f32_v:
1449     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1450     Ty = GetNeonType(getLLVMContext(),
1451                      NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
1452     return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1453                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1454   case ARM::BI__builtin_neon_vcvt_s32_v:
1455   case ARM::BI__builtin_neon_vcvt_u32_v:
1456   case ARM::BI__builtin_neon_vcvtq_s32_v:
1457   case ARM::BI__builtin_neon_vcvtq_u32_v: {
1458     llvm::Type *FloatTy =
1459       GetNeonType(getLLVMContext(),
1460                   NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
1461     Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
1462     return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
1463                 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
1464   }
1465   case ARM::BI__builtin_neon_vcvt_n_f32_v:
1466   case ARM::BI__builtin_neon_vcvtq_n_f32_v: {
1467     llvm::Type *FloatTy =
1468       GetNeonType(getLLVMContext(),
1469                   NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
1470     llvm::Type *Tys[2] = { FloatTy, Ty };
1471     Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp
1472                : Intrinsic::arm_neon_vcvtfxs2fp;
1473     Function *F = CGM.getIntrinsic(Int, Tys);
1474     return EmitNeonCall(F, Ops, "vcvt_n");
1475   }
1476   case ARM::BI__builtin_neon_vcvt_n_s32_v:
1477   case ARM::BI__builtin_neon_vcvt_n_u32_v:
1478   case ARM::BI__builtin_neon_vcvtq_n_s32_v:
1479   case ARM::BI__builtin_neon_vcvtq_n_u32_v: {
1480     llvm::Type *FloatTy =
1481       GetNeonType(getLLVMContext(),
1482                   NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
1483     llvm::Type *Tys[2] = { Ty, FloatTy };
1484     Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu
1485                : Intrinsic::arm_neon_vcvtfp2fxs;
1486     Function *F = CGM.getIntrinsic(Int, Tys);
1487     return EmitNeonCall(F, Ops, "vcvt_n");
1488   }
1489   case ARM::BI__builtin_neon_vext_v:
1490   case ARM::BI__builtin_neon_vextq_v: {
1491     int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
1492     SmallVector<Constant*, 16> Indices;
1493     for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1494       Indices.push_back(ConstantInt::get(Int32Ty, i+CV));
1495 
1496     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1497     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1498     Value *SV = llvm::ConstantVector::get(Indices);
1499     return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
1500   }
1501   case ARM::BI__builtin_neon_vhadd_v:
1502   case ARM::BI__builtin_neon_vhaddq_v:
1503     Int = usgn ? Intrinsic::arm_neon_vhaddu : Intrinsic::arm_neon_vhadds;
1504     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhadd");
1505   case ARM::BI__builtin_neon_vhsub_v:
1506   case ARM::BI__builtin_neon_vhsubq_v:
1507     Int = usgn ? Intrinsic::arm_neon_vhsubu : Intrinsic::arm_neon_vhsubs;
1508     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhsub");
1509   case ARM::BI__builtin_neon_vld1_v:
1510   case ARM::BI__builtin_neon_vld1q_v:
1511     Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1512     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty),
1513                         Ops, "vld1");
1514   case ARM::BI__builtin_neon_vld1_lane_v:
1515   case ARM::BI__builtin_neon_vld1q_lane_v:
1516     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1517     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
1518     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1519     Ops[0] = Builder.CreateLoad(Ops[0]);
1520     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
1521   case ARM::BI__builtin_neon_vld1_dup_v:
1522   case ARM::BI__builtin_neon_vld1q_dup_v: {
1523     Value *V = UndefValue::get(Ty);
1524     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
1525     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1526     Ops[0] = Builder.CreateLoad(Ops[0]);
1527     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
1528     Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
1529     return EmitNeonSplat(Ops[0], CI);
1530   }
1531   case ARM::BI__builtin_neon_vld2_v:
1532   case ARM::BI__builtin_neon_vld2q_v: {
1533     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2, Ty);
1534     Value *Align = GetPointeeAlignment(*this, E->getArg(1));
1535     Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld2");
1536     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1537     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1538     return Builder.CreateStore(Ops[1], Ops[0]);
1539   }
1540   case ARM::BI__builtin_neon_vld3_v:
1541   case ARM::BI__builtin_neon_vld3q_v: {
1542     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3, Ty);
1543     Value *Align = GetPointeeAlignment(*this, E->getArg(1));
1544     Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld3");
1545     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1546     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1547     return Builder.CreateStore(Ops[1], Ops[0]);
1548   }
1549   case ARM::BI__builtin_neon_vld4_v:
1550   case ARM::BI__builtin_neon_vld4q_v: {
1551     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4, Ty);
1552     Value *Align = GetPointeeAlignment(*this, E->getArg(1));
1553     Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld4");
1554     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1555     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1556     return Builder.CreateStore(Ops[1], Ops[0]);
1557   }
1558   case ARM::BI__builtin_neon_vld2_lane_v:
1559   case ARM::BI__builtin_neon_vld2q_lane_v: {
1560     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2lane, Ty);
1561     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1562     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
1563     Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
1564     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
1565     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1566     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1567     return Builder.CreateStore(Ops[1], Ops[0]);
1568   }
1569   case ARM::BI__builtin_neon_vld3_lane_v:
1570   case ARM::BI__builtin_neon_vld3q_lane_v: {
1571     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3lane, Ty);
1572     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1573     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
1574     Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
1575     Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
1576     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
1577     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1578     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1579     return Builder.CreateStore(Ops[1], Ops[0]);
1580   }
1581   case ARM::BI__builtin_neon_vld4_lane_v:
1582   case ARM::BI__builtin_neon_vld4q_lane_v: {
1583     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4lane, Ty);
1584     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1585     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
1586     Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
1587     Ops[5] = Builder.CreateBitCast(Ops[5], Ty);
1588     Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
1589     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
1590     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1591     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1592     return Builder.CreateStore(Ops[1], Ops[0]);
1593   }
1594   case ARM::BI__builtin_neon_vld2_dup_v:
1595   case ARM::BI__builtin_neon_vld3_dup_v:
1596   case ARM::BI__builtin_neon_vld4_dup_v: {
1597     // Handle 64-bit elements as a special-case.  There is no "dup" needed.
1598     if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
1599       switch (BuiltinID) {
1600       case ARM::BI__builtin_neon_vld2_dup_v:
1601         Int = Intrinsic::arm_neon_vld2;
1602         break;
1603       case ARM::BI__builtin_neon_vld3_dup_v:
1604         Int = Intrinsic::arm_neon_vld2;
1605         break;
1606       case ARM::BI__builtin_neon_vld4_dup_v:
1607         Int = Intrinsic::arm_neon_vld2;
1608         break;
1609       default: llvm_unreachable("unknown vld_dup intrinsic?");
1610       }
1611       Function *F = CGM.getIntrinsic(Int, Ty);
1612       Value *Align = GetPointeeAlignment(*this, E->getArg(1));
1613       Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup");
1614       Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1615       Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1616       return Builder.CreateStore(Ops[1], Ops[0]);
1617     }
1618     switch (BuiltinID) {
1619     case ARM::BI__builtin_neon_vld2_dup_v:
1620       Int = Intrinsic::arm_neon_vld2lane;
1621       break;
1622     case ARM::BI__builtin_neon_vld3_dup_v:
1623       Int = Intrinsic::arm_neon_vld2lane;
1624       break;
1625     case ARM::BI__builtin_neon_vld4_dup_v:
1626       Int = Intrinsic::arm_neon_vld2lane;
1627       break;
1628     default: llvm_unreachable("unknown vld_dup intrinsic?");
1629     }
1630     Function *F = CGM.getIntrinsic(Int, Ty);
1631     llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
1632 
1633     SmallVector<Value*, 6> Args;
1634     Args.push_back(Ops[1]);
1635     Args.append(STy->getNumElements(), UndefValue::get(Ty));
1636 
1637     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
1638     Args.push_back(CI);
1639     Args.push_back(GetPointeeAlignment(*this, E->getArg(1)));
1640 
1641     Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
1642     // splat lane 0 to all elts in each vector of the result.
1643     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
1644       Value *Val = Builder.CreateExtractValue(Ops[1], i);
1645       Value *Elt = Builder.CreateBitCast(Val, Ty);
1646       Elt = EmitNeonSplat(Elt, CI);
1647       Elt = Builder.CreateBitCast(Elt, Val->getType());
1648       Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
1649     }
1650     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1651     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1652     return Builder.CreateStore(Ops[1], Ops[0]);
1653   }
1654   case ARM::BI__builtin_neon_vmax_v:
1655   case ARM::BI__builtin_neon_vmaxq_v:
1656     Int = usgn ? Intrinsic::arm_neon_vmaxu : Intrinsic::arm_neon_vmaxs;
1657     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
1658   case ARM::BI__builtin_neon_vmin_v:
1659   case ARM::BI__builtin_neon_vminq_v:
1660     Int = usgn ? Intrinsic::arm_neon_vminu : Intrinsic::arm_neon_vmins;
1661     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
1662   case ARM::BI__builtin_neon_vmovl_v: {
1663     llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
1664     Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
1665     if (usgn)
1666       return Builder.CreateZExt(Ops[0], Ty, "vmovl");
1667     return Builder.CreateSExt(Ops[0], Ty, "vmovl");
1668   }
1669   case ARM::BI__builtin_neon_vmovn_v: {
1670     llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
1671     Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
1672     return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
1673   }
1674   case ARM::BI__builtin_neon_vmul_v:
1675   case ARM::BI__builtin_neon_vmulq_v:
1676     assert(Type.isPoly() && "vmul builtin only supported for polynomial types");
1677     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vmulp, Ty),
1678                         Ops, "vmul");
1679   case ARM::BI__builtin_neon_vmull_v:
1680     Int = usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
1681     Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
1682     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
1683   case ARM::BI__builtin_neon_vpadal_v:
1684   case ARM::BI__builtin_neon_vpadalq_v: {
1685     Int = usgn ? Intrinsic::arm_neon_vpadalu : Intrinsic::arm_neon_vpadals;
1686     // The source operand type has twice as many elements of half the size.
1687     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
1688     llvm::Type *EltTy =
1689       llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
1690     llvm::Type *NarrowTy =
1691       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
1692     llvm::Type *Tys[2] = { Ty, NarrowTy };
1693     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpadal");
1694   }
1695   case ARM::BI__builtin_neon_vpadd_v:
1696     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vpadd, Ty),
1697                         Ops, "vpadd");
1698   case ARM::BI__builtin_neon_vpaddl_v:
1699   case ARM::BI__builtin_neon_vpaddlq_v: {
1700     Int = usgn ? Intrinsic::arm_neon_vpaddlu : Intrinsic::arm_neon_vpaddls;
1701     // The source operand type has twice as many elements of half the size.
1702     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
1703     llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
1704     llvm::Type *NarrowTy =
1705       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
1706     llvm::Type *Tys[2] = { Ty, NarrowTy };
1707     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
1708   }
1709   case ARM::BI__builtin_neon_vpmax_v:
1710     Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
1711     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
1712   case ARM::BI__builtin_neon_vpmin_v:
1713     Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
1714     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
1715   case ARM::BI__builtin_neon_vqabs_v:
1716   case ARM::BI__builtin_neon_vqabsq_v:
1717     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqabs, Ty),
1718                         Ops, "vqabs");
1719   case ARM::BI__builtin_neon_vqadd_v:
1720   case ARM::BI__builtin_neon_vqaddq_v:
1721     Int = usgn ? Intrinsic::arm_neon_vqaddu : Intrinsic::arm_neon_vqadds;
1722     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqadd");
1723   case ARM::BI__builtin_neon_vqdmlal_v:
1724     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlal, Ty),
1725                         Ops, "vqdmlal");
1726   case ARM::BI__builtin_neon_vqdmlsl_v:
1727     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlsl, Ty),
1728                         Ops, "vqdmlsl");
1729   case ARM::BI__builtin_neon_vqdmulh_v:
1730   case ARM::BI__builtin_neon_vqdmulhq_v:
1731     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmulh, Ty),
1732                         Ops, "vqdmulh");
1733   case ARM::BI__builtin_neon_vqdmull_v:
1734     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty),
1735                         Ops, "vqdmull");
1736   case ARM::BI__builtin_neon_vqmovn_v:
1737     Int = usgn ? Intrinsic::arm_neon_vqmovnu : Intrinsic::arm_neon_vqmovns;
1738     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqmovn");
1739   case ARM::BI__builtin_neon_vqmovun_v:
1740     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqmovnsu, Ty),
1741                         Ops, "vqdmull");
1742   case ARM::BI__builtin_neon_vqneg_v:
1743   case ARM::BI__builtin_neon_vqnegq_v:
1744     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqneg, Ty),
1745                         Ops, "vqneg");
1746   case ARM::BI__builtin_neon_vqrdmulh_v:
1747   case ARM::BI__builtin_neon_vqrdmulhq_v:
1748     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrdmulh, Ty),
1749                         Ops, "vqrdmulh");
1750   case ARM::BI__builtin_neon_vqrshl_v:
1751   case ARM::BI__builtin_neon_vqrshlq_v:
1752     Int = usgn ? Intrinsic::arm_neon_vqrshiftu : Intrinsic::arm_neon_vqrshifts;
1753     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshl");
1754   case ARM::BI__builtin_neon_vqrshrn_n_v:
1755     Int = usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
1756     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
1757                         1, true);
1758   case ARM::BI__builtin_neon_vqrshrun_n_v:
1759     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
1760                         Ops, "vqrshrun_n", 1, true);
1761   case ARM::BI__builtin_neon_vqshl_v:
1762   case ARM::BI__builtin_neon_vqshlq_v:
1763     Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
1764     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl");
1765   case ARM::BI__builtin_neon_vqshl_n_v:
1766   case ARM::BI__builtin_neon_vqshlq_n_v:
1767     Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
1768     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
1769                         1, false);
1770   case ARM::BI__builtin_neon_vqshlu_n_v:
1771   case ARM::BI__builtin_neon_vqshluq_n_v:
1772     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftsu, Ty),
1773                         Ops, "vqshlu", 1, false);
1774   case ARM::BI__builtin_neon_vqshrn_n_v:
1775     Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
1776     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
1777                         1, true);
1778   case ARM::BI__builtin_neon_vqshrun_n_v:
1779     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
1780                         Ops, "vqshrun_n", 1, true);
1781   case ARM::BI__builtin_neon_vqsub_v:
1782   case ARM::BI__builtin_neon_vqsubq_v:
1783     Int = usgn ? Intrinsic::arm_neon_vqsubu : Intrinsic::arm_neon_vqsubs;
1784     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqsub");
1785   case ARM::BI__builtin_neon_vraddhn_v:
1786     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vraddhn, Ty),
1787                         Ops, "vraddhn");
1788   case ARM::BI__builtin_neon_vrecpe_v:
1789   case ARM::BI__builtin_neon_vrecpeq_v:
1790     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
1791                         Ops, "vrecpe");
1792   case ARM::BI__builtin_neon_vrecps_v:
1793   case ARM::BI__builtin_neon_vrecpsq_v:
1794     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecps, Ty),
1795                         Ops, "vrecps");
1796   case ARM::BI__builtin_neon_vrhadd_v:
1797   case ARM::BI__builtin_neon_vrhaddq_v:
1798     Int = usgn ? Intrinsic::arm_neon_vrhaddu : Intrinsic::arm_neon_vrhadds;
1799     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrhadd");
1800   case ARM::BI__builtin_neon_vrshl_v:
1801   case ARM::BI__builtin_neon_vrshlq_v:
1802     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
1803     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshl");
1804   case ARM::BI__builtin_neon_vrshrn_n_v:
1805     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
1806                         Ops, "vrshrn_n", 1, true);
1807   case ARM::BI__builtin_neon_vrshr_n_v:
1808   case ARM::BI__builtin_neon_vrshrq_n_v:
1809     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
1810     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 1, true);
1811   case ARM::BI__builtin_neon_vrsqrte_v:
1812   case ARM::BI__builtin_neon_vrsqrteq_v:
1813     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrte, Ty),
1814                         Ops, "vrsqrte");
1815   case ARM::BI__builtin_neon_vrsqrts_v:
1816   case ARM::BI__builtin_neon_vrsqrtsq_v:
1817     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrts, Ty),
1818                         Ops, "vrsqrts");
1819   case ARM::BI__builtin_neon_vrsra_n_v:
1820   case ARM::BI__builtin_neon_vrsraq_n_v:
1821     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1822     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1823     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
1824     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
1825     Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]);
1826     return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
1827   case ARM::BI__builtin_neon_vrsubhn_v:
1828     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsubhn, Ty),
1829                         Ops, "vrsubhn");
1830   case ARM::BI__builtin_neon_vshl_v:
1831   case ARM::BI__builtin_neon_vshlq_v:
1832     Int = usgn ? Intrinsic::arm_neon_vshiftu : Intrinsic::arm_neon_vshifts;
1833     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshl");
1834   case ARM::BI__builtin_neon_vshll_n_v:
1835     Int = usgn ? Intrinsic::arm_neon_vshiftlu : Intrinsic::arm_neon_vshiftls;
1836     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshll", 1);
1837   case ARM::BI__builtin_neon_vshl_n_v:
1838   case ARM::BI__builtin_neon_vshlq_n_v:
1839     Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
1840     return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1], "vshl_n");
1841   case ARM::BI__builtin_neon_vshrn_n_v:
1842     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftn, Ty),
1843                         Ops, "vshrn_n", 1, true);
1844   case ARM::BI__builtin_neon_vshr_n_v:
1845   case ARM::BI__builtin_neon_vshrq_n_v:
1846     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1847     Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
1848     if (usgn)
1849       return Builder.CreateLShr(Ops[0], Ops[1], "vshr_n");
1850     else
1851       return Builder.CreateAShr(Ops[0], Ops[1], "vshr_n");
1852   case ARM::BI__builtin_neon_vsri_n_v:
1853   case ARM::BI__builtin_neon_vsriq_n_v:
1854     rightShift = true;
1855   case ARM::BI__builtin_neon_vsli_n_v:
1856   case ARM::BI__builtin_neon_vsliq_n_v:
1857     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
1858     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
1859                         Ops, "vsli_n");
1860   case ARM::BI__builtin_neon_vsra_n_v:
1861   case ARM::BI__builtin_neon_vsraq_n_v:
1862     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1863     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1864     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, false);
1865     if (usgn)
1866       Ops[1] = Builder.CreateLShr(Ops[1], Ops[2], "vsra_n");
1867     else
1868       Ops[1] = Builder.CreateAShr(Ops[1], Ops[2], "vsra_n");
1869     return Builder.CreateAdd(Ops[0], Ops[1]);
1870   case ARM::BI__builtin_neon_vst1_v:
1871   case ARM::BI__builtin_neon_vst1q_v:
1872     Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1873     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, Ty),
1874                         Ops, "");
1875   case ARM::BI__builtin_neon_vst1_lane_v:
1876   case ARM::BI__builtin_neon_vst1q_lane_v:
1877     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1878     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
1879     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1880     return Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty));
1881   case ARM::BI__builtin_neon_vst2_v:
1882   case ARM::BI__builtin_neon_vst2q_v:
1883     Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1884     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2, Ty),
1885                         Ops, "");
1886   case ARM::BI__builtin_neon_vst2_lane_v:
1887   case ARM::BI__builtin_neon_vst2q_lane_v:
1888     Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1889     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2lane, Ty),
1890                         Ops, "");
1891   case ARM::BI__builtin_neon_vst3_v:
1892   case ARM::BI__builtin_neon_vst3q_v:
1893     Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1894     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3, Ty),
1895                         Ops, "");
1896   case ARM::BI__builtin_neon_vst3_lane_v:
1897   case ARM::BI__builtin_neon_vst3q_lane_v:
1898     Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1899     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3lane, Ty),
1900                         Ops, "");
1901   case ARM::BI__builtin_neon_vst4_v:
1902   case ARM::BI__builtin_neon_vst4q_v:
1903     Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1904     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4, Ty),
1905                         Ops, "");
1906   case ARM::BI__builtin_neon_vst4_lane_v:
1907   case ARM::BI__builtin_neon_vst4q_lane_v:
1908     Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1909     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4lane, Ty),
1910                         Ops, "");
1911   case ARM::BI__builtin_neon_vsubhn_v:
1912     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vsubhn, Ty),
1913                         Ops, "vsubhn");
1914   case ARM::BI__builtin_neon_vtbl1_v:
1915     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
1916                         Ops, "vtbl1");
1917   case ARM::BI__builtin_neon_vtbl2_v:
1918     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
1919                         Ops, "vtbl2");
1920   case ARM::BI__builtin_neon_vtbl3_v:
1921     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
1922                         Ops, "vtbl3");
1923   case ARM::BI__builtin_neon_vtbl4_v:
1924     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
1925                         Ops, "vtbl4");
1926   case ARM::BI__builtin_neon_vtbx1_v:
1927     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
1928                         Ops, "vtbx1");
1929   case ARM::BI__builtin_neon_vtbx2_v:
1930     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
1931                         Ops, "vtbx2");
1932   case ARM::BI__builtin_neon_vtbx3_v:
1933     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
1934                         Ops, "vtbx3");
1935   case ARM::BI__builtin_neon_vtbx4_v:
1936     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
1937                         Ops, "vtbx4");
1938   case ARM::BI__builtin_neon_vtst_v:
1939   case ARM::BI__builtin_neon_vtstq_v: {
1940     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1941     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1942     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
1943     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
1944                                 ConstantAggregateZero::get(Ty));
1945     return Builder.CreateSExt(Ops[0], Ty, "vtst");
1946   }
1947   case ARM::BI__builtin_neon_vtrn_v:
1948   case ARM::BI__builtin_neon_vtrnq_v: {
1949     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
1950     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1951     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1952     Value *SV = 0;
1953 
1954     for (unsigned vi = 0; vi != 2; ++vi) {
1955       SmallVector<Constant*, 16> Indices;
1956       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1957         Indices.push_back(ConstantInt::get(Int32Ty, i+vi));
1958         Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi));
1959       }
1960       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
1961       SV = llvm::ConstantVector::get(Indices);
1962       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
1963       SV = Builder.CreateStore(SV, Addr);
1964     }
1965     return SV;
1966   }
1967   case ARM::BI__builtin_neon_vuzp_v:
1968   case ARM::BI__builtin_neon_vuzpq_v: {
1969     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
1970     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1971     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1972     Value *SV = 0;
1973 
1974     for (unsigned vi = 0; vi != 2; ++vi) {
1975       SmallVector<Constant*, 16> Indices;
1976       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1977         Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
1978 
1979       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
1980       SV = llvm::ConstantVector::get(Indices);
1981       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
1982       SV = Builder.CreateStore(SV, Addr);
1983     }
1984     return SV;
1985   }
1986   case ARM::BI__builtin_neon_vzip_v:
1987   case ARM::BI__builtin_neon_vzipq_v: {
1988     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
1989     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1990     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1991     Value *SV = 0;
1992 
1993     for (unsigned vi = 0; vi != 2; ++vi) {
1994       SmallVector<Constant*, 16> Indices;
1995       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1996         Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
1997         Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
1998       }
1999       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
2000       SV = llvm::ConstantVector::get(Indices);
2001       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
2002       SV = Builder.CreateStore(SV, Addr);
2003     }
2004     return SV;
2005   }
2006   }
2007 }
2008 
2009 llvm::Value *CodeGenFunction::
2010 BuildVector(const SmallVectorImpl<llvm::Value*> &Ops) {
2011   assert((Ops.size() & (Ops.size() - 1)) == 0 &&
2012          "Not a power-of-two sized vector!");
2013   bool AllConstants = true;
2014   for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
2015     AllConstants &= isa<Constant>(Ops[i]);
2016 
2017   // If this is a constant vector, create a ConstantVector.
2018   if (AllConstants) {
2019     std::vector<llvm::Constant*> CstOps;
2020     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
2021       CstOps.push_back(cast<Constant>(Ops[i]));
2022     return llvm::ConstantVector::get(CstOps);
2023   }
2024 
2025   // Otherwise, insertelement the values to build the vector.
2026   Value *Result =
2027     llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
2028 
2029   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
2030     Result = Builder.CreateInsertElement(Result, Ops[i],
2031                llvm::ConstantInt::get(llvm::Type::getInt32Ty(getLLVMContext()), i));
2032 
2033   return Result;
2034 }
2035 
2036 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
2037                                            const CallExpr *E) {
2038   SmallVector<Value*, 4> Ops;
2039 
2040   // Find out if any arguments are required to be integer constant expressions.
2041   unsigned ICEArguments = 0;
2042   ASTContext::GetBuiltinTypeError Error;
2043   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
2044   assert(Error == ASTContext::GE_None && "Should not codegen an error");
2045 
2046   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
2047     // If this is a normal argument, just emit it as a scalar.
2048     if ((ICEArguments & (1 << i)) == 0) {
2049       Ops.push_back(EmitScalarExpr(E->getArg(i)));
2050       continue;
2051     }
2052 
2053     // If this is required to be a constant, constant fold it so that we know
2054     // that the generated intrinsic gets a ConstantInt.
2055     llvm::APSInt Result;
2056     bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
2057     assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
2058     Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
2059   }
2060 
2061   switch (BuiltinID) {
2062   default: return 0;
2063   case X86::BI__builtin_ia32_pslldi128:
2064   case X86::BI__builtin_ia32_psllqi128:
2065   case X86::BI__builtin_ia32_psllwi128:
2066   case X86::BI__builtin_ia32_psradi128:
2067   case X86::BI__builtin_ia32_psrawi128:
2068   case X86::BI__builtin_ia32_psrldi128:
2069   case X86::BI__builtin_ia32_psrlqi128:
2070   case X86::BI__builtin_ia32_psrlwi128: {
2071     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty, "zext");
2072     llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
2073     llvm::Value *Zero = llvm::ConstantInt::get(Int32Ty, 0);
2074     Ops[1] = Builder.CreateInsertElement(llvm::UndefValue::get(Ty),
2075                                          Ops[1], Zero, "insert");
2076     Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType(), "bitcast");
2077     const char *name = 0;
2078     Intrinsic::ID ID = Intrinsic::not_intrinsic;
2079 
2080     switch (BuiltinID) {
2081     default: llvm_unreachable("Unsupported shift intrinsic!");
2082     case X86::BI__builtin_ia32_pslldi128:
2083       name = "pslldi";
2084       ID = Intrinsic::x86_sse2_psll_d;
2085       break;
2086     case X86::BI__builtin_ia32_psllqi128:
2087       name = "psllqi";
2088       ID = Intrinsic::x86_sse2_psll_q;
2089       break;
2090     case X86::BI__builtin_ia32_psllwi128:
2091       name = "psllwi";
2092       ID = Intrinsic::x86_sse2_psll_w;
2093       break;
2094     case X86::BI__builtin_ia32_psradi128:
2095       name = "psradi";
2096       ID = Intrinsic::x86_sse2_psra_d;
2097       break;
2098     case X86::BI__builtin_ia32_psrawi128:
2099       name = "psrawi";
2100       ID = Intrinsic::x86_sse2_psra_w;
2101       break;
2102     case X86::BI__builtin_ia32_psrldi128:
2103       name = "psrldi";
2104       ID = Intrinsic::x86_sse2_psrl_d;
2105       break;
2106     case X86::BI__builtin_ia32_psrlqi128:
2107       name = "psrlqi";
2108       ID = Intrinsic::x86_sse2_psrl_q;
2109       break;
2110     case X86::BI__builtin_ia32_psrlwi128:
2111       name = "psrlwi";
2112       ID = Intrinsic::x86_sse2_psrl_w;
2113       break;
2114     }
2115     llvm::Function *F = CGM.getIntrinsic(ID);
2116     return Builder.CreateCall(F, Ops, name);
2117   }
2118   case X86::BI__builtin_ia32_vec_init_v8qi:
2119   case X86::BI__builtin_ia32_vec_init_v4hi:
2120   case X86::BI__builtin_ia32_vec_init_v2si:
2121     return Builder.CreateBitCast(BuildVector(Ops),
2122                                  llvm::Type::getX86_MMXTy(getLLVMContext()));
2123   case X86::BI__builtin_ia32_vec_ext_v2si:
2124     return Builder.CreateExtractElement(Ops[0],
2125                                   llvm::ConstantInt::get(Ops[1]->getType(), 0));
2126   case X86::BI__builtin_ia32_pslldi:
2127   case X86::BI__builtin_ia32_psllqi:
2128   case X86::BI__builtin_ia32_psllwi:
2129   case X86::BI__builtin_ia32_psradi:
2130   case X86::BI__builtin_ia32_psrawi:
2131   case X86::BI__builtin_ia32_psrldi:
2132   case X86::BI__builtin_ia32_psrlqi:
2133   case X86::BI__builtin_ia32_psrlwi: {
2134     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty, "zext");
2135     llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 1);
2136     Ops[1] = Builder.CreateBitCast(Ops[1], Ty, "bitcast");
2137     const char *name = 0;
2138     Intrinsic::ID ID = Intrinsic::not_intrinsic;
2139 
2140     switch (BuiltinID) {
2141     default: llvm_unreachable("Unsupported shift intrinsic!");
2142     case X86::BI__builtin_ia32_pslldi:
2143       name = "pslldi";
2144       ID = Intrinsic::x86_mmx_psll_d;
2145       break;
2146     case X86::BI__builtin_ia32_psllqi:
2147       name = "psllqi";
2148       ID = Intrinsic::x86_mmx_psll_q;
2149       break;
2150     case X86::BI__builtin_ia32_psllwi:
2151       name = "psllwi";
2152       ID = Intrinsic::x86_mmx_psll_w;
2153       break;
2154     case X86::BI__builtin_ia32_psradi:
2155       name = "psradi";
2156       ID = Intrinsic::x86_mmx_psra_d;
2157       break;
2158     case X86::BI__builtin_ia32_psrawi:
2159       name = "psrawi";
2160       ID = Intrinsic::x86_mmx_psra_w;
2161       break;
2162     case X86::BI__builtin_ia32_psrldi:
2163       name = "psrldi";
2164       ID = Intrinsic::x86_mmx_psrl_d;
2165       break;
2166     case X86::BI__builtin_ia32_psrlqi:
2167       name = "psrlqi";
2168       ID = Intrinsic::x86_mmx_psrl_q;
2169       break;
2170     case X86::BI__builtin_ia32_psrlwi:
2171       name = "psrlwi";
2172       ID = Intrinsic::x86_mmx_psrl_w;
2173       break;
2174     }
2175     llvm::Function *F = CGM.getIntrinsic(ID);
2176     return Builder.CreateCall(F, Ops, name);
2177   }
2178   case X86::BI__builtin_ia32_cmpps: {
2179     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse_cmp_ps);
2180     return Builder.CreateCall(F, Ops, "cmpps");
2181   }
2182   case X86::BI__builtin_ia32_cmpss: {
2183     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse_cmp_ss);
2184     return Builder.CreateCall(F, Ops, "cmpss");
2185   }
2186   case X86::BI__builtin_ia32_ldmxcsr: {
2187     llvm::Type *PtrTy = Int8PtrTy;
2188     Value *One = llvm::ConstantInt::get(Int32Ty, 1);
2189     Value *Tmp = Builder.CreateAlloca(Int32Ty, One);
2190     Builder.CreateStore(Ops[0], Tmp);
2191     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
2192                               Builder.CreateBitCast(Tmp, PtrTy));
2193   }
2194   case X86::BI__builtin_ia32_stmxcsr: {
2195     llvm::Type *PtrTy = Int8PtrTy;
2196     Value *One = llvm::ConstantInt::get(Int32Ty, 1);
2197     Value *Tmp = Builder.CreateAlloca(Int32Ty, One);
2198     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
2199                        Builder.CreateBitCast(Tmp, PtrTy));
2200     return Builder.CreateLoad(Tmp, "stmxcsr");
2201   }
2202   case X86::BI__builtin_ia32_cmppd: {
2203     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_cmp_pd);
2204     return Builder.CreateCall(F, Ops, "cmppd");
2205   }
2206   case X86::BI__builtin_ia32_cmpsd: {
2207     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_cmp_sd);
2208     return Builder.CreateCall(F, Ops, "cmpsd");
2209   }
2210   case X86::BI__builtin_ia32_storehps:
2211   case X86::BI__builtin_ia32_storelps: {
2212     llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
2213     llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
2214 
2215     // cast val v2i64
2216     Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
2217 
2218     // extract (0, 1)
2219     unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
2220     llvm::Value *Idx = llvm::ConstantInt::get(Int32Ty, Index);
2221     Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
2222 
2223     // cast pointer to i64 & store
2224     Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
2225     return Builder.CreateStore(Ops[1], Ops[0]);
2226   }
2227   case X86::BI__builtin_ia32_palignr: {
2228     unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
2229 
2230     // If palignr is shifting the pair of input vectors less than 9 bytes,
2231     // emit a shuffle instruction.
2232     if (shiftVal <= 8) {
2233       SmallVector<llvm::Constant*, 8> Indices;
2234       for (unsigned i = 0; i != 8; ++i)
2235         Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));
2236 
2237       Value* SV = llvm::ConstantVector::get(Indices);
2238       return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
2239     }
2240 
2241     // If palignr is shifting the pair of input vectors more than 8 but less
2242     // than 16 bytes, emit a logical right shift of the destination.
2243     if (shiftVal < 16) {
2244       // MMX has these as 1 x i64 vectors for some odd optimization reasons.
2245       llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 1);
2246 
2247       Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
2248       Ops[1] = llvm::ConstantInt::get(VecTy, (shiftVal-8) * 8);
2249 
2250       // create i32 constant
2251       llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_mmx_psrl_q);
2252       return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
2253     }
2254 
2255     // If palignr is shifting the pair of vectors more than 16 bytes, emit zero.
2256     return llvm::Constant::getNullValue(ConvertType(E->getType()));
2257   }
2258   case X86::BI__builtin_ia32_palignr128: {
2259     unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
2260 
2261     // If palignr is shifting the pair of input vectors less than 17 bytes,
2262     // emit a shuffle instruction.
2263     if (shiftVal <= 16) {
2264       SmallVector<llvm::Constant*, 16> Indices;
2265       for (unsigned i = 0; i != 16; ++i)
2266         Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));
2267 
2268       Value* SV = llvm::ConstantVector::get(Indices);
2269       return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
2270     }
2271 
2272     // If palignr is shifting the pair of input vectors more than 16 but less
2273     // than 32 bytes, emit a logical right shift of the destination.
2274     if (shiftVal < 32) {
2275       llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
2276 
2277       Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
2278       Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);
2279 
2280       // create i32 constant
2281       llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq);
2282       return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
2283     }
2284 
2285     // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
2286     return llvm::Constant::getNullValue(ConvertType(E->getType()));
2287   }
2288   case X86::BI__builtin_ia32_movntps:
2289   case X86::BI__builtin_ia32_movntpd:
2290   case X86::BI__builtin_ia32_movntdq:
2291   case X86::BI__builtin_ia32_movnti: {
2292     llvm::MDNode *Node = llvm::MDNode::get(getLLVMContext(),
2293                                            Builder.getInt32(1));
2294 
2295     // Convert the type of the pointer to a pointer to the stored type.
2296     Value *BC = Builder.CreateBitCast(Ops[0],
2297                                 llvm::PointerType::getUnqual(Ops[1]->getType()),
2298                                       "cast");
2299     StoreInst *SI = Builder.CreateStore(Ops[1], BC);
2300     SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
2301     SI->setAlignment(16);
2302     return SI;
2303   }
2304   // 3DNow!
2305   case X86::BI__builtin_ia32_pavgusb:
2306   case X86::BI__builtin_ia32_pf2id:
2307   case X86::BI__builtin_ia32_pfacc:
2308   case X86::BI__builtin_ia32_pfadd:
2309   case X86::BI__builtin_ia32_pfcmpeq:
2310   case X86::BI__builtin_ia32_pfcmpge:
2311   case X86::BI__builtin_ia32_pfcmpgt:
2312   case X86::BI__builtin_ia32_pfmax:
2313   case X86::BI__builtin_ia32_pfmin:
2314   case X86::BI__builtin_ia32_pfmul:
2315   case X86::BI__builtin_ia32_pfrcp:
2316   case X86::BI__builtin_ia32_pfrcpit1:
2317   case X86::BI__builtin_ia32_pfrcpit2:
2318   case X86::BI__builtin_ia32_pfrsqrt:
2319   case X86::BI__builtin_ia32_pfrsqit1:
2320   case X86::BI__builtin_ia32_pfrsqrtit1:
2321   case X86::BI__builtin_ia32_pfsub:
2322   case X86::BI__builtin_ia32_pfsubr:
2323   case X86::BI__builtin_ia32_pi2fd:
2324   case X86::BI__builtin_ia32_pmulhrw:
2325   case X86::BI__builtin_ia32_pf2iw:
2326   case X86::BI__builtin_ia32_pfnacc:
2327   case X86::BI__builtin_ia32_pfpnacc:
2328   case X86::BI__builtin_ia32_pi2fw:
2329   case X86::BI__builtin_ia32_pswapdsf:
2330   case X86::BI__builtin_ia32_pswapdsi: {
2331     const char *name = 0;
2332     Intrinsic::ID ID = Intrinsic::not_intrinsic;
2333     switch(BuiltinID) {
2334     case X86::BI__builtin_ia32_pavgusb:
2335       name = "pavgusb";
2336       ID = Intrinsic::x86_3dnow_pavgusb;
2337       break;
2338     case X86::BI__builtin_ia32_pf2id:
2339       name = "pf2id";
2340       ID = Intrinsic::x86_3dnow_pf2id;
2341       break;
2342     case X86::BI__builtin_ia32_pfacc:
2343       name = "pfacc";
2344       ID = Intrinsic::x86_3dnow_pfacc;
2345       break;
2346     case X86::BI__builtin_ia32_pfadd:
2347       name = "pfadd";
2348       ID = Intrinsic::x86_3dnow_pfadd;
2349       break;
2350     case X86::BI__builtin_ia32_pfcmpeq:
2351       name = "pfcmpeq";
2352       ID = Intrinsic::x86_3dnow_pfcmpeq;
2353       break;
2354     case X86::BI__builtin_ia32_pfcmpge:
2355       name = "pfcmpge";
2356       ID = Intrinsic::x86_3dnow_pfcmpge;
2357       break;
2358     case X86::BI__builtin_ia32_pfcmpgt:
2359       name = "pfcmpgt";
2360       ID = Intrinsic::x86_3dnow_pfcmpgt;
2361       break;
2362     case X86::BI__builtin_ia32_pfmax:
2363       name = "pfmax";
2364       ID = Intrinsic::x86_3dnow_pfmax;
2365       break;
2366     case X86::BI__builtin_ia32_pfmin:
2367       name = "pfmin";
2368       ID = Intrinsic::x86_3dnow_pfmin;
2369       break;
2370     case X86::BI__builtin_ia32_pfmul:
2371       name = "pfmul";
2372       ID = Intrinsic::x86_3dnow_pfmul;
2373       break;
2374     case X86::BI__builtin_ia32_pfrcp:
2375       name = "pfrcp";
2376       ID = Intrinsic::x86_3dnow_pfrcp;
2377       break;
2378     case X86::BI__builtin_ia32_pfrcpit1:
2379       name = "pfrcpit1";
2380       ID = Intrinsic::x86_3dnow_pfrcpit1;
2381       break;
2382     case X86::BI__builtin_ia32_pfrcpit2:
2383       name = "pfrcpit2";
2384       ID = Intrinsic::x86_3dnow_pfrcpit2;
2385       break;
2386     case X86::BI__builtin_ia32_pfrsqrt:
2387       name = "pfrsqrt";
2388       ID = Intrinsic::x86_3dnow_pfrsqrt;
2389       break;
2390     case X86::BI__builtin_ia32_pfrsqit1:
2391     case X86::BI__builtin_ia32_pfrsqrtit1:
2392       name = "pfrsqit1";
2393       ID = Intrinsic::x86_3dnow_pfrsqit1;
2394       break;
2395     case X86::BI__builtin_ia32_pfsub:
2396       name = "pfsub";
2397       ID = Intrinsic::x86_3dnow_pfsub;
2398       break;
2399     case X86::BI__builtin_ia32_pfsubr:
2400       name = "pfsubr";
2401       ID = Intrinsic::x86_3dnow_pfsubr;
2402       break;
2403     case X86::BI__builtin_ia32_pi2fd:
2404       name = "pi2fd";
2405       ID = Intrinsic::x86_3dnow_pi2fd;
2406       break;
2407     case X86::BI__builtin_ia32_pmulhrw:
2408       name = "pmulhrw";
2409       ID = Intrinsic::x86_3dnow_pmulhrw;
2410       break;
2411     case X86::BI__builtin_ia32_pf2iw:
2412       name = "pf2iw";
2413       ID = Intrinsic::x86_3dnowa_pf2iw;
2414       break;
2415     case X86::BI__builtin_ia32_pfnacc:
2416       name = "pfnacc";
2417       ID = Intrinsic::x86_3dnowa_pfnacc;
2418       break;
2419     case X86::BI__builtin_ia32_pfpnacc:
2420       name = "pfpnacc";
2421       ID = Intrinsic::x86_3dnowa_pfpnacc;
2422       break;
2423     case X86::BI__builtin_ia32_pi2fw:
2424       name = "pi2fw";
2425       ID = Intrinsic::x86_3dnowa_pi2fw;
2426       break;
2427     case X86::BI__builtin_ia32_pswapdsf:
2428     case X86::BI__builtin_ia32_pswapdsi:
2429       name = "pswapd";
2430       ID = Intrinsic::x86_3dnowa_pswapd;
2431       break;
2432     }
2433     llvm::Function *F = CGM.getIntrinsic(ID);
2434     return Builder.CreateCall(F, Ops, name);
2435   }
2436   }
2437 }
2438 
2439 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
2440                                            const CallExpr *E) {
2441   SmallVector<Value*, 4> Ops;
2442 
2443   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
2444     Ops.push_back(EmitScalarExpr(E->getArg(i)));
2445 
2446   Intrinsic::ID ID = Intrinsic::not_intrinsic;
2447 
2448   switch (BuiltinID) {
2449   default: return 0;
2450 
2451   // vec_ld, vec_lvsl, vec_lvsr
2452   case PPC::BI__builtin_altivec_lvx:
2453   case PPC::BI__builtin_altivec_lvxl:
2454   case PPC::BI__builtin_altivec_lvebx:
2455   case PPC::BI__builtin_altivec_lvehx:
2456   case PPC::BI__builtin_altivec_lvewx:
2457   case PPC::BI__builtin_altivec_lvsl:
2458   case PPC::BI__builtin_altivec_lvsr:
2459   {
2460     Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
2461 
2462     Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
2463     Ops.pop_back();
2464 
2465     switch (BuiltinID) {
2466     default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
2467     case PPC::BI__builtin_altivec_lvx:
2468       ID = Intrinsic::ppc_altivec_lvx;
2469       break;
2470     case PPC::BI__builtin_altivec_lvxl:
2471       ID = Intrinsic::ppc_altivec_lvxl;
2472       break;
2473     case PPC::BI__builtin_altivec_lvebx:
2474       ID = Intrinsic::ppc_altivec_lvebx;
2475       break;
2476     case PPC::BI__builtin_altivec_lvehx:
2477       ID = Intrinsic::ppc_altivec_lvehx;
2478       break;
2479     case PPC::BI__builtin_altivec_lvewx:
2480       ID = Intrinsic::ppc_altivec_lvewx;
2481       break;
2482     case PPC::BI__builtin_altivec_lvsl:
2483       ID = Intrinsic::ppc_altivec_lvsl;
2484       break;
2485     case PPC::BI__builtin_altivec_lvsr:
2486       ID = Intrinsic::ppc_altivec_lvsr;
2487       break;
2488     }
2489     llvm::Function *F = CGM.getIntrinsic(ID);
2490     return Builder.CreateCall(F, Ops, "");
2491   }
2492 
2493   // vec_st
2494   case PPC::BI__builtin_altivec_stvx:
2495   case PPC::BI__builtin_altivec_stvxl:
2496   case PPC::BI__builtin_altivec_stvebx:
2497   case PPC::BI__builtin_altivec_stvehx:
2498   case PPC::BI__builtin_altivec_stvewx:
2499   {
2500     Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
2501     Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
2502     Ops.pop_back();
2503 
2504     switch (BuiltinID) {
2505     default: llvm_unreachable("Unsupported st intrinsic!");
2506     case PPC::BI__builtin_altivec_stvx:
2507       ID = Intrinsic::ppc_altivec_stvx;
2508       break;
2509     case PPC::BI__builtin_altivec_stvxl:
2510       ID = Intrinsic::ppc_altivec_stvxl;
2511       break;
2512     case PPC::BI__builtin_altivec_stvebx:
2513       ID = Intrinsic::ppc_altivec_stvebx;
2514       break;
2515     case PPC::BI__builtin_altivec_stvehx:
2516       ID = Intrinsic::ppc_altivec_stvehx;
2517       break;
2518     case PPC::BI__builtin_altivec_stvewx:
2519       ID = Intrinsic::ppc_altivec_stvewx;
2520       break;
2521     }
2522     llvm::Function *F = CGM.getIntrinsic(ID);
2523     return Builder.CreateCall(F, Ops, "");
2524   }
2525   }
2526   return 0;
2527 }
2528