1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This contains code to emit Builtin calls as LLVM code. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CodeGenFunction.h" 15 #include "CGObjCRuntime.h" 16 #include "CodeGenModule.h" 17 #include "TargetInfo.h" 18 #include "clang/AST/ASTContext.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/Basic/TargetBuiltins.h" 21 #include "clang/Basic/TargetInfo.h" 22 #include "clang/CodeGen/CGFunctionInfo.h" 23 #include "llvm/IR/DataLayout.h" 24 #include "llvm/IR/Intrinsics.h" 25 26 using namespace clang; 27 using namespace CodeGen; 28 using namespace llvm; 29 30 /// getBuiltinLibFunction - Given a builtin id for a function like 31 /// "__builtin_fabsf", return a Function* for "fabsf". 32 llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, 33 unsigned BuiltinID) { 34 assert(Context.BuiltinInfo.isLibFunction(BuiltinID)); 35 36 // Get the name, skip over the __builtin_ prefix (if necessary). 37 StringRef Name; 38 GlobalDecl D(FD); 39 40 // If the builtin has been declared explicitly with an assembler label, 41 // use the mangled name. This differs from the plain label on platforms 42 // that prefix labels. 43 if (FD->hasAttr<AsmLabelAttr>()) 44 Name = getMangledName(D); 45 else 46 Name = Context.BuiltinInfo.GetName(BuiltinID) + 10; 47 48 llvm::FunctionType *Ty = 49 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType())); 50 51 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false); 52 } 53 54 /// Emit the conversions required to turn the given value into an 55 /// integer of the given size. 56 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V, 57 QualType T, llvm::IntegerType *IntType) { 58 V = CGF.EmitToMemory(V, T); 59 60 if (V->getType()->isPointerTy()) 61 return CGF.Builder.CreatePtrToInt(V, IntType); 62 63 assert(V->getType() == IntType); 64 return V; 65 } 66 67 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, 68 QualType T, llvm::Type *ResultType) { 69 V = CGF.EmitFromMemory(V, T); 70 71 if (ResultType->isPointerTy()) 72 return CGF.Builder.CreateIntToPtr(V, ResultType); 73 74 assert(V->getType() == ResultType); 75 return V; 76 } 77 78 /// Utility to insert an atomic instruction based on Instrinsic::ID 79 /// and the expression node. 80 static RValue EmitBinaryAtomic(CodeGenFunction &CGF, 81 llvm::AtomicRMWInst::BinOp Kind, 82 const CallExpr *E) { 83 QualType T = E->getType(); 84 assert(E->getArg(0)->getType()->isPointerType()); 85 assert(CGF.getContext().hasSameUnqualifiedType(T, 86 E->getArg(0)->getType()->getPointeeType())); 87 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 88 89 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 90 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 91 92 llvm::IntegerType *IntType = 93 llvm::IntegerType::get(CGF.getLLVMContext(), 94 CGF.getContext().getTypeSize(T)); 95 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 96 97 llvm::Value *Args[2]; 98 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 99 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 100 llvm::Type *ValueType = Args[1]->getType(); 101 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 102 103 llvm::Value *Result = 104 CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1], 105 llvm::SequentiallyConsistent); 106 Result = EmitFromInt(CGF, Result, T, ValueType); 107 return RValue::get(Result); 108 } 109 110 /// Utility to insert an atomic instruction based Instrinsic::ID and 111 /// the expression node, where the return value is the result of the 112 /// operation. 113 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, 114 llvm::AtomicRMWInst::BinOp Kind, 115 const CallExpr *E, 116 Instruction::BinaryOps Op) { 117 QualType T = E->getType(); 118 assert(E->getArg(0)->getType()->isPointerType()); 119 assert(CGF.getContext().hasSameUnqualifiedType(T, 120 E->getArg(0)->getType()->getPointeeType())); 121 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 122 123 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 124 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 125 126 llvm::IntegerType *IntType = 127 llvm::IntegerType::get(CGF.getLLVMContext(), 128 CGF.getContext().getTypeSize(T)); 129 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 130 131 llvm::Value *Args[2]; 132 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 133 llvm::Type *ValueType = Args[1]->getType(); 134 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 135 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 136 137 llvm::Value *Result = 138 CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1], 139 llvm::SequentiallyConsistent); 140 Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]); 141 Result = EmitFromInt(CGF, Result, T, ValueType); 142 return RValue::get(Result); 143 } 144 145 /// EmitFAbs - Emit a call to fabs/fabsf/fabsl, depending on the type of ValTy, 146 /// which must be a scalar floating point type. 147 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V, QualType ValTy) { 148 const BuiltinType *ValTyP = ValTy->getAs<BuiltinType>(); 149 assert(ValTyP && "isn't scalar fp type!"); 150 151 StringRef FnName; 152 switch (ValTyP->getKind()) { 153 default: llvm_unreachable("Isn't a scalar fp type!"); 154 case BuiltinType::Float: FnName = "fabsf"; break; 155 case BuiltinType::Double: FnName = "fabs"; break; 156 case BuiltinType::LongDouble: FnName = "fabsl"; break; 157 } 158 159 // The prototype is something that takes and returns whatever V's type is. 160 llvm::FunctionType *FT = llvm::FunctionType::get(V->getType(), V->getType(), 161 false); 162 llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction(FT, FnName); 163 164 return CGF.EmitNounwindRuntimeCall(Fn, V, "abs"); 165 } 166 167 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn, 168 const CallExpr *E, llvm::Value *calleeValue) { 169 return CGF.EmitCall(E->getCallee()->getType(), calleeValue, E->getLocStart(), 170 ReturnValueSlot(), E->arg_begin(), E->arg_end(), Fn); 171 } 172 173 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.* 174 /// depending on IntrinsicID. 175 /// 176 /// \arg CGF The current codegen function. 177 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate. 178 /// \arg X The first argument to the llvm.*.with.overflow.*. 179 /// \arg Y The second argument to the llvm.*.with.overflow.*. 180 /// \arg Carry The carry returned by the llvm.*.with.overflow.*. 181 /// \returns The result (i.e. sum/product) returned by the intrinsic. 182 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, 183 const llvm::Intrinsic::ID IntrinsicID, 184 llvm::Value *X, llvm::Value *Y, 185 llvm::Value *&Carry) { 186 // Make sure we have integers of the same width. 187 assert(X->getType() == Y->getType() && 188 "Arguments must be the same type. (Did you forget to make sure both " 189 "arguments have the same integer width?)"); 190 191 llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType()); 192 llvm::Value *Tmp = CGF.Builder.CreateCall2(Callee, X, Y); 193 Carry = CGF.Builder.CreateExtractValue(Tmp, 1); 194 return CGF.Builder.CreateExtractValue(Tmp, 0); 195 } 196 197 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, 198 unsigned BuiltinID, const CallExpr *E) { 199 // See if we can constant fold this builtin. If so, don't emit it at all. 200 Expr::EvalResult Result; 201 if (E->EvaluateAsRValue(Result, CGM.getContext()) && 202 !Result.hasSideEffects()) { 203 if (Result.Val.isInt()) 204 return RValue::get(llvm::ConstantInt::get(getLLVMContext(), 205 Result.Val.getInt())); 206 if (Result.Val.isFloat()) 207 return RValue::get(llvm::ConstantFP::get(getLLVMContext(), 208 Result.Val.getFloat())); 209 } 210 211 switch (BuiltinID) { 212 default: break; // Handle intrinsics and libm functions below. 213 case Builtin::BI__builtin___CFStringMakeConstantString: 214 case Builtin::BI__builtin___NSStringMakeConstantString: 215 return RValue::get(CGM.EmitConstantExpr(E, E->getType(), 0)); 216 case Builtin::BI__builtin_stdarg_start: 217 case Builtin::BI__builtin_va_start: 218 case Builtin::BI__builtin_va_end: { 219 Value *ArgValue = EmitVAListRef(E->getArg(0)); 220 llvm::Type *DestType = Int8PtrTy; 221 if (ArgValue->getType() != DestType) 222 ArgValue = Builder.CreateBitCast(ArgValue, DestType, 223 ArgValue->getName().data()); 224 225 Intrinsic::ID inst = (BuiltinID == Builtin::BI__builtin_va_end) ? 226 Intrinsic::vaend : Intrinsic::vastart; 227 return RValue::get(Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue)); 228 } 229 case Builtin::BI__builtin_va_copy: { 230 Value *DstPtr = EmitVAListRef(E->getArg(0)); 231 Value *SrcPtr = EmitVAListRef(E->getArg(1)); 232 233 llvm::Type *Type = Int8PtrTy; 234 235 DstPtr = Builder.CreateBitCast(DstPtr, Type); 236 SrcPtr = Builder.CreateBitCast(SrcPtr, Type); 237 return RValue::get(Builder.CreateCall2(CGM.getIntrinsic(Intrinsic::vacopy), 238 DstPtr, SrcPtr)); 239 } 240 case Builtin::BI__builtin_abs: 241 case Builtin::BI__builtin_labs: 242 case Builtin::BI__builtin_llabs: { 243 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 244 245 Value *NegOp = Builder.CreateNeg(ArgValue, "neg"); 246 Value *CmpResult = 247 Builder.CreateICmpSGE(ArgValue, 248 llvm::Constant::getNullValue(ArgValue->getType()), 249 "abscond"); 250 Value *Result = 251 Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs"); 252 253 return RValue::get(Result); 254 } 255 256 case Builtin::BI__builtin_conj: 257 case Builtin::BI__builtin_conjf: 258 case Builtin::BI__builtin_conjl: { 259 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 260 Value *Real = ComplexVal.first; 261 Value *Imag = ComplexVal.second; 262 Value *Zero = 263 Imag->getType()->isFPOrFPVectorTy() 264 ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType()) 265 : llvm::Constant::getNullValue(Imag->getType()); 266 267 Imag = Builder.CreateFSub(Zero, Imag, "sub"); 268 return RValue::getComplex(std::make_pair(Real, Imag)); 269 } 270 case Builtin::BI__builtin_creal: 271 case Builtin::BI__builtin_crealf: 272 case Builtin::BI__builtin_creall: 273 case Builtin::BIcreal: 274 case Builtin::BIcrealf: 275 case Builtin::BIcreall: { 276 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 277 return RValue::get(ComplexVal.first); 278 } 279 280 case Builtin::BI__builtin_cimag: 281 case Builtin::BI__builtin_cimagf: 282 case Builtin::BI__builtin_cimagl: 283 case Builtin::BIcimag: 284 case Builtin::BIcimagf: 285 case Builtin::BIcimagl: { 286 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 287 return RValue::get(ComplexVal.second); 288 } 289 290 case Builtin::BI__builtin_ctzs: 291 case Builtin::BI__builtin_ctz: 292 case Builtin::BI__builtin_ctzl: 293 case Builtin::BI__builtin_ctzll: { 294 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 295 296 llvm::Type *ArgType = ArgValue->getType(); 297 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 298 299 llvm::Type *ResultType = ConvertType(E->getType()); 300 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); 301 Value *Result = Builder.CreateCall2(F, ArgValue, ZeroUndef); 302 if (Result->getType() != ResultType) 303 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 304 "cast"); 305 return RValue::get(Result); 306 } 307 case Builtin::BI__builtin_clzs: 308 case Builtin::BI__builtin_clz: 309 case Builtin::BI__builtin_clzl: 310 case Builtin::BI__builtin_clzll: { 311 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 312 313 llvm::Type *ArgType = ArgValue->getType(); 314 Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); 315 316 llvm::Type *ResultType = ConvertType(E->getType()); 317 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); 318 Value *Result = Builder.CreateCall2(F, ArgValue, ZeroUndef); 319 if (Result->getType() != ResultType) 320 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 321 "cast"); 322 return RValue::get(Result); 323 } 324 case Builtin::BI__builtin_ffs: 325 case Builtin::BI__builtin_ffsl: 326 case Builtin::BI__builtin_ffsll: { 327 // ffs(x) -> x ? cttz(x) + 1 : 0 328 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 329 330 llvm::Type *ArgType = ArgValue->getType(); 331 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 332 333 llvm::Type *ResultType = ConvertType(E->getType()); 334 Value *Tmp = Builder.CreateAdd(Builder.CreateCall2(F, ArgValue, 335 Builder.getTrue()), 336 llvm::ConstantInt::get(ArgType, 1)); 337 Value *Zero = llvm::Constant::getNullValue(ArgType); 338 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero"); 339 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs"); 340 if (Result->getType() != ResultType) 341 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 342 "cast"); 343 return RValue::get(Result); 344 } 345 case Builtin::BI__builtin_parity: 346 case Builtin::BI__builtin_parityl: 347 case Builtin::BI__builtin_parityll: { 348 // parity(x) -> ctpop(x) & 1 349 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 350 351 llvm::Type *ArgType = ArgValue->getType(); 352 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 353 354 llvm::Type *ResultType = ConvertType(E->getType()); 355 Value *Tmp = Builder.CreateCall(F, ArgValue); 356 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1)); 357 if (Result->getType() != ResultType) 358 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 359 "cast"); 360 return RValue::get(Result); 361 } 362 case Builtin::BI__builtin_popcount: 363 case Builtin::BI__builtin_popcountl: 364 case Builtin::BI__builtin_popcountll: { 365 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 366 367 llvm::Type *ArgType = ArgValue->getType(); 368 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 369 370 llvm::Type *ResultType = ConvertType(E->getType()); 371 Value *Result = Builder.CreateCall(F, ArgValue); 372 if (Result->getType() != ResultType) 373 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 374 "cast"); 375 return RValue::get(Result); 376 } 377 case Builtin::BI__builtin_expect: { 378 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 379 llvm::Type *ArgType = ArgValue->getType(); 380 381 Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType); 382 Value *ExpectedValue = EmitScalarExpr(E->getArg(1)); 383 384 Value *Result = Builder.CreateCall2(FnExpect, ArgValue, ExpectedValue, 385 "expval"); 386 return RValue::get(Result); 387 } 388 case Builtin::BI__builtin_bswap16: 389 case Builtin::BI__builtin_bswap32: 390 case Builtin::BI__builtin_bswap64: { 391 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 392 llvm::Type *ArgType = ArgValue->getType(); 393 Value *F = CGM.getIntrinsic(Intrinsic::bswap, ArgType); 394 return RValue::get(Builder.CreateCall(F, ArgValue)); 395 } 396 case Builtin::BI__builtin_object_size: { 397 // We rely on constant folding to deal with expressions with side effects. 398 assert(!E->getArg(0)->HasSideEffects(getContext()) && 399 "should have been constant folded"); 400 401 // We pass this builtin onto the optimizer so that it can 402 // figure out the object size in more complex cases. 403 llvm::Type *ResType = ConvertType(E->getType()); 404 405 // LLVM only supports 0 and 2, make sure that we pass along that 406 // as a boolean. 407 Value *Ty = EmitScalarExpr(E->getArg(1)); 408 ConstantInt *CI = dyn_cast<ConstantInt>(Ty); 409 assert(CI); 410 uint64_t val = CI->getZExtValue(); 411 CI = ConstantInt::get(Builder.getInt1Ty(), (val & 0x2) >> 1); 412 // FIXME: Get right address space. 413 llvm::Type *Tys[] = { ResType, Builder.getInt8PtrTy(0) }; 414 Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys); 415 return RValue::get(Builder.CreateCall2(F, EmitScalarExpr(E->getArg(0)),CI)); 416 } 417 case Builtin::BI__builtin_prefetch: { 418 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); 419 // FIXME: Technically these constants should of type 'int', yes? 420 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) : 421 llvm::ConstantInt::get(Int32Ty, 0); 422 Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : 423 llvm::ConstantInt::get(Int32Ty, 3); 424 Value *Data = llvm::ConstantInt::get(Int32Ty, 1); 425 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 426 return RValue::get(Builder.CreateCall4(F, Address, RW, Locality, Data)); 427 } 428 case Builtin::BI__builtin_readcyclecounter: { 429 Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); 430 return RValue::get(Builder.CreateCall(F)); 431 } 432 case Builtin::BI__builtin_trap: { 433 Value *F = CGM.getIntrinsic(Intrinsic::trap); 434 return RValue::get(Builder.CreateCall(F)); 435 } 436 case Builtin::BI__debugbreak: { 437 Value *F = CGM.getIntrinsic(Intrinsic::debugtrap); 438 return RValue::get(Builder.CreateCall(F)); 439 } 440 case Builtin::BI__builtin_unreachable: { 441 if (SanOpts->Unreachable) 442 EmitCheck(Builder.getFalse(), "builtin_unreachable", 443 EmitCheckSourceLocation(E->getExprLoc()), 444 ArrayRef<llvm::Value *>(), CRK_Unrecoverable); 445 else 446 Builder.CreateUnreachable(); 447 448 // We do need to preserve an insertion point. 449 EmitBlock(createBasicBlock("unreachable.cont")); 450 451 return RValue::get(0); 452 } 453 454 case Builtin::BI__builtin_powi: 455 case Builtin::BI__builtin_powif: 456 case Builtin::BI__builtin_powil: { 457 Value *Base = EmitScalarExpr(E->getArg(0)); 458 Value *Exponent = EmitScalarExpr(E->getArg(1)); 459 llvm::Type *ArgType = Base->getType(); 460 Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType); 461 return RValue::get(Builder.CreateCall2(F, Base, Exponent)); 462 } 463 464 case Builtin::BI__builtin_isgreater: 465 case Builtin::BI__builtin_isgreaterequal: 466 case Builtin::BI__builtin_isless: 467 case Builtin::BI__builtin_islessequal: 468 case Builtin::BI__builtin_islessgreater: 469 case Builtin::BI__builtin_isunordered: { 470 // Ordered comparisons: we know the arguments to these are matching scalar 471 // floating point values. 472 Value *LHS = EmitScalarExpr(E->getArg(0)); 473 Value *RHS = EmitScalarExpr(E->getArg(1)); 474 475 switch (BuiltinID) { 476 default: llvm_unreachable("Unknown ordered comparison"); 477 case Builtin::BI__builtin_isgreater: 478 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp"); 479 break; 480 case Builtin::BI__builtin_isgreaterequal: 481 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp"); 482 break; 483 case Builtin::BI__builtin_isless: 484 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp"); 485 break; 486 case Builtin::BI__builtin_islessequal: 487 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp"); 488 break; 489 case Builtin::BI__builtin_islessgreater: 490 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp"); 491 break; 492 case Builtin::BI__builtin_isunordered: 493 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp"); 494 break; 495 } 496 // ZExt bool to int type. 497 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType()))); 498 } 499 case Builtin::BI__builtin_isnan: { 500 Value *V = EmitScalarExpr(E->getArg(0)); 501 V = Builder.CreateFCmpUNO(V, V, "cmp"); 502 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 503 } 504 505 case Builtin::BI__builtin_isinf: { 506 // isinf(x) --> fabs(x) == infinity 507 Value *V = EmitScalarExpr(E->getArg(0)); 508 V = EmitFAbs(*this, V, E->getArg(0)->getType()); 509 510 V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf"); 511 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 512 } 513 514 // TODO: BI__builtin_isinf_sign 515 // isinf_sign(x) -> isinf(x) ? (signbit(x) ? -1 : 1) : 0 516 517 case Builtin::BI__builtin_isnormal: { 518 // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min 519 Value *V = EmitScalarExpr(E->getArg(0)); 520 Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); 521 522 Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType()); 523 Value *IsLessThanInf = 524 Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); 525 APFloat Smallest = APFloat::getSmallestNormalized( 526 getContext().getFloatTypeSemantics(E->getArg(0)->getType())); 527 Value *IsNormal = 528 Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest), 529 "isnormal"); 530 V = Builder.CreateAnd(Eq, IsLessThanInf, "and"); 531 V = Builder.CreateAnd(V, IsNormal, "and"); 532 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 533 } 534 535 case Builtin::BI__builtin_isfinite: { 536 // isfinite(x) --> x == x && fabs(x) != infinity; 537 Value *V = EmitScalarExpr(E->getArg(0)); 538 Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); 539 540 Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType()); 541 Value *IsNotInf = 542 Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); 543 544 V = Builder.CreateAnd(Eq, IsNotInf, "and"); 545 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 546 } 547 548 case Builtin::BI__builtin_fpclassify: { 549 Value *V = EmitScalarExpr(E->getArg(5)); 550 llvm::Type *Ty = ConvertType(E->getArg(5)->getType()); 551 552 // Create Result 553 BasicBlock *Begin = Builder.GetInsertBlock(); 554 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn); 555 Builder.SetInsertPoint(End); 556 PHINode *Result = 557 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4, 558 "fpclassify_result"); 559 560 // if (V==0) return FP_ZERO 561 Builder.SetInsertPoint(Begin); 562 Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty), 563 "iszero"); 564 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4)); 565 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn); 566 Builder.CreateCondBr(IsZero, End, NotZero); 567 Result->addIncoming(ZeroLiteral, Begin); 568 569 // if (V != V) return FP_NAN 570 Builder.SetInsertPoint(NotZero); 571 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp"); 572 Value *NanLiteral = EmitScalarExpr(E->getArg(0)); 573 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn); 574 Builder.CreateCondBr(IsNan, End, NotNan); 575 Result->addIncoming(NanLiteral, NotZero); 576 577 // if (fabs(V) == infinity) return FP_INFINITY 578 Builder.SetInsertPoint(NotNan); 579 Value *VAbs = EmitFAbs(*this, V, E->getArg(5)->getType()); 580 Value *IsInf = 581 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()), 582 "isinf"); 583 Value *InfLiteral = EmitScalarExpr(E->getArg(1)); 584 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn); 585 Builder.CreateCondBr(IsInf, End, NotInf); 586 Result->addIncoming(InfLiteral, NotNan); 587 588 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL 589 Builder.SetInsertPoint(NotInf); 590 APFloat Smallest = APFloat::getSmallestNormalized( 591 getContext().getFloatTypeSemantics(E->getArg(5)->getType())); 592 Value *IsNormal = 593 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest), 594 "isnormal"); 595 Value *NormalResult = 596 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)), 597 EmitScalarExpr(E->getArg(3))); 598 Builder.CreateBr(End); 599 Result->addIncoming(NormalResult, NotInf); 600 601 // return Result 602 Builder.SetInsertPoint(End); 603 return RValue::get(Result); 604 } 605 606 case Builtin::BIalloca: 607 case Builtin::BI_alloca: 608 case Builtin::BI__builtin_alloca: { 609 Value *Size = EmitScalarExpr(E->getArg(0)); 610 return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size)); 611 } 612 case Builtin::BIbzero: 613 case Builtin::BI__builtin_bzero: { 614 std::pair<llvm::Value*, unsigned> Dest = 615 EmitPointerWithAlignment(E->getArg(0)); 616 Value *SizeVal = EmitScalarExpr(E->getArg(1)); 617 Builder.CreateMemSet(Dest.first, Builder.getInt8(0), SizeVal, 618 Dest.second, false); 619 return RValue::get(Dest.first); 620 } 621 case Builtin::BImemcpy: 622 case Builtin::BI__builtin_memcpy: { 623 std::pair<llvm::Value*, unsigned> Dest = 624 EmitPointerWithAlignment(E->getArg(0)); 625 std::pair<llvm::Value*, unsigned> Src = 626 EmitPointerWithAlignment(E->getArg(1)); 627 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 628 unsigned Align = std::min(Dest.second, Src.second); 629 Builder.CreateMemCpy(Dest.first, Src.first, SizeVal, Align, false); 630 return RValue::get(Dest.first); 631 } 632 633 case Builtin::BI__builtin___memcpy_chk: { 634 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2. 635 llvm::APSInt Size, DstSize; 636 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 637 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 638 break; 639 if (Size.ugt(DstSize)) 640 break; 641 std::pair<llvm::Value*, unsigned> Dest = 642 EmitPointerWithAlignment(E->getArg(0)); 643 std::pair<llvm::Value*, unsigned> Src = 644 EmitPointerWithAlignment(E->getArg(1)); 645 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 646 unsigned Align = std::min(Dest.second, Src.second); 647 Builder.CreateMemCpy(Dest.first, Src.first, SizeVal, Align, false); 648 return RValue::get(Dest.first); 649 } 650 651 case Builtin::BI__builtin_objc_memmove_collectable: { 652 Value *Address = EmitScalarExpr(E->getArg(0)); 653 Value *SrcAddr = EmitScalarExpr(E->getArg(1)); 654 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 655 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, 656 Address, SrcAddr, SizeVal); 657 return RValue::get(Address); 658 } 659 660 case Builtin::BI__builtin___memmove_chk: { 661 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2. 662 llvm::APSInt Size, DstSize; 663 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 664 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 665 break; 666 if (Size.ugt(DstSize)) 667 break; 668 std::pair<llvm::Value*, unsigned> Dest = 669 EmitPointerWithAlignment(E->getArg(0)); 670 std::pair<llvm::Value*, unsigned> Src = 671 EmitPointerWithAlignment(E->getArg(1)); 672 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 673 unsigned Align = std::min(Dest.second, Src.second); 674 Builder.CreateMemMove(Dest.first, Src.first, SizeVal, Align, false); 675 return RValue::get(Dest.first); 676 } 677 678 case Builtin::BImemmove: 679 case Builtin::BI__builtin_memmove: { 680 std::pair<llvm::Value*, unsigned> Dest = 681 EmitPointerWithAlignment(E->getArg(0)); 682 std::pair<llvm::Value*, unsigned> Src = 683 EmitPointerWithAlignment(E->getArg(1)); 684 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 685 unsigned Align = std::min(Dest.second, Src.second); 686 Builder.CreateMemMove(Dest.first, Src.first, SizeVal, Align, false); 687 return RValue::get(Dest.first); 688 } 689 case Builtin::BImemset: 690 case Builtin::BI__builtin_memset: { 691 std::pair<llvm::Value*, unsigned> Dest = 692 EmitPointerWithAlignment(E->getArg(0)); 693 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 694 Builder.getInt8Ty()); 695 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 696 Builder.CreateMemSet(Dest.first, ByteVal, SizeVal, Dest.second, false); 697 return RValue::get(Dest.first); 698 } 699 case Builtin::BI__builtin___memset_chk: { 700 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2. 701 llvm::APSInt Size, DstSize; 702 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 703 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 704 break; 705 if (Size.ugt(DstSize)) 706 break; 707 std::pair<llvm::Value*, unsigned> Dest = 708 EmitPointerWithAlignment(E->getArg(0)); 709 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 710 Builder.getInt8Ty()); 711 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 712 Builder.CreateMemSet(Dest.first, ByteVal, SizeVal, Dest.second, false); 713 return RValue::get(Dest.first); 714 } 715 case Builtin::BI__builtin_dwarf_cfa: { 716 // The offset in bytes from the first argument to the CFA. 717 // 718 // Why on earth is this in the frontend? Is there any reason at 719 // all that the backend can't reasonably determine this while 720 // lowering llvm.eh.dwarf.cfa()? 721 // 722 // TODO: If there's a satisfactory reason, add a target hook for 723 // this instead of hard-coding 0, which is correct for most targets. 724 int32_t Offset = 0; 725 726 Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa); 727 return RValue::get(Builder.CreateCall(F, 728 llvm::ConstantInt::get(Int32Ty, Offset))); 729 } 730 case Builtin::BI__builtin_return_address: { 731 Value *Depth = EmitScalarExpr(E->getArg(0)); 732 Depth = Builder.CreateIntCast(Depth, Int32Ty, false); 733 Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); 734 return RValue::get(Builder.CreateCall(F, Depth)); 735 } 736 case Builtin::BI__builtin_frame_address: { 737 Value *Depth = EmitScalarExpr(E->getArg(0)); 738 Depth = Builder.CreateIntCast(Depth, Int32Ty, false); 739 Value *F = CGM.getIntrinsic(Intrinsic::frameaddress); 740 return RValue::get(Builder.CreateCall(F, Depth)); 741 } 742 case Builtin::BI__builtin_extract_return_addr: { 743 Value *Address = EmitScalarExpr(E->getArg(0)); 744 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address); 745 return RValue::get(Result); 746 } 747 case Builtin::BI__builtin_frob_return_addr: { 748 Value *Address = EmitScalarExpr(E->getArg(0)); 749 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address); 750 return RValue::get(Result); 751 } 752 case Builtin::BI__builtin_dwarf_sp_column: { 753 llvm::IntegerType *Ty 754 = cast<llvm::IntegerType>(ConvertType(E->getType())); 755 int Column = getTargetHooks().getDwarfEHStackPointer(CGM); 756 if (Column == -1) { 757 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column"); 758 return RValue::get(llvm::UndefValue::get(Ty)); 759 } 760 return RValue::get(llvm::ConstantInt::get(Ty, Column, true)); 761 } 762 case Builtin::BI__builtin_init_dwarf_reg_size_table: { 763 Value *Address = EmitScalarExpr(E->getArg(0)); 764 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address)) 765 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table"); 766 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType()))); 767 } 768 case Builtin::BI__builtin_eh_return: { 769 Value *Int = EmitScalarExpr(E->getArg(0)); 770 Value *Ptr = EmitScalarExpr(E->getArg(1)); 771 772 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType()); 773 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) && 774 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants"); 775 Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32 776 ? Intrinsic::eh_return_i32 777 : Intrinsic::eh_return_i64); 778 Builder.CreateCall2(F, Int, Ptr); 779 Builder.CreateUnreachable(); 780 781 // We do need to preserve an insertion point. 782 EmitBlock(createBasicBlock("builtin_eh_return.cont")); 783 784 return RValue::get(0); 785 } 786 case Builtin::BI__builtin_unwind_init: { 787 Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init); 788 return RValue::get(Builder.CreateCall(F)); 789 } 790 case Builtin::BI__builtin_extend_pointer: { 791 // Extends a pointer to the size of an _Unwind_Word, which is 792 // uint64_t on all platforms. Generally this gets poked into a 793 // register and eventually used as an address, so if the 794 // addressing registers are wider than pointers and the platform 795 // doesn't implicitly ignore high-order bits when doing 796 // addressing, we need to make sure we zext / sext based on 797 // the platform's expectations. 798 // 799 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html 800 801 // Cast the pointer to intptr_t. 802 Value *Ptr = EmitScalarExpr(E->getArg(0)); 803 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast"); 804 805 // If that's 64 bits, we're done. 806 if (IntPtrTy->getBitWidth() == 64) 807 return RValue::get(Result); 808 809 // Otherwise, ask the codegen data what to do. 810 if (getTargetHooks().extendPointerWithSExt()) 811 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext")); 812 else 813 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext")); 814 } 815 case Builtin::BI__builtin_setjmp: { 816 // Buffer is a void**. 817 Value *Buf = EmitScalarExpr(E->getArg(0)); 818 819 // Store the frame pointer to the setjmp buffer. 820 Value *FrameAddr = 821 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 822 ConstantInt::get(Int32Ty, 0)); 823 Builder.CreateStore(FrameAddr, Buf); 824 825 // Store the stack pointer to the setjmp buffer. 826 Value *StackAddr = 827 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave)); 828 Value *StackSaveSlot = 829 Builder.CreateGEP(Buf, ConstantInt::get(Int32Ty, 2)); 830 Builder.CreateStore(StackAddr, StackSaveSlot); 831 832 // Call LLVM's EH setjmp, which is lightweight. 833 Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); 834 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 835 return RValue::get(Builder.CreateCall(F, Buf)); 836 } 837 case Builtin::BI__builtin_longjmp: { 838 Value *Buf = EmitScalarExpr(E->getArg(0)); 839 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 840 841 // Call LLVM's EH longjmp, which is lightweight. 842 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf); 843 844 // longjmp doesn't return; mark this as unreachable. 845 Builder.CreateUnreachable(); 846 847 // We do need to preserve an insertion point. 848 EmitBlock(createBasicBlock("longjmp.cont")); 849 850 return RValue::get(0); 851 } 852 case Builtin::BI__sync_fetch_and_add: 853 case Builtin::BI__sync_fetch_and_sub: 854 case Builtin::BI__sync_fetch_and_or: 855 case Builtin::BI__sync_fetch_and_and: 856 case Builtin::BI__sync_fetch_and_xor: 857 case Builtin::BI__sync_add_and_fetch: 858 case Builtin::BI__sync_sub_and_fetch: 859 case Builtin::BI__sync_and_and_fetch: 860 case Builtin::BI__sync_or_and_fetch: 861 case Builtin::BI__sync_xor_and_fetch: 862 case Builtin::BI__sync_val_compare_and_swap: 863 case Builtin::BI__sync_bool_compare_and_swap: 864 case Builtin::BI__sync_lock_test_and_set: 865 case Builtin::BI__sync_lock_release: 866 case Builtin::BI__sync_swap: 867 llvm_unreachable("Shouldn't make it through sema"); 868 case Builtin::BI__sync_fetch_and_add_1: 869 case Builtin::BI__sync_fetch_and_add_2: 870 case Builtin::BI__sync_fetch_and_add_4: 871 case Builtin::BI__sync_fetch_and_add_8: 872 case Builtin::BI__sync_fetch_and_add_16: 873 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E); 874 case Builtin::BI__sync_fetch_and_sub_1: 875 case Builtin::BI__sync_fetch_and_sub_2: 876 case Builtin::BI__sync_fetch_and_sub_4: 877 case Builtin::BI__sync_fetch_and_sub_8: 878 case Builtin::BI__sync_fetch_and_sub_16: 879 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E); 880 case Builtin::BI__sync_fetch_and_or_1: 881 case Builtin::BI__sync_fetch_and_or_2: 882 case Builtin::BI__sync_fetch_and_or_4: 883 case Builtin::BI__sync_fetch_and_or_8: 884 case Builtin::BI__sync_fetch_and_or_16: 885 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E); 886 case Builtin::BI__sync_fetch_and_and_1: 887 case Builtin::BI__sync_fetch_and_and_2: 888 case Builtin::BI__sync_fetch_and_and_4: 889 case Builtin::BI__sync_fetch_and_and_8: 890 case Builtin::BI__sync_fetch_and_and_16: 891 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E); 892 case Builtin::BI__sync_fetch_and_xor_1: 893 case Builtin::BI__sync_fetch_and_xor_2: 894 case Builtin::BI__sync_fetch_and_xor_4: 895 case Builtin::BI__sync_fetch_and_xor_8: 896 case Builtin::BI__sync_fetch_and_xor_16: 897 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E); 898 899 // Clang extensions: not overloaded yet. 900 case Builtin::BI__sync_fetch_and_min: 901 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E); 902 case Builtin::BI__sync_fetch_and_max: 903 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E); 904 case Builtin::BI__sync_fetch_and_umin: 905 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E); 906 case Builtin::BI__sync_fetch_and_umax: 907 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E); 908 909 case Builtin::BI__sync_add_and_fetch_1: 910 case Builtin::BI__sync_add_and_fetch_2: 911 case Builtin::BI__sync_add_and_fetch_4: 912 case Builtin::BI__sync_add_and_fetch_8: 913 case Builtin::BI__sync_add_and_fetch_16: 914 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E, 915 llvm::Instruction::Add); 916 case Builtin::BI__sync_sub_and_fetch_1: 917 case Builtin::BI__sync_sub_and_fetch_2: 918 case Builtin::BI__sync_sub_and_fetch_4: 919 case Builtin::BI__sync_sub_and_fetch_8: 920 case Builtin::BI__sync_sub_and_fetch_16: 921 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E, 922 llvm::Instruction::Sub); 923 case Builtin::BI__sync_and_and_fetch_1: 924 case Builtin::BI__sync_and_and_fetch_2: 925 case Builtin::BI__sync_and_and_fetch_4: 926 case Builtin::BI__sync_and_and_fetch_8: 927 case Builtin::BI__sync_and_and_fetch_16: 928 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E, 929 llvm::Instruction::And); 930 case Builtin::BI__sync_or_and_fetch_1: 931 case Builtin::BI__sync_or_and_fetch_2: 932 case Builtin::BI__sync_or_and_fetch_4: 933 case Builtin::BI__sync_or_and_fetch_8: 934 case Builtin::BI__sync_or_and_fetch_16: 935 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E, 936 llvm::Instruction::Or); 937 case Builtin::BI__sync_xor_and_fetch_1: 938 case Builtin::BI__sync_xor_and_fetch_2: 939 case Builtin::BI__sync_xor_and_fetch_4: 940 case Builtin::BI__sync_xor_and_fetch_8: 941 case Builtin::BI__sync_xor_and_fetch_16: 942 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E, 943 llvm::Instruction::Xor); 944 945 case Builtin::BI__sync_val_compare_and_swap_1: 946 case Builtin::BI__sync_val_compare_and_swap_2: 947 case Builtin::BI__sync_val_compare_and_swap_4: 948 case Builtin::BI__sync_val_compare_and_swap_8: 949 case Builtin::BI__sync_val_compare_and_swap_16: { 950 QualType T = E->getType(); 951 llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0)); 952 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 953 954 llvm::IntegerType *IntType = 955 llvm::IntegerType::get(getLLVMContext(), 956 getContext().getTypeSize(T)); 957 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 958 959 Value *Args[3]; 960 Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType); 961 Args[1] = EmitScalarExpr(E->getArg(1)); 962 llvm::Type *ValueType = Args[1]->getType(); 963 Args[1] = EmitToInt(*this, Args[1], T, IntType); 964 Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType); 965 966 Value *Result = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2], 967 llvm::SequentiallyConsistent); 968 Result = EmitFromInt(*this, Result, T, ValueType); 969 return RValue::get(Result); 970 } 971 972 case Builtin::BI__sync_bool_compare_and_swap_1: 973 case Builtin::BI__sync_bool_compare_and_swap_2: 974 case Builtin::BI__sync_bool_compare_and_swap_4: 975 case Builtin::BI__sync_bool_compare_and_swap_8: 976 case Builtin::BI__sync_bool_compare_and_swap_16: { 977 QualType T = E->getArg(1)->getType(); 978 llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0)); 979 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 980 981 llvm::IntegerType *IntType = 982 llvm::IntegerType::get(getLLVMContext(), 983 getContext().getTypeSize(T)); 984 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 985 986 Value *Args[3]; 987 Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType); 988 Args[1] = EmitToInt(*this, EmitScalarExpr(E->getArg(1)), T, IntType); 989 Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType); 990 991 Value *OldVal = Args[1]; 992 Value *PrevVal = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2], 993 llvm::SequentiallyConsistent); 994 Value *Result = Builder.CreateICmpEQ(PrevVal, OldVal); 995 // zext bool to int. 996 Result = Builder.CreateZExt(Result, ConvertType(E->getType())); 997 return RValue::get(Result); 998 } 999 1000 case Builtin::BI__sync_swap_1: 1001 case Builtin::BI__sync_swap_2: 1002 case Builtin::BI__sync_swap_4: 1003 case Builtin::BI__sync_swap_8: 1004 case Builtin::BI__sync_swap_16: 1005 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1006 1007 case Builtin::BI__sync_lock_test_and_set_1: 1008 case Builtin::BI__sync_lock_test_and_set_2: 1009 case Builtin::BI__sync_lock_test_and_set_4: 1010 case Builtin::BI__sync_lock_test_and_set_8: 1011 case Builtin::BI__sync_lock_test_and_set_16: 1012 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1013 1014 case Builtin::BI__sync_lock_release_1: 1015 case Builtin::BI__sync_lock_release_2: 1016 case Builtin::BI__sync_lock_release_4: 1017 case Builtin::BI__sync_lock_release_8: 1018 case Builtin::BI__sync_lock_release_16: { 1019 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1020 QualType ElTy = E->getArg(0)->getType()->getPointeeType(); 1021 CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); 1022 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 1023 StoreSize.getQuantity() * 8); 1024 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 1025 llvm::StoreInst *Store = 1026 Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr); 1027 Store->setAlignment(StoreSize.getQuantity()); 1028 Store->setAtomic(llvm::Release); 1029 return RValue::get(0); 1030 } 1031 1032 case Builtin::BI__sync_synchronize: { 1033 // We assume this is supposed to correspond to a C++0x-style 1034 // sequentially-consistent fence (i.e. this is only usable for 1035 // synchonization, not device I/O or anything like that). This intrinsic 1036 // is really badly designed in the sense that in theory, there isn't 1037 // any way to safely use it... but in practice, it mostly works 1038 // to use it with non-atomic loads and stores to get acquire/release 1039 // semantics. 1040 Builder.CreateFence(llvm::SequentiallyConsistent); 1041 return RValue::get(0); 1042 } 1043 1044 case Builtin::BI__c11_atomic_is_lock_free: 1045 case Builtin::BI__atomic_is_lock_free: { 1046 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the 1047 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since 1048 // _Atomic(T) is always properly-aligned. 1049 const char *LibCallName = "__atomic_is_lock_free"; 1050 CallArgList Args; 1051 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))), 1052 getContext().getSizeType()); 1053 if (BuiltinID == Builtin::BI__atomic_is_lock_free) 1054 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))), 1055 getContext().VoidPtrTy); 1056 else 1057 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)), 1058 getContext().VoidPtrTy); 1059 const CGFunctionInfo &FuncInfo = 1060 CGM.getTypes().arrangeFreeFunctionCall(E->getType(), Args, 1061 FunctionType::ExtInfo(), 1062 RequiredArgs::All); 1063 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); 1064 llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName); 1065 return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args); 1066 } 1067 1068 case Builtin::BI__atomic_test_and_set: { 1069 // Look at the argument type to determine whether this is a volatile 1070 // operation. The parameter type is always volatile. 1071 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 1072 bool Volatile = 1073 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 1074 1075 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1076 unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); 1077 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 1078 Value *NewVal = Builder.getInt8(1); 1079 Value *Order = EmitScalarExpr(E->getArg(1)); 1080 if (isa<llvm::ConstantInt>(Order)) { 1081 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1082 AtomicRMWInst *Result = 0; 1083 switch (ord) { 1084 case 0: // memory_order_relaxed 1085 default: // invalid order 1086 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1087 Ptr, NewVal, 1088 llvm::Monotonic); 1089 break; 1090 case 1: // memory_order_consume 1091 case 2: // memory_order_acquire 1092 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1093 Ptr, NewVal, 1094 llvm::Acquire); 1095 break; 1096 case 3: // memory_order_release 1097 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1098 Ptr, NewVal, 1099 llvm::Release); 1100 break; 1101 case 4: // memory_order_acq_rel 1102 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1103 Ptr, NewVal, 1104 llvm::AcquireRelease); 1105 break; 1106 case 5: // memory_order_seq_cst 1107 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1108 Ptr, NewVal, 1109 llvm::SequentiallyConsistent); 1110 break; 1111 } 1112 Result->setVolatile(Volatile); 1113 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 1114 } 1115 1116 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1117 1118 llvm::BasicBlock *BBs[5] = { 1119 createBasicBlock("monotonic", CurFn), 1120 createBasicBlock("acquire", CurFn), 1121 createBasicBlock("release", CurFn), 1122 createBasicBlock("acqrel", CurFn), 1123 createBasicBlock("seqcst", CurFn) 1124 }; 1125 llvm::AtomicOrdering Orders[5] = { 1126 llvm::Monotonic, llvm::Acquire, llvm::Release, 1127 llvm::AcquireRelease, llvm::SequentiallyConsistent 1128 }; 1129 1130 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1131 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 1132 1133 Builder.SetInsertPoint(ContBB); 1134 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set"); 1135 1136 for (unsigned i = 0; i < 5; ++i) { 1137 Builder.SetInsertPoint(BBs[i]); 1138 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1139 Ptr, NewVal, Orders[i]); 1140 RMW->setVolatile(Volatile); 1141 Result->addIncoming(RMW, BBs[i]); 1142 Builder.CreateBr(ContBB); 1143 } 1144 1145 SI->addCase(Builder.getInt32(0), BBs[0]); 1146 SI->addCase(Builder.getInt32(1), BBs[1]); 1147 SI->addCase(Builder.getInt32(2), BBs[1]); 1148 SI->addCase(Builder.getInt32(3), BBs[2]); 1149 SI->addCase(Builder.getInt32(4), BBs[3]); 1150 SI->addCase(Builder.getInt32(5), BBs[4]); 1151 1152 Builder.SetInsertPoint(ContBB); 1153 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 1154 } 1155 1156 case Builtin::BI__atomic_clear: { 1157 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 1158 bool Volatile = 1159 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 1160 1161 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1162 unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); 1163 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 1164 Value *NewVal = Builder.getInt8(0); 1165 Value *Order = EmitScalarExpr(E->getArg(1)); 1166 if (isa<llvm::ConstantInt>(Order)) { 1167 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1168 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 1169 Store->setAlignment(1); 1170 switch (ord) { 1171 case 0: // memory_order_relaxed 1172 default: // invalid order 1173 Store->setOrdering(llvm::Monotonic); 1174 break; 1175 case 3: // memory_order_release 1176 Store->setOrdering(llvm::Release); 1177 break; 1178 case 5: // memory_order_seq_cst 1179 Store->setOrdering(llvm::SequentiallyConsistent); 1180 break; 1181 } 1182 return RValue::get(0); 1183 } 1184 1185 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1186 1187 llvm::BasicBlock *BBs[3] = { 1188 createBasicBlock("monotonic", CurFn), 1189 createBasicBlock("release", CurFn), 1190 createBasicBlock("seqcst", CurFn) 1191 }; 1192 llvm::AtomicOrdering Orders[3] = { 1193 llvm::Monotonic, llvm::Release, llvm::SequentiallyConsistent 1194 }; 1195 1196 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1197 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 1198 1199 for (unsigned i = 0; i < 3; ++i) { 1200 Builder.SetInsertPoint(BBs[i]); 1201 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 1202 Store->setAlignment(1); 1203 Store->setOrdering(Orders[i]); 1204 Builder.CreateBr(ContBB); 1205 } 1206 1207 SI->addCase(Builder.getInt32(0), BBs[0]); 1208 SI->addCase(Builder.getInt32(3), BBs[1]); 1209 SI->addCase(Builder.getInt32(5), BBs[2]); 1210 1211 Builder.SetInsertPoint(ContBB); 1212 return RValue::get(0); 1213 } 1214 1215 case Builtin::BI__atomic_thread_fence: 1216 case Builtin::BI__atomic_signal_fence: 1217 case Builtin::BI__c11_atomic_thread_fence: 1218 case Builtin::BI__c11_atomic_signal_fence: { 1219 llvm::SynchronizationScope Scope; 1220 if (BuiltinID == Builtin::BI__atomic_signal_fence || 1221 BuiltinID == Builtin::BI__c11_atomic_signal_fence) 1222 Scope = llvm::SingleThread; 1223 else 1224 Scope = llvm::CrossThread; 1225 Value *Order = EmitScalarExpr(E->getArg(0)); 1226 if (isa<llvm::ConstantInt>(Order)) { 1227 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1228 switch (ord) { 1229 case 0: // memory_order_relaxed 1230 default: // invalid order 1231 break; 1232 case 1: // memory_order_consume 1233 case 2: // memory_order_acquire 1234 Builder.CreateFence(llvm::Acquire, Scope); 1235 break; 1236 case 3: // memory_order_release 1237 Builder.CreateFence(llvm::Release, Scope); 1238 break; 1239 case 4: // memory_order_acq_rel 1240 Builder.CreateFence(llvm::AcquireRelease, Scope); 1241 break; 1242 case 5: // memory_order_seq_cst 1243 Builder.CreateFence(llvm::SequentiallyConsistent, Scope); 1244 break; 1245 } 1246 return RValue::get(0); 1247 } 1248 1249 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB; 1250 AcquireBB = createBasicBlock("acquire", CurFn); 1251 ReleaseBB = createBasicBlock("release", CurFn); 1252 AcqRelBB = createBasicBlock("acqrel", CurFn); 1253 SeqCstBB = createBasicBlock("seqcst", CurFn); 1254 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1255 1256 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1257 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB); 1258 1259 Builder.SetInsertPoint(AcquireBB); 1260 Builder.CreateFence(llvm::Acquire, Scope); 1261 Builder.CreateBr(ContBB); 1262 SI->addCase(Builder.getInt32(1), AcquireBB); 1263 SI->addCase(Builder.getInt32(2), AcquireBB); 1264 1265 Builder.SetInsertPoint(ReleaseBB); 1266 Builder.CreateFence(llvm::Release, Scope); 1267 Builder.CreateBr(ContBB); 1268 SI->addCase(Builder.getInt32(3), ReleaseBB); 1269 1270 Builder.SetInsertPoint(AcqRelBB); 1271 Builder.CreateFence(llvm::AcquireRelease, Scope); 1272 Builder.CreateBr(ContBB); 1273 SI->addCase(Builder.getInt32(4), AcqRelBB); 1274 1275 Builder.SetInsertPoint(SeqCstBB); 1276 Builder.CreateFence(llvm::SequentiallyConsistent, Scope); 1277 Builder.CreateBr(ContBB); 1278 SI->addCase(Builder.getInt32(5), SeqCstBB); 1279 1280 Builder.SetInsertPoint(ContBB); 1281 return RValue::get(0); 1282 } 1283 1284 // Library functions with special handling. 1285 case Builtin::BIsqrt: 1286 case Builtin::BIsqrtf: 1287 case Builtin::BIsqrtl: { 1288 // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only 1289 // in finite- or unsafe-math mode (the intrinsic has different semantics 1290 // for handling negative numbers compared to the library function, so 1291 // -fmath-errno=0 is not enough). 1292 if (!FD->hasAttr<ConstAttr>()) 1293 break; 1294 if (!(CGM.getCodeGenOpts().UnsafeFPMath || 1295 CGM.getCodeGenOpts().NoNaNsFPMath)) 1296 break; 1297 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 1298 llvm::Type *ArgType = Arg0->getType(); 1299 Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType); 1300 return RValue::get(Builder.CreateCall(F, Arg0)); 1301 } 1302 1303 case Builtin::BIpow: 1304 case Builtin::BIpowf: 1305 case Builtin::BIpowl: { 1306 // Transform a call to pow* into a @llvm.pow.* intrinsic call. 1307 if (!FD->hasAttr<ConstAttr>()) 1308 break; 1309 Value *Base = EmitScalarExpr(E->getArg(0)); 1310 Value *Exponent = EmitScalarExpr(E->getArg(1)); 1311 llvm::Type *ArgType = Base->getType(); 1312 Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType); 1313 return RValue::get(Builder.CreateCall2(F, Base, Exponent)); 1314 break; 1315 } 1316 1317 case Builtin::BIfma: 1318 case Builtin::BIfmaf: 1319 case Builtin::BIfmal: 1320 case Builtin::BI__builtin_fma: 1321 case Builtin::BI__builtin_fmaf: 1322 case Builtin::BI__builtin_fmal: { 1323 // Rewrite fma to intrinsic. 1324 Value *FirstArg = EmitScalarExpr(E->getArg(0)); 1325 llvm::Type *ArgType = FirstArg->getType(); 1326 Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType); 1327 return RValue::get(Builder.CreateCall3(F, FirstArg, 1328 EmitScalarExpr(E->getArg(1)), 1329 EmitScalarExpr(E->getArg(2)))); 1330 } 1331 1332 case Builtin::BI__builtin_signbit: 1333 case Builtin::BI__builtin_signbitf: 1334 case Builtin::BI__builtin_signbitl: { 1335 LLVMContext &C = CGM.getLLVMContext(); 1336 1337 Value *Arg = EmitScalarExpr(E->getArg(0)); 1338 llvm::Type *ArgTy = Arg->getType(); 1339 if (ArgTy->isPPC_FP128Ty()) 1340 break; // FIXME: I'm not sure what the right implementation is here. 1341 int ArgWidth = ArgTy->getPrimitiveSizeInBits(); 1342 llvm::Type *ArgIntTy = llvm::IntegerType::get(C, ArgWidth); 1343 Value *BCArg = Builder.CreateBitCast(Arg, ArgIntTy); 1344 Value *ZeroCmp = llvm::Constant::getNullValue(ArgIntTy); 1345 Value *Result = Builder.CreateICmpSLT(BCArg, ZeroCmp); 1346 return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType()))); 1347 } 1348 case Builtin::BI__builtin_annotation: { 1349 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0)); 1350 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation, 1351 AnnVal->getType()); 1352 1353 // Get the annotation string, go through casts. Sema requires this to be a 1354 // non-wide string literal, potentially casted, so the cast<> is safe. 1355 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts(); 1356 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString(); 1357 return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc())); 1358 } 1359 case Builtin::BI__builtin_addcb: 1360 case Builtin::BI__builtin_addcs: 1361 case Builtin::BI__builtin_addc: 1362 case Builtin::BI__builtin_addcl: 1363 case Builtin::BI__builtin_addcll: 1364 case Builtin::BI__builtin_subcb: 1365 case Builtin::BI__builtin_subcs: 1366 case Builtin::BI__builtin_subc: 1367 case Builtin::BI__builtin_subcl: 1368 case Builtin::BI__builtin_subcll: { 1369 1370 // We translate all of these builtins from expressions of the form: 1371 // int x = ..., y = ..., carryin = ..., carryout, result; 1372 // result = __builtin_addc(x, y, carryin, &carryout); 1373 // 1374 // to LLVM IR of the form: 1375 // 1376 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) 1377 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0 1378 // %carry1 = extractvalue {i32, i1} %tmp1, 1 1379 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1, 1380 // i32 %carryin) 1381 // %result = extractvalue {i32, i1} %tmp2, 0 1382 // %carry2 = extractvalue {i32, i1} %tmp2, 1 1383 // %tmp3 = or i1 %carry1, %carry2 1384 // %tmp4 = zext i1 %tmp3 to i32 1385 // store i32 %tmp4, i32* %carryout 1386 1387 // Scalarize our inputs. 1388 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 1389 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 1390 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2)); 1391 std::pair<llvm::Value*, unsigned> CarryOutPtr = 1392 EmitPointerWithAlignment(E->getArg(3)); 1393 1394 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow. 1395 llvm::Intrinsic::ID IntrinsicId; 1396 switch (BuiltinID) { 1397 default: llvm_unreachable("Unknown multiprecision builtin id."); 1398 case Builtin::BI__builtin_addcb: 1399 case Builtin::BI__builtin_addcs: 1400 case Builtin::BI__builtin_addc: 1401 case Builtin::BI__builtin_addcl: 1402 case Builtin::BI__builtin_addcll: 1403 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 1404 break; 1405 case Builtin::BI__builtin_subcb: 1406 case Builtin::BI__builtin_subcs: 1407 case Builtin::BI__builtin_subc: 1408 case Builtin::BI__builtin_subcl: 1409 case Builtin::BI__builtin_subcll: 1410 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 1411 break; 1412 } 1413 1414 // Construct our resulting LLVM IR expression. 1415 llvm::Value *Carry1; 1416 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId, 1417 X, Y, Carry1); 1418 llvm::Value *Carry2; 1419 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId, 1420 Sum1, Carryin, Carry2); 1421 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2), 1422 X->getType()); 1423 llvm::StoreInst *CarryOutStore = Builder.CreateStore(CarryOut, 1424 CarryOutPtr.first); 1425 CarryOutStore->setAlignment(CarryOutPtr.second); 1426 return RValue::get(Sum2); 1427 } 1428 case Builtin::BI__builtin_uadd_overflow: 1429 case Builtin::BI__builtin_uaddl_overflow: 1430 case Builtin::BI__builtin_uaddll_overflow: 1431 case Builtin::BI__builtin_usub_overflow: 1432 case Builtin::BI__builtin_usubl_overflow: 1433 case Builtin::BI__builtin_usubll_overflow: 1434 case Builtin::BI__builtin_umul_overflow: 1435 case Builtin::BI__builtin_umull_overflow: 1436 case Builtin::BI__builtin_umulll_overflow: 1437 case Builtin::BI__builtin_sadd_overflow: 1438 case Builtin::BI__builtin_saddl_overflow: 1439 case Builtin::BI__builtin_saddll_overflow: 1440 case Builtin::BI__builtin_ssub_overflow: 1441 case Builtin::BI__builtin_ssubl_overflow: 1442 case Builtin::BI__builtin_ssubll_overflow: 1443 case Builtin::BI__builtin_smul_overflow: 1444 case Builtin::BI__builtin_smull_overflow: 1445 case Builtin::BI__builtin_smulll_overflow: { 1446 1447 // We translate all of these builtins directly to the relevant llvm IR node. 1448 1449 // Scalarize our inputs. 1450 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 1451 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 1452 std::pair<llvm::Value *, unsigned> SumOutPtr = 1453 EmitPointerWithAlignment(E->getArg(2)); 1454 1455 // Decide which of the overflow intrinsics we are lowering to: 1456 llvm::Intrinsic::ID IntrinsicId; 1457 switch (BuiltinID) { 1458 default: llvm_unreachable("Unknown security overflow builtin id."); 1459 case Builtin::BI__builtin_uadd_overflow: 1460 case Builtin::BI__builtin_uaddl_overflow: 1461 case Builtin::BI__builtin_uaddll_overflow: 1462 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 1463 break; 1464 case Builtin::BI__builtin_usub_overflow: 1465 case Builtin::BI__builtin_usubl_overflow: 1466 case Builtin::BI__builtin_usubll_overflow: 1467 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 1468 break; 1469 case Builtin::BI__builtin_umul_overflow: 1470 case Builtin::BI__builtin_umull_overflow: 1471 case Builtin::BI__builtin_umulll_overflow: 1472 IntrinsicId = llvm::Intrinsic::umul_with_overflow; 1473 break; 1474 case Builtin::BI__builtin_sadd_overflow: 1475 case Builtin::BI__builtin_saddl_overflow: 1476 case Builtin::BI__builtin_saddll_overflow: 1477 IntrinsicId = llvm::Intrinsic::sadd_with_overflow; 1478 break; 1479 case Builtin::BI__builtin_ssub_overflow: 1480 case Builtin::BI__builtin_ssubl_overflow: 1481 case Builtin::BI__builtin_ssubll_overflow: 1482 IntrinsicId = llvm::Intrinsic::ssub_with_overflow; 1483 break; 1484 case Builtin::BI__builtin_smul_overflow: 1485 case Builtin::BI__builtin_smull_overflow: 1486 case Builtin::BI__builtin_smulll_overflow: 1487 IntrinsicId = llvm::Intrinsic::smul_with_overflow; 1488 break; 1489 } 1490 1491 1492 llvm::Value *Carry; 1493 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry); 1494 llvm::StoreInst *SumOutStore = Builder.CreateStore(Sum, SumOutPtr.first); 1495 SumOutStore->setAlignment(SumOutPtr.second); 1496 1497 return RValue::get(Carry); 1498 } 1499 case Builtin::BI__builtin_addressof: 1500 return RValue::get(EmitLValue(E->getArg(0)).getAddress()); 1501 case Builtin::BI__noop: 1502 return RValue::get(0); 1503 } 1504 1505 // If this is an alias for a lib function (e.g. __builtin_sin), emit 1506 // the call using the normal call path, but using the unmangled 1507 // version of the function name. 1508 if (getContext().BuiltinInfo.isLibFunction(BuiltinID)) 1509 return emitLibraryCall(*this, FD, E, 1510 CGM.getBuiltinLibFunction(FD, BuiltinID)); 1511 1512 // If this is a predefined lib function (e.g. malloc), emit the call 1513 // using exactly the normal call path. 1514 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID)) 1515 return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee())); 1516 1517 // See if we have a target specific intrinsic. 1518 const char *Name = getContext().BuiltinInfo.GetName(BuiltinID); 1519 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic; 1520 if (const char *Prefix = 1521 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch())) 1522 IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name); 1523 1524 if (IntrinsicID != Intrinsic::not_intrinsic) { 1525 SmallVector<Value*, 16> Args; 1526 1527 // Find out if any arguments are required to be integer constant 1528 // expressions. 1529 unsigned ICEArguments = 0; 1530 ASTContext::GetBuiltinTypeError Error; 1531 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 1532 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 1533 1534 Function *F = CGM.getIntrinsic(IntrinsicID); 1535 llvm::FunctionType *FTy = F->getFunctionType(); 1536 1537 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { 1538 Value *ArgValue; 1539 // If this is a normal argument, just emit it as a scalar. 1540 if ((ICEArguments & (1 << i)) == 0) { 1541 ArgValue = EmitScalarExpr(E->getArg(i)); 1542 } else { 1543 // If this is required to be a constant, constant fold it so that we 1544 // know that the generated intrinsic gets a ConstantInt. 1545 llvm::APSInt Result; 1546 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext()); 1547 assert(IsConst && "Constant arg isn't actually constant?"); 1548 (void)IsConst; 1549 ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result); 1550 } 1551 1552 // If the intrinsic arg type is different from the builtin arg type 1553 // we need to do a bit cast. 1554 llvm::Type *PTy = FTy->getParamType(i); 1555 if (PTy != ArgValue->getType()) { 1556 assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) && 1557 "Must be able to losslessly bit cast to param"); 1558 ArgValue = Builder.CreateBitCast(ArgValue, PTy); 1559 } 1560 1561 Args.push_back(ArgValue); 1562 } 1563 1564 Value *V = Builder.CreateCall(F, Args); 1565 QualType BuiltinRetType = E->getType(); 1566 1567 llvm::Type *RetTy = VoidTy; 1568 if (!BuiltinRetType->isVoidType()) 1569 RetTy = ConvertType(BuiltinRetType); 1570 1571 if (RetTy != V->getType()) { 1572 assert(V->getType()->canLosslesslyBitCastTo(RetTy) && 1573 "Must be able to losslessly bit cast result type"); 1574 V = Builder.CreateBitCast(V, RetTy); 1575 } 1576 1577 return RValue::get(V); 1578 } 1579 1580 // See if we have a target specific builtin that needs to be lowered. 1581 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E)) 1582 return RValue::get(V); 1583 1584 ErrorUnsupported(E, "builtin function"); 1585 1586 // Unknown builtin, for now just dump it out and return undef. 1587 return GetUndefRValue(E->getType()); 1588 } 1589 1590 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, 1591 const CallExpr *E) { 1592 switch (getTarget().getTriple().getArch()) { 1593 case llvm::Triple::aarch64: 1594 return EmitAArch64BuiltinExpr(BuiltinID, E); 1595 case llvm::Triple::arm: 1596 case llvm::Triple::thumb: 1597 return EmitARMBuiltinExpr(BuiltinID, E); 1598 case llvm::Triple::x86: 1599 case llvm::Triple::x86_64: 1600 return EmitX86BuiltinExpr(BuiltinID, E); 1601 case llvm::Triple::ppc: 1602 case llvm::Triple::ppc64: 1603 case llvm::Triple::ppc64le: 1604 return EmitPPCBuiltinExpr(BuiltinID, E); 1605 default: 1606 return 0; 1607 } 1608 } 1609 1610 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF, 1611 NeonTypeFlags TypeFlags, 1612 bool V1Ty=false) { 1613 int IsQuad = TypeFlags.isQuad(); 1614 switch (TypeFlags.getEltType()) { 1615 case NeonTypeFlags::Int8: 1616 case NeonTypeFlags::Poly8: 1617 return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad)); 1618 case NeonTypeFlags::Int16: 1619 case NeonTypeFlags::Poly16: 1620 case NeonTypeFlags::Float16: 1621 return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad)); 1622 case NeonTypeFlags::Int32: 1623 return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad)); 1624 case NeonTypeFlags::Int64: 1625 case NeonTypeFlags::Poly64: 1626 return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad)); 1627 case NeonTypeFlags::Poly128: 1628 // FIXME: i128 and f128 doesn't get fully support in Clang and llvm. 1629 // There is a lot of i128 and f128 API missing. 1630 // so we use v16i8 to represent poly128 and get pattern matched. 1631 return llvm::VectorType::get(CGF->Int8Ty, 16); 1632 case NeonTypeFlags::Float32: 1633 return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad)); 1634 case NeonTypeFlags::Float64: 1635 return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad)); 1636 } 1637 llvm_unreachable("Unknown vector element type!"); 1638 } 1639 1640 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) { 1641 unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements(); 1642 Value* SV = llvm::ConstantVector::getSplat(nElts, C); 1643 return Builder.CreateShuffleVector(V, V, SV, "lane"); 1644 } 1645 1646 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops, 1647 const char *name, 1648 unsigned shift, bool rightshift) { 1649 unsigned j = 0; 1650 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); 1651 ai != ae; ++ai, ++j) 1652 if (shift > 0 && shift == j) 1653 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift); 1654 else 1655 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name); 1656 1657 return Builder.CreateCall(F, Ops, name); 1658 } 1659 1660 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty, 1661 bool neg) { 1662 int SV = cast<ConstantInt>(V)->getSExtValue(); 1663 1664 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 1665 llvm::Constant *C = ConstantInt::get(VTy->getElementType(), neg ? -SV : SV); 1666 return llvm::ConstantVector::getSplat(VTy->getNumElements(), C); 1667 } 1668 1669 // \brief Right-shift a vector by a constant. 1670 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift, 1671 llvm::Type *Ty, bool usgn, 1672 const char *name) { 1673 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 1674 1675 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue(); 1676 int EltSize = VTy->getScalarSizeInBits(); 1677 1678 Vec = Builder.CreateBitCast(Vec, Ty); 1679 1680 // lshr/ashr are undefined when the shift amount is equal to the vector 1681 // element size. 1682 if (ShiftAmt == EltSize) { 1683 if (usgn) { 1684 // Right-shifting an unsigned value by its size yields 0. 1685 llvm::Constant *Zero = ConstantInt::get(VTy->getElementType(), 0); 1686 return llvm::ConstantVector::getSplat(VTy->getNumElements(), Zero); 1687 } else { 1688 // Right-shifting a signed value by its size is equivalent 1689 // to a shift of size-1. 1690 --ShiftAmt; 1691 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt); 1692 } 1693 } 1694 1695 Shift = EmitNeonShiftVector(Shift, Ty, false); 1696 if (usgn) 1697 return Builder.CreateLShr(Vec, Shift, name); 1698 else 1699 return Builder.CreateAShr(Vec, Shift, name); 1700 } 1701 1702 /// GetPointeeAlignment - Given an expression with a pointer type, find the 1703 /// alignment of the type referenced by the pointer. Skip over implicit 1704 /// casts. 1705 std::pair<llvm::Value*, unsigned> 1706 CodeGenFunction::EmitPointerWithAlignment(const Expr *Addr) { 1707 assert(Addr->getType()->isPointerType()); 1708 Addr = Addr->IgnoreParens(); 1709 if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Addr)) { 1710 if ((ICE->getCastKind() == CK_BitCast || ICE->getCastKind() == CK_NoOp) && 1711 ICE->getSubExpr()->getType()->isPointerType()) { 1712 std::pair<llvm::Value*, unsigned> Ptr = 1713 EmitPointerWithAlignment(ICE->getSubExpr()); 1714 Ptr.first = Builder.CreateBitCast(Ptr.first, 1715 ConvertType(Addr->getType())); 1716 return Ptr; 1717 } else if (ICE->getCastKind() == CK_ArrayToPointerDecay) { 1718 LValue LV = EmitLValue(ICE->getSubExpr()); 1719 unsigned Align = LV.getAlignment().getQuantity(); 1720 if (!Align) { 1721 // FIXME: Once LValues are fixed to always set alignment, 1722 // zap this code. 1723 QualType PtTy = ICE->getSubExpr()->getType(); 1724 if (!PtTy->isIncompleteType()) 1725 Align = getContext().getTypeAlignInChars(PtTy).getQuantity(); 1726 else 1727 Align = 1; 1728 } 1729 return std::make_pair(LV.getAddress(), Align); 1730 } 1731 } 1732 if (const UnaryOperator *UO = dyn_cast<UnaryOperator>(Addr)) { 1733 if (UO->getOpcode() == UO_AddrOf) { 1734 LValue LV = EmitLValue(UO->getSubExpr()); 1735 unsigned Align = LV.getAlignment().getQuantity(); 1736 if (!Align) { 1737 // FIXME: Once LValues are fixed to always set alignment, 1738 // zap this code. 1739 QualType PtTy = UO->getSubExpr()->getType(); 1740 if (!PtTy->isIncompleteType()) 1741 Align = getContext().getTypeAlignInChars(PtTy).getQuantity(); 1742 else 1743 Align = 1; 1744 } 1745 return std::make_pair(LV.getAddress(), Align); 1746 } 1747 } 1748 1749 unsigned Align = 1; 1750 QualType PtTy = Addr->getType()->getPointeeType(); 1751 if (!PtTy->isIncompleteType()) 1752 Align = getContext().getTypeAlignInChars(PtTy).getQuantity(); 1753 1754 return std::make_pair(EmitScalarExpr(Addr), Align); 1755 } 1756 1757 static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF, 1758 unsigned BuiltinID, 1759 const CallExpr *E) { 1760 unsigned int Int = 0; 1761 unsigned IntTypes = 0; 1762 enum { 1763 ScalarRet = (1 << 0), 1764 VectorRet = (1 << 1), 1765 ScalarArg0 = (1 << 2), 1766 VectorGetArg0 = (1 << 3), 1767 VectorCastArg0 = (1 << 4), 1768 ScalarArg1 = (1 << 5), 1769 VectorGetArg1 = (1 << 6), 1770 VectorCastArg1 = (1 << 7), 1771 ScalarFpCmpzArg1 = (1 << 8) 1772 }; 1773 const char *s = NULL; 1774 1775 SmallVector<Value *, 4> Ops; 1776 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { 1777 Ops.push_back(CGF.EmitScalarExpr(E->getArg(i))); 1778 } 1779 1780 // AArch64 scalar builtins are not overloaded, they do not have an extra 1781 // argument that specifies the vector type, need to handle each case. 1782 switch (BuiltinID) { 1783 default: break; 1784 case AArch64::BI__builtin_neon_vdups_lane_f32: 1785 case AArch64::BI__builtin_neon_vdupd_lane_f64: 1786 case AArch64::BI__builtin_neon_vdups_laneq_f32: 1787 case AArch64::BI__builtin_neon_vdupd_laneq_f64: { 1788 return CGF.Builder.CreateExtractElement(Ops[0], Ops[1], "vdup_lane"); 1789 } 1790 case AArch64::BI__builtin_neon_vdupb_lane_i8: 1791 case AArch64::BI__builtin_neon_vduph_lane_i16: 1792 case AArch64::BI__builtin_neon_vdups_lane_i32: 1793 case AArch64::BI__builtin_neon_vdupd_lane_i64: 1794 case AArch64::BI__builtin_neon_vdupb_laneq_i8: 1795 case AArch64::BI__builtin_neon_vduph_laneq_i16: 1796 case AArch64::BI__builtin_neon_vdups_laneq_i32: 1797 case AArch64::BI__builtin_neon_vdupd_laneq_i64: { 1798 // The backend treats Neon scalar types as v1ix types 1799 // So we want to dup lane from any vector to v1ix vector 1800 // with shufflevector 1801 s = "vdup_lane"; 1802 Value* SV = llvm::ConstantVector::getSplat(1, cast<ConstantInt>(Ops[1])); 1803 Value *Result = CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], SV, s); 1804 llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType()); 1805 // AArch64 intrinsic one-element vector type cast to 1806 // scalar type expected by the builtin 1807 return CGF.Builder.CreateBitCast(Result, Ty, s); 1808 } 1809 case AArch64::BI__builtin_neon_vqdmlalh_lane_s16 : 1810 case AArch64::BI__builtin_neon_vqdmlalh_laneq_s16 : 1811 case AArch64::BI__builtin_neon_vqdmlals_lane_s32 : 1812 case AArch64::BI__builtin_neon_vqdmlals_laneq_s32 : 1813 case AArch64::BI__builtin_neon_vqdmlslh_lane_s16 : 1814 case AArch64::BI__builtin_neon_vqdmlslh_laneq_s16 : 1815 case AArch64::BI__builtin_neon_vqdmlsls_lane_s32 : 1816 case AArch64::BI__builtin_neon_vqdmlsls_laneq_s32 : { 1817 Int = Intrinsic::arm_neon_vqadds; 1818 if (BuiltinID == AArch64::BI__builtin_neon_vqdmlslh_lane_s16 || 1819 BuiltinID == AArch64::BI__builtin_neon_vqdmlslh_laneq_s16 || 1820 BuiltinID == AArch64::BI__builtin_neon_vqdmlsls_lane_s32 || 1821 BuiltinID == AArch64::BI__builtin_neon_vqdmlsls_laneq_s32) { 1822 Int = Intrinsic::arm_neon_vqsubs; 1823 } 1824 // create vqdmull call with b * c[i] 1825 llvm::Type *Ty = CGF.ConvertType(E->getArg(1)->getType()); 1826 llvm::VectorType *OpVTy = llvm::VectorType::get(Ty, 1); 1827 Ty = CGF.ConvertType(E->getArg(0)->getType()); 1828 llvm::VectorType *ResVTy = llvm::VectorType::get(Ty, 1); 1829 Value *F = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, ResVTy); 1830 Value *V = UndefValue::get(OpVTy); 1831 llvm::Constant *CI = ConstantInt::get(CGF.Int32Ty, 0); 1832 SmallVector<Value *, 2> MulOps; 1833 MulOps.push_back(Ops[1]); 1834 MulOps.push_back(Ops[2]); 1835 MulOps[0] = CGF.Builder.CreateInsertElement(V, MulOps[0], CI); 1836 MulOps[1] = CGF.Builder.CreateExtractElement(MulOps[1], Ops[3], "extract"); 1837 MulOps[1] = CGF.Builder.CreateInsertElement(V, MulOps[1], CI); 1838 Value *MulRes = CGF.Builder.CreateCall2(F, MulOps[0], MulOps[1]); 1839 // create vqadds call with a +/- vqdmull result 1840 F = CGF.CGM.getIntrinsic(Int, ResVTy); 1841 SmallVector<Value *, 2> AddOps; 1842 AddOps.push_back(Ops[0]); 1843 AddOps.push_back(MulRes); 1844 V = UndefValue::get(ResVTy); 1845 AddOps[0] = CGF.Builder.CreateInsertElement(V, AddOps[0], CI); 1846 Value *AddRes = CGF.Builder.CreateCall2(F, AddOps[0], AddOps[1]); 1847 return CGF.Builder.CreateBitCast(AddRes, Ty); 1848 } 1849 case AArch64::BI__builtin_neon_vfmas_lane_f32: 1850 case AArch64::BI__builtin_neon_vfmas_laneq_f32: 1851 case AArch64::BI__builtin_neon_vfmad_lane_f64: 1852 case AArch64::BI__builtin_neon_vfmad_laneq_f64: { 1853 llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType()); 1854 Value *F = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty); 1855 Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 1856 return CGF.Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); 1857 } 1858 // Scalar Floating-point Multiply Extended 1859 case AArch64::BI__builtin_neon_vmulxs_f32: 1860 case AArch64::BI__builtin_neon_vmulxd_f64: { 1861 Int = Intrinsic::aarch64_neon_vmulx; 1862 llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType()); 1863 return CGF.EmitNeonCall(CGF.CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); 1864 } 1865 case AArch64::BI__builtin_neon_vmul_n_f64: { 1866 // v1f64 vmul_n_f64 should be mapped to Neon scalar mul lane 1867 llvm::Type *VTy = GetNeonType(&CGF, 1868 NeonTypeFlags(NeonTypeFlags::Float64, false, false)); 1869 Ops[0] = CGF.Builder.CreateBitCast(Ops[0], VTy); 1870 llvm::Value *Idx = llvm::ConstantInt::get(CGF.Int32Ty, 0); 1871 Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], Idx, "extract"); 1872 Value *Result = CGF.Builder.CreateFMul(Ops[0], Ops[1]); 1873 return CGF.Builder.CreateBitCast(Result, VTy); 1874 } 1875 case AArch64::BI__builtin_neon_vget_lane_i8: 1876 case AArch64::BI__builtin_neon_vget_lane_i16: 1877 case AArch64::BI__builtin_neon_vget_lane_i32: 1878 case AArch64::BI__builtin_neon_vget_lane_i64: 1879 case AArch64::BI__builtin_neon_vget_lane_f32: 1880 case AArch64::BI__builtin_neon_vget_lane_f64: 1881 case AArch64::BI__builtin_neon_vgetq_lane_i8: 1882 case AArch64::BI__builtin_neon_vgetq_lane_i16: 1883 case AArch64::BI__builtin_neon_vgetq_lane_i32: 1884 case AArch64::BI__builtin_neon_vgetq_lane_i64: 1885 case AArch64::BI__builtin_neon_vgetq_lane_f32: 1886 case AArch64::BI__builtin_neon_vgetq_lane_f64: 1887 return CGF.EmitARMBuiltinExpr(ARM::BI__builtin_neon_vget_lane_i8, E); 1888 case AArch64::BI__builtin_neon_vset_lane_i8: 1889 case AArch64::BI__builtin_neon_vset_lane_i16: 1890 case AArch64::BI__builtin_neon_vset_lane_i32: 1891 case AArch64::BI__builtin_neon_vset_lane_i64: 1892 case AArch64::BI__builtin_neon_vset_lane_f32: 1893 case AArch64::BI__builtin_neon_vset_lane_f64: 1894 case AArch64::BI__builtin_neon_vsetq_lane_i8: 1895 case AArch64::BI__builtin_neon_vsetq_lane_i16: 1896 case AArch64::BI__builtin_neon_vsetq_lane_i32: 1897 case AArch64::BI__builtin_neon_vsetq_lane_i64: 1898 case AArch64::BI__builtin_neon_vsetq_lane_f32: 1899 case AArch64::BI__builtin_neon_vsetq_lane_f64: 1900 return CGF.EmitARMBuiltinExpr(ARM::BI__builtin_neon_vset_lane_i8, E); 1901 // Crypto 1902 case AArch64::BI__builtin_neon_vsha1h_u32: 1903 Int = Intrinsic::arm_neon_sha1h; 1904 s = "sha1h"; IntTypes = VectorRet; break; 1905 case AArch64::BI__builtin_neon_vsha1cq_u32: 1906 Int = Intrinsic::aarch64_neon_sha1c; 1907 s = "sha1c"; break; 1908 case AArch64::BI__builtin_neon_vsha1pq_u32: 1909 Int = Intrinsic::aarch64_neon_sha1p; 1910 s = "sha1p"; break; 1911 case AArch64::BI__builtin_neon_vsha1mq_u32: 1912 Int = Intrinsic::aarch64_neon_sha1m; 1913 s = "sha1m"; break; 1914 // Scalar Add 1915 case AArch64::BI__builtin_neon_vaddd_s64: 1916 Int = Intrinsic::aarch64_neon_vaddds; 1917 s = "vaddds"; break; 1918 case AArch64::BI__builtin_neon_vaddd_u64: 1919 Int = Intrinsic::aarch64_neon_vadddu; 1920 s = "vadddu"; break; 1921 // Scalar Sub 1922 case AArch64::BI__builtin_neon_vsubd_s64: 1923 Int = Intrinsic::aarch64_neon_vsubds; 1924 s = "vsubds"; break; 1925 case AArch64::BI__builtin_neon_vsubd_u64: 1926 Int = Intrinsic::aarch64_neon_vsubdu; 1927 s = "vsubdu"; break; 1928 // Scalar Saturating Add 1929 case AArch64::BI__builtin_neon_vqaddb_s8: 1930 case AArch64::BI__builtin_neon_vqaddh_s16: 1931 case AArch64::BI__builtin_neon_vqadds_s32: 1932 case AArch64::BI__builtin_neon_vqaddd_s64: 1933 Int = Intrinsic::arm_neon_vqadds; 1934 s = "vqadds"; IntTypes = VectorRet; break; 1935 case AArch64::BI__builtin_neon_vqaddb_u8: 1936 case AArch64::BI__builtin_neon_vqaddh_u16: 1937 case AArch64::BI__builtin_neon_vqadds_u32: 1938 case AArch64::BI__builtin_neon_vqaddd_u64: 1939 Int = Intrinsic::arm_neon_vqaddu; 1940 s = "vqaddu"; IntTypes = VectorRet; break; 1941 // Scalar Saturating Sub 1942 case AArch64::BI__builtin_neon_vqsubb_s8: 1943 case AArch64::BI__builtin_neon_vqsubh_s16: 1944 case AArch64::BI__builtin_neon_vqsubs_s32: 1945 case AArch64::BI__builtin_neon_vqsubd_s64: 1946 Int = Intrinsic::arm_neon_vqsubs; 1947 s = "vqsubs"; IntTypes = VectorRet; break; 1948 case AArch64::BI__builtin_neon_vqsubb_u8: 1949 case AArch64::BI__builtin_neon_vqsubh_u16: 1950 case AArch64::BI__builtin_neon_vqsubs_u32: 1951 case AArch64::BI__builtin_neon_vqsubd_u64: 1952 Int = Intrinsic::arm_neon_vqsubu; 1953 s = "vqsubu"; IntTypes = VectorRet; break; 1954 // Scalar Shift Left 1955 case AArch64::BI__builtin_neon_vshld_s64: 1956 Int = Intrinsic::aarch64_neon_vshlds; 1957 s = "vshlds"; break; 1958 case AArch64::BI__builtin_neon_vshld_u64: 1959 Int = Intrinsic::aarch64_neon_vshldu; 1960 s = "vshldu"; break; 1961 // Scalar Saturating Shift Left 1962 case AArch64::BI__builtin_neon_vqshlb_s8: 1963 case AArch64::BI__builtin_neon_vqshlh_s16: 1964 case AArch64::BI__builtin_neon_vqshls_s32: 1965 case AArch64::BI__builtin_neon_vqshld_s64: 1966 Int = Intrinsic::aarch64_neon_vqshls; 1967 s = "vqshls"; IntTypes = VectorRet; break; 1968 case AArch64::BI__builtin_neon_vqshlb_u8: 1969 case AArch64::BI__builtin_neon_vqshlh_u16: 1970 case AArch64::BI__builtin_neon_vqshls_u32: 1971 case AArch64::BI__builtin_neon_vqshld_u64: 1972 Int = Intrinsic::aarch64_neon_vqshlu; 1973 s = "vqshlu"; IntTypes = VectorRet; break; 1974 // Scalar Rouding Shift Left 1975 case AArch64::BI__builtin_neon_vrshld_s64: 1976 Int = Intrinsic::aarch64_neon_vrshlds; 1977 s = "vrshlds"; break; 1978 case AArch64::BI__builtin_neon_vrshld_u64: 1979 Int = Intrinsic::aarch64_neon_vrshldu; 1980 s = "vrshldu"; break; 1981 // Scalar Saturating Rouding Shift Left 1982 case AArch64::BI__builtin_neon_vqrshlb_s8: 1983 case AArch64::BI__builtin_neon_vqrshlh_s16: 1984 case AArch64::BI__builtin_neon_vqrshls_s32: 1985 case AArch64::BI__builtin_neon_vqrshld_s64: 1986 Int = Intrinsic::aarch64_neon_vqrshls; 1987 s = "vqrshls"; IntTypes = VectorRet; break; 1988 case AArch64::BI__builtin_neon_vqrshlb_u8: 1989 case AArch64::BI__builtin_neon_vqrshlh_u16: 1990 case AArch64::BI__builtin_neon_vqrshls_u32: 1991 case AArch64::BI__builtin_neon_vqrshld_u64: 1992 Int = Intrinsic::aarch64_neon_vqrshlu; 1993 s = "vqrshlu"; IntTypes = VectorRet; break; 1994 // Scalar Reduce Pairwise Add 1995 case AArch64::BI__builtin_neon_vpaddd_s64: 1996 case AArch64::BI__builtin_neon_vpaddd_u64: 1997 Int = Intrinsic::aarch64_neon_vpadd; 1998 s = "vpadd"; break; 1999 case AArch64::BI__builtin_neon_vaddv_f32: 2000 case AArch64::BI__builtin_neon_vaddvq_f32: 2001 case AArch64::BI__builtin_neon_vaddvq_f64: 2002 case AArch64::BI__builtin_neon_vpadds_f32: 2003 case AArch64::BI__builtin_neon_vpaddd_f64: 2004 Int = Intrinsic::aarch64_neon_vpfadd; 2005 s = "vpfadd"; IntTypes = ScalarRet | VectorCastArg0; break; 2006 // Scalar Reduce Pairwise Floating Point Max 2007 case AArch64::BI__builtin_neon_vmaxv_f32: 2008 case AArch64::BI__builtin_neon_vpmaxs_f32: 2009 case AArch64::BI__builtin_neon_vmaxvq_f64: 2010 case AArch64::BI__builtin_neon_vpmaxqd_f64: 2011 Int = Intrinsic::aarch64_neon_vpmax; 2012 s = "vpmax"; IntTypes = ScalarRet | VectorCastArg0; break; 2013 // Scalar Reduce Pairwise Floating Point Min 2014 case AArch64::BI__builtin_neon_vminv_f32: 2015 case AArch64::BI__builtin_neon_vpmins_f32: 2016 case AArch64::BI__builtin_neon_vminvq_f64: 2017 case AArch64::BI__builtin_neon_vpminqd_f64: 2018 Int = Intrinsic::aarch64_neon_vpmin; 2019 s = "vpmin"; IntTypes = ScalarRet | VectorCastArg0; break; 2020 // Scalar Reduce Pairwise Floating Point Maxnm 2021 case AArch64::BI__builtin_neon_vmaxnmv_f32: 2022 case AArch64::BI__builtin_neon_vpmaxnms_f32: 2023 case AArch64::BI__builtin_neon_vmaxnmvq_f64: 2024 case AArch64::BI__builtin_neon_vpmaxnmqd_f64: 2025 Int = Intrinsic::aarch64_neon_vpfmaxnm; 2026 s = "vpfmaxnm"; IntTypes = ScalarRet | VectorCastArg0; break; 2027 // Scalar Reduce Pairwise Floating Point Minnm 2028 case AArch64::BI__builtin_neon_vminnmv_f32: 2029 case AArch64::BI__builtin_neon_vpminnms_f32: 2030 case AArch64::BI__builtin_neon_vminnmvq_f64: 2031 case AArch64::BI__builtin_neon_vpminnmqd_f64: 2032 Int = Intrinsic::aarch64_neon_vpfminnm; 2033 s = "vpfminnm"; IntTypes = ScalarRet | VectorCastArg0; break; 2034 // The followings are intrinsics with scalar results generated AcrossVec vectors 2035 case AArch64::BI__builtin_neon_vaddlv_s8: 2036 case AArch64::BI__builtin_neon_vaddlv_s16: 2037 case AArch64::BI__builtin_neon_vaddlv_s32: 2038 case AArch64::BI__builtin_neon_vaddlvq_s8: 2039 case AArch64::BI__builtin_neon_vaddlvq_s16: 2040 case AArch64::BI__builtin_neon_vaddlvq_s32: 2041 Int = Intrinsic::aarch64_neon_saddlv; 2042 s = "saddlv"; IntTypes = VectorRet | VectorCastArg1; break; 2043 case AArch64::BI__builtin_neon_vaddlv_u8: 2044 case AArch64::BI__builtin_neon_vaddlv_u16: 2045 case AArch64::BI__builtin_neon_vaddlv_u32: 2046 case AArch64::BI__builtin_neon_vaddlvq_u8: 2047 case AArch64::BI__builtin_neon_vaddlvq_u16: 2048 case AArch64::BI__builtin_neon_vaddlvq_u32: 2049 Int = Intrinsic::aarch64_neon_uaddlv; 2050 s = "uaddlv"; IntTypes = VectorRet | VectorCastArg1; break; 2051 case AArch64::BI__builtin_neon_vmaxv_s8: 2052 case AArch64::BI__builtin_neon_vmaxv_s16: 2053 case AArch64::BI__builtin_neon_vmaxv_s32: 2054 case AArch64::BI__builtin_neon_vmaxvq_s8: 2055 case AArch64::BI__builtin_neon_vmaxvq_s16: 2056 case AArch64::BI__builtin_neon_vmaxvq_s32: 2057 Int = Intrinsic::aarch64_neon_smaxv; 2058 s = "smaxv"; IntTypes = VectorRet | VectorCastArg1; break; 2059 case AArch64::BI__builtin_neon_vmaxv_u8: 2060 case AArch64::BI__builtin_neon_vmaxv_u16: 2061 case AArch64::BI__builtin_neon_vmaxv_u32: 2062 case AArch64::BI__builtin_neon_vmaxvq_u8: 2063 case AArch64::BI__builtin_neon_vmaxvq_u16: 2064 case AArch64::BI__builtin_neon_vmaxvq_u32: 2065 Int = Intrinsic::aarch64_neon_umaxv; 2066 s = "umaxv"; IntTypes = VectorRet | VectorCastArg1; break; 2067 case AArch64::BI__builtin_neon_vminv_s8: 2068 case AArch64::BI__builtin_neon_vminv_s16: 2069 case AArch64::BI__builtin_neon_vminv_s32: 2070 case AArch64::BI__builtin_neon_vminvq_s8: 2071 case AArch64::BI__builtin_neon_vminvq_s16: 2072 case AArch64::BI__builtin_neon_vminvq_s32: 2073 Int = Intrinsic::aarch64_neon_sminv; 2074 s = "sminv"; IntTypes = VectorRet | VectorCastArg1; break; 2075 case AArch64::BI__builtin_neon_vminv_u8: 2076 case AArch64::BI__builtin_neon_vminv_u16: 2077 case AArch64::BI__builtin_neon_vminv_u32: 2078 case AArch64::BI__builtin_neon_vminvq_u8: 2079 case AArch64::BI__builtin_neon_vminvq_u16: 2080 case AArch64::BI__builtin_neon_vminvq_u32: 2081 Int = Intrinsic::aarch64_neon_uminv; 2082 s = "uminv"; IntTypes = VectorRet | VectorCastArg1; break; 2083 case AArch64::BI__builtin_neon_vaddv_s8: 2084 case AArch64::BI__builtin_neon_vaddv_s16: 2085 case AArch64::BI__builtin_neon_vaddv_s32: 2086 case AArch64::BI__builtin_neon_vaddvq_s8: 2087 case AArch64::BI__builtin_neon_vaddvq_s16: 2088 case AArch64::BI__builtin_neon_vaddvq_s32: 2089 case AArch64::BI__builtin_neon_vaddvq_s64: 2090 case AArch64::BI__builtin_neon_vaddv_u8: 2091 case AArch64::BI__builtin_neon_vaddv_u16: 2092 case AArch64::BI__builtin_neon_vaddv_u32: 2093 case AArch64::BI__builtin_neon_vaddvq_u8: 2094 case AArch64::BI__builtin_neon_vaddvq_u16: 2095 case AArch64::BI__builtin_neon_vaddvq_u32: 2096 case AArch64::BI__builtin_neon_vaddvq_u64: 2097 Int = Intrinsic::aarch64_neon_vaddv; 2098 s = "vaddv"; IntTypes = VectorRet | VectorCastArg1; break; 2099 case AArch64::BI__builtin_neon_vmaxvq_f32: 2100 Int = Intrinsic::aarch64_neon_vmaxv; 2101 s = "vmaxv"; break; 2102 case AArch64::BI__builtin_neon_vminvq_f32: 2103 Int = Intrinsic::aarch64_neon_vminv; 2104 s = "vminv"; break; 2105 case AArch64::BI__builtin_neon_vmaxnmvq_f32: 2106 Int = Intrinsic::aarch64_neon_vmaxnmv; 2107 s = "vmaxnmv"; break; 2108 case AArch64::BI__builtin_neon_vminnmvq_f32: 2109 Int = Intrinsic::aarch64_neon_vminnmv; 2110 s = "vminnmv"; break; 2111 // Scalar Integer Saturating Doubling Multiply Half High 2112 case AArch64::BI__builtin_neon_vqdmulhh_s16: 2113 case AArch64::BI__builtin_neon_vqdmulhs_s32: 2114 Int = Intrinsic::arm_neon_vqdmulh; 2115 s = "vqdmulh"; IntTypes = VectorRet; break; 2116 // Scalar Integer Saturating Rounding Doubling Multiply Half High 2117 case AArch64::BI__builtin_neon_vqrdmulhh_s16: 2118 case AArch64::BI__builtin_neon_vqrdmulhs_s32: 2119 Int = Intrinsic::arm_neon_vqrdmulh; 2120 s = "vqrdmulh"; IntTypes = VectorRet; break; 2121 // Scalar Floating-point Reciprocal Step 2122 case AArch64::BI__builtin_neon_vrecpss_f32: 2123 case AArch64::BI__builtin_neon_vrecpsd_f64: 2124 Int = Intrinsic::aarch64_neon_vrecps; 2125 s = "vrecps"; IntTypes = ScalarRet; break; 2126 // Scalar Floating-point Reciprocal Square Root Step 2127 case AArch64::BI__builtin_neon_vrsqrtss_f32: 2128 case AArch64::BI__builtin_neon_vrsqrtsd_f64: 2129 Int = Intrinsic::aarch64_neon_vrsqrts; 2130 s = "vrsqrts"; IntTypes = ScalarRet; break; 2131 // Scalar Signed Integer Convert To Floating-point 2132 case AArch64::BI__builtin_neon_vcvts_f32_s32: 2133 case AArch64::BI__builtin_neon_vcvtd_f64_s64: 2134 Int = Intrinsic::aarch64_neon_vcvtint2fps; 2135 s = "vcvtf"; IntTypes = ScalarRet | VectorGetArg0; break; 2136 // Scalar Unsigned Integer Convert To Floating-point 2137 case AArch64::BI__builtin_neon_vcvts_f32_u32: 2138 case AArch64::BI__builtin_neon_vcvtd_f64_u64: 2139 Int = Intrinsic::aarch64_neon_vcvtint2fpu; 2140 s = "vcvtf"; IntTypes = ScalarRet | VectorGetArg0; break; 2141 // Scalar Floating-point Converts 2142 case AArch64::BI__builtin_neon_vcvtxd_f32_f64: 2143 Int = Intrinsic::aarch64_neon_fcvtxn; 2144 s = "vcvtxn"; break; 2145 case AArch64::BI__builtin_neon_vcvtas_s32_f32: 2146 case AArch64::BI__builtin_neon_vcvtad_s64_f64: 2147 Int = Intrinsic::aarch64_neon_fcvtas; 2148 s = "vcvtas"; IntTypes = VectorRet | ScalarArg1; break; 2149 case AArch64::BI__builtin_neon_vcvtas_u32_f32: 2150 case AArch64::BI__builtin_neon_vcvtad_u64_f64: 2151 Int = Intrinsic::aarch64_neon_fcvtau; 2152 s = "vcvtau"; IntTypes = VectorRet | ScalarArg1; break; 2153 case AArch64::BI__builtin_neon_vcvtms_s32_f32: 2154 case AArch64::BI__builtin_neon_vcvtmd_s64_f64: 2155 Int = Intrinsic::aarch64_neon_fcvtms; 2156 s = "vcvtms"; IntTypes = VectorRet | ScalarArg1; break; 2157 case AArch64::BI__builtin_neon_vcvtms_u32_f32: 2158 case AArch64::BI__builtin_neon_vcvtmd_u64_f64: 2159 Int = Intrinsic::aarch64_neon_fcvtmu; 2160 s = "vcvtmu"; IntTypes = VectorRet | ScalarArg1; break; 2161 case AArch64::BI__builtin_neon_vcvtns_s32_f32: 2162 case AArch64::BI__builtin_neon_vcvtnd_s64_f64: 2163 Int = Intrinsic::aarch64_neon_fcvtns; 2164 s = "vcvtns"; IntTypes = VectorRet | ScalarArg1; break; 2165 case AArch64::BI__builtin_neon_vcvtns_u32_f32: 2166 case AArch64::BI__builtin_neon_vcvtnd_u64_f64: 2167 Int = Intrinsic::aarch64_neon_fcvtnu; 2168 s = "vcvtnu"; IntTypes = VectorRet | ScalarArg1; break; 2169 case AArch64::BI__builtin_neon_vcvtps_s32_f32: 2170 case AArch64::BI__builtin_neon_vcvtpd_s64_f64: 2171 Int = Intrinsic::aarch64_neon_fcvtps; 2172 s = "vcvtps"; IntTypes = VectorRet | ScalarArg1; break; 2173 case AArch64::BI__builtin_neon_vcvtps_u32_f32: 2174 case AArch64::BI__builtin_neon_vcvtpd_u64_f64: 2175 Int = Intrinsic::aarch64_neon_fcvtpu; 2176 s = "vcvtpu"; IntTypes = VectorRet | ScalarArg1; break; 2177 case AArch64::BI__builtin_neon_vcvts_s32_f32: 2178 case AArch64::BI__builtin_neon_vcvtd_s64_f64: 2179 Int = Intrinsic::aarch64_neon_fcvtzs; 2180 s = "vcvtzs"; IntTypes = VectorRet | ScalarArg1; break; 2181 case AArch64::BI__builtin_neon_vcvts_u32_f32: 2182 case AArch64::BI__builtin_neon_vcvtd_u64_f64: 2183 Int = Intrinsic::aarch64_neon_fcvtzu; 2184 s = "vcvtzu"; IntTypes = VectorRet | ScalarArg1; break; 2185 // Scalar Floating-point Reciprocal Estimate 2186 case AArch64::BI__builtin_neon_vrecpes_f32: 2187 case AArch64::BI__builtin_neon_vrecped_f64: 2188 Int = Intrinsic::aarch64_neon_vrecpe; 2189 s = "vrecpe"; IntTypes = ScalarRet; break; 2190 // Scalar Floating-point Reciprocal Exponent 2191 case AArch64::BI__builtin_neon_vrecpxs_f32: 2192 case AArch64::BI__builtin_neon_vrecpxd_f64: 2193 Int = Intrinsic::aarch64_neon_vrecpx; 2194 s = "vrecpx"; IntTypes = ScalarRet; break; 2195 // Scalar Floating-point Reciprocal Square Root Estimate 2196 case AArch64::BI__builtin_neon_vrsqrtes_f32: 2197 case AArch64::BI__builtin_neon_vrsqrted_f64: 2198 Int = Intrinsic::aarch64_neon_vrsqrte; 2199 s = "vrsqrte"; IntTypes = ScalarRet; break; 2200 // Scalar Compare Equal 2201 case AArch64::BI__builtin_neon_vceqd_s64: 2202 case AArch64::BI__builtin_neon_vceqd_u64: 2203 Int = Intrinsic::aarch64_neon_vceq; s = "vceq"; 2204 IntTypes = VectorRet | VectorGetArg0 | VectorGetArg1; break; 2205 // Scalar Compare Equal To Zero 2206 case AArch64::BI__builtin_neon_vceqzd_s64: 2207 case AArch64::BI__builtin_neon_vceqzd_u64: 2208 Int = Intrinsic::aarch64_neon_vceq; s = "vceq"; 2209 // Add implicit zero operand. 2210 Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); 2211 IntTypes = VectorRet | VectorGetArg0 | VectorGetArg1; break; 2212 // Scalar Compare Greater Than or Equal 2213 case AArch64::BI__builtin_neon_vcged_s64: 2214 Int = Intrinsic::aarch64_neon_vcge; s = "vcge"; 2215 IntTypes = VectorRet | VectorGetArg0 | VectorGetArg1; break; 2216 case AArch64::BI__builtin_neon_vcged_u64: 2217 Int = Intrinsic::aarch64_neon_vchs; s = "vcge"; 2218 IntTypes = VectorRet | VectorGetArg0 | VectorGetArg1; break; 2219 // Scalar Compare Greater Than or Equal To Zero 2220 case AArch64::BI__builtin_neon_vcgezd_s64: 2221 Int = Intrinsic::aarch64_neon_vcge; s = "vcge"; 2222 // Add implicit zero operand. 2223 Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); 2224 IntTypes = VectorRet | VectorGetArg0 | VectorGetArg1; break; 2225 // Scalar Compare Greater Than 2226 case AArch64::BI__builtin_neon_vcgtd_s64: 2227 Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt"; 2228 IntTypes = VectorRet | VectorGetArg0 | VectorGetArg1; break; 2229 case AArch64::BI__builtin_neon_vcgtd_u64: 2230 Int = Intrinsic::aarch64_neon_vchi; s = "vcgt"; 2231 IntTypes = VectorRet | VectorGetArg0 | VectorGetArg1; break; 2232 // Scalar Compare Greater Than Zero 2233 case AArch64::BI__builtin_neon_vcgtzd_s64: 2234 Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt"; 2235 // Add implicit zero operand. 2236 Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); 2237 IntTypes = VectorRet | VectorGetArg0 | VectorGetArg1; break; 2238 // Scalar Compare Less Than or Equal 2239 case AArch64::BI__builtin_neon_vcled_s64: 2240 Int = Intrinsic::aarch64_neon_vcge; s = "vcge"; 2241 std::swap(Ops[0], Ops[1]); 2242 IntTypes = VectorRet | VectorGetArg0 | VectorGetArg1; break; 2243 case AArch64::BI__builtin_neon_vcled_u64: 2244 Int = Intrinsic::aarch64_neon_vchs; s = "vchs"; 2245 std::swap(Ops[0], Ops[1]); 2246 IntTypes = VectorRet | VectorGetArg0 | VectorGetArg1; break; 2247 // Scalar Compare Less Than or Equal To Zero 2248 case AArch64::BI__builtin_neon_vclezd_s64: 2249 Int = Intrinsic::aarch64_neon_vclez; s = "vcle"; 2250 // Add implicit zero operand. 2251 Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); 2252 IntTypes = VectorRet | VectorGetArg0 | VectorGetArg1; break; 2253 // Scalar Compare Less Than 2254 case AArch64::BI__builtin_neon_vcltd_s64: 2255 Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt"; 2256 std::swap(Ops[0], Ops[1]); 2257 IntTypes = VectorRet | VectorGetArg0 | VectorGetArg1; break; 2258 case AArch64::BI__builtin_neon_vcltd_u64: 2259 Int = Intrinsic::aarch64_neon_vchi; s = "vchi"; 2260 std::swap(Ops[0], Ops[1]); 2261 IntTypes = VectorRet | VectorGetArg0 | VectorGetArg1; break; 2262 // Scalar Compare Less Than Zero 2263 case AArch64::BI__builtin_neon_vcltzd_s64: 2264 Int = Intrinsic::aarch64_neon_vcltz; s = "vclt"; 2265 // Add implicit zero operand. 2266 Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); 2267 IntTypes = VectorRet | VectorGetArg0 | VectorGetArg1; break; 2268 // Scalar Floating-point Compare Equal 2269 case AArch64::BI__builtin_neon_vceqs_f32: 2270 case AArch64::BI__builtin_neon_vceqd_f64: 2271 Int = Intrinsic::aarch64_neon_fceq; s = "vceq"; 2272 IntTypes = VectorRet | ScalarArg0 | ScalarArg1; break; 2273 // Scalar Floating-point Compare Equal To Zero 2274 case AArch64::BI__builtin_neon_vceqzs_f32: 2275 case AArch64::BI__builtin_neon_vceqzd_f64: 2276 Int = Intrinsic::aarch64_neon_fceq; s = "vceq"; 2277 // Add implicit zero operand. 2278 Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy)); 2279 IntTypes = VectorRet | ScalarArg0 | ScalarFpCmpzArg1; break; 2280 // Scalar Floating-point Compare Greater Than Or Equal 2281 case AArch64::BI__builtin_neon_vcges_f32: 2282 case AArch64::BI__builtin_neon_vcged_f64: 2283 Int = Intrinsic::aarch64_neon_fcge; s = "vcge"; 2284 IntTypes = VectorRet | ScalarArg0 | ScalarArg1; break; 2285 // Scalar Floating-point Compare Greater Than Or Equal To Zero 2286 case AArch64::BI__builtin_neon_vcgezs_f32: 2287 case AArch64::BI__builtin_neon_vcgezd_f64: 2288 Int = Intrinsic::aarch64_neon_fcge; s = "vcge"; 2289 // Add implicit zero operand. 2290 Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy)); 2291 IntTypes = VectorRet | ScalarArg0 | ScalarFpCmpzArg1; break; 2292 // Scalar Floating-point Compare Greather Than 2293 case AArch64::BI__builtin_neon_vcgts_f32: 2294 case AArch64::BI__builtin_neon_vcgtd_f64: 2295 Int = Intrinsic::aarch64_neon_fcgt; s = "vcgt"; 2296 IntTypes = VectorRet | ScalarArg0 | ScalarArg1; break; 2297 // Scalar Floating-point Compare Greather Than Zero 2298 case AArch64::BI__builtin_neon_vcgtzs_f32: 2299 case AArch64::BI__builtin_neon_vcgtzd_f64: 2300 Int = Intrinsic::aarch64_neon_fcgt; s = "vcgt"; 2301 // Add implicit zero operand. 2302 Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy)); 2303 IntTypes = VectorRet | ScalarArg0 | ScalarFpCmpzArg1; break; 2304 // Scalar Floating-point Compare Less Than or Equal 2305 case AArch64::BI__builtin_neon_vcles_f32: 2306 case AArch64::BI__builtin_neon_vcled_f64: 2307 Int = Intrinsic::aarch64_neon_fcge; s = "vcge"; 2308 IntTypes = VectorRet | ScalarArg0 | ScalarArg1; break; 2309 // Scalar Floating-point Compare Less Than Or Equal To Zero 2310 case AArch64::BI__builtin_neon_vclezs_f32: 2311 case AArch64::BI__builtin_neon_vclezd_f64: 2312 Int = Intrinsic::aarch64_neon_fclez; s = "vcle"; 2313 // Add implicit zero operand. 2314 Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy)); 2315 IntTypes = VectorRet | ScalarArg0 | ScalarFpCmpzArg1; break; 2316 // Scalar Floating-point Compare Less Than Zero 2317 case AArch64::BI__builtin_neon_vclts_f32: 2318 case AArch64::BI__builtin_neon_vcltd_f64: 2319 Int = Intrinsic::aarch64_neon_fcgt; s = "vcgt"; 2320 std::swap(Ops[0], Ops[1]); 2321 IntTypes = VectorRet | ScalarArg0 | ScalarArg1; break; 2322 // Scalar Floating-point Compare Less Than Zero 2323 case AArch64::BI__builtin_neon_vcltzs_f32: 2324 case AArch64::BI__builtin_neon_vcltzd_f64: 2325 Int = Intrinsic::aarch64_neon_fcltz; s = "vclt"; 2326 // Add implicit zero operand. 2327 Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy)); 2328 IntTypes = VectorRet | ScalarArg0 | ScalarFpCmpzArg1; break; 2329 // Scalar Floating-point Absolute Compare Greater Than Or Equal 2330 case AArch64::BI__builtin_neon_vcages_f32: 2331 case AArch64::BI__builtin_neon_vcaged_f64: 2332 Int = Intrinsic::aarch64_neon_fcage; s = "vcage"; 2333 IntTypes = VectorRet | ScalarArg0 | ScalarArg1; break; 2334 // Scalar Floating-point Absolute Compare Greater Than 2335 case AArch64::BI__builtin_neon_vcagts_f32: 2336 case AArch64::BI__builtin_neon_vcagtd_f64: 2337 Int = Intrinsic::aarch64_neon_fcagt; s = "vcagt"; 2338 IntTypes = VectorRet | ScalarArg0 | ScalarArg1; break; 2339 // Scalar Floating-point Absolute Compare Less Than Or Equal 2340 case AArch64::BI__builtin_neon_vcales_f32: 2341 case AArch64::BI__builtin_neon_vcaled_f64: 2342 Int = Intrinsic::aarch64_neon_fcage; s = "vcage"; 2343 std::swap(Ops[0], Ops[1]); 2344 IntTypes = VectorRet | ScalarArg0 | ScalarArg1; break; 2345 // Scalar Floating-point Absolute Compare Less Than 2346 case AArch64::BI__builtin_neon_vcalts_f32: 2347 case AArch64::BI__builtin_neon_vcaltd_f64: 2348 Int = Intrinsic::aarch64_neon_fcagt; s = "vcalt"; 2349 std::swap(Ops[0], Ops[1]); 2350 IntTypes = VectorRet | ScalarArg0 | ScalarArg1; break; 2351 // Scalar Compare Bitwise Test Bits 2352 case AArch64::BI__builtin_neon_vtstd_s64: 2353 case AArch64::BI__builtin_neon_vtstd_u64: 2354 Int = Intrinsic::aarch64_neon_vtstd; s = "vtst"; 2355 IntTypes = VectorRet | VectorGetArg0 | VectorGetArg1; break; 2356 // Scalar Absolute Value 2357 case AArch64::BI__builtin_neon_vabsd_s64: 2358 Int = Intrinsic::aarch64_neon_vabs; 2359 s = "vabs"; break; 2360 // Scalar Absolute Difference 2361 case AArch64::BI__builtin_neon_vabds_f32: 2362 case AArch64::BI__builtin_neon_vabdd_f64: 2363 Int = Intrinsic::aarch64_neon_vabd; 2364 s = "vabd"; IntTypes = ScalarRet; break; 2365 // Scalar Signed Saturating Absolute Value 2366 case AArch64::BI__builtin_neon_vqabsb_s8: 2367 case AArch64::BI__builtin_neon_vqabsh_s16: 2368 case AArch64::BI__builtin_neon_vqabss_s32: 2369 case AArch64::BI__builtin_neon_vqabsd_s64: 2370 Int = Intrinsic::arm_neon_vqabs; 2371 s = "vqabs"; IntTypes = VectorRet; break; 2372 // Scalar Negate 2373 case AArch64::BI__builtin_neon_vnegd_s64: 2374 Int = Intrinsic::aarch64_neon_vneg; 2375 s = "vneg"; break; 2376 // Scalar Signed Saturating Negate 2377 case AArch64::BI__builtin_neon_vqnegb_s8: 2378 case AArch64::BI__builtin_neon_vqnegh_s16: 2379 case AArch64::BI__builtin_neon_vqnegs_s32: 2380 case AArch64::BI__builtin_neon_vqnegd_s64: 2381 Int = Intrinsic::arm_neon_vqneg; 2382 s = "vqneg"; IntTypes = VectorRet; break; 2383 // Scalar Signed Saturating Accumulated of Unsigned Value 2384 case AArch64::BI__builtin_neon_vuqaddb_s8: 2385 case AArch64::BI__builtin_neon_vuqaddh_s16: 2386 case AArch64::BI__builtin_neon_vuqadds_s32: 2387 case AArch64::BI__builtin_neon_vuqaddd_s64: 2388 Int = Intrinsic::aarch64_neon_vuqadd; 2389 s = "vuqadd"; IntTypes = VectorRet; break; 2390 // Scalar Unsigned Saturating Accumulated of Signed Value 2391 case AArch64::BI__builtin_neon_vsqaddb_u8: 2392 case AArch64::BI__builtin_neon_vsqaddh_u16: 2393 case AArch64::BI__builtin_neon_vsqadds_u32: 2394 case AArch64::BI__builtin_neon_vsqaddd_u64: 2395 Int = Intrinsic::aarch64_neon_vsqadd; 2396 s = "vsqadd"; IntTypes = VectorRet; break; 2397 // Signed Saturating Doubling Multiply-Add Long 2398 case AArch64::BI__builtin_neon_vqdmlalh_s16: 2399 case AArch64::BI__builtin_neon_vqdmlals_s32: 2400 Int = Intrinsic::aarch64_neon_vqdmlal; 2401 s = "vqdmlal"; IntTypes = VectorRet; break; 2402 // Signed Saturating Doubling Multiply-Subtract Long 2403 case AArch64::BI__builtin_neon_vqdmlslh_s16: 2404 case AArch64::BI__builtin_neon_vqdmlsls_s32: 2405 Int = Intrinsic::aarch64_neon_vqdmlsl; 2406 s = "vqdmlsl"; IntTypes = VectorRet; break; 2407 // Signed Saturating Doubling Multiply Long 2408 case AArch64::BI__builtin_neon_vqdmullh_s16: 2409 case AArch64::BI__builtin_neon_vqdmulls_s32: 2410 Int = Intrinsic::arm_neon_vqdmull; 2411 s = "vqdmull"; IntTypes = VectorRet; break; 2412 // Scalar Signed Saturating Extract Unsigned Narrow 2413 case AArch64::BI__builtin_neon_vqmovunh_s16: 2414 case AArch64::BI__builtin_neon_vqmovuns_s32: 2415 case AArch64::BI__builtin_neon_vqmovund_s64: 2416 Int = Intrinsic::arm_neon_vqmovnsu; 2417 s = "vqmovun"; IntTypes = VectorRet; break; 2418 // Scalar Signed Saturating Extract Narrow 2419 case AArch64::BI__builtin_neon_vqmovnh_s16: 2420 case AArch64::BI__builtin_neon_vqmovns_s32: 2421 case AArch64::BI__builtin_neon_vqmovnd_s64: 2422 Int = Intrinsic::arm_neon_vqmovns; 2423 s = "vqmovn"; IntTypes = VectorRet; break; 2424 // Scalar Unsigned Saturating Extract Narrow 2425 case AArch64::BI__builtin_neon_vqmovnh_u16: 2426 case AArch64::BI__builtin_neon_vqmovns_u32: 2427 case AArch64::BI__builtin_neon_vqmovnd_u64: 2428 Int = Intrinsic::arm_neon_vqmovnu; 2429 s = "vqmovn"; IntTypes = VectorRet; break; 2430 // Scalar Signed Shift Right (Immediate) 2431 case AArch64::BI__builtin_neon_vshrd_n_s64: 2432 Int = Intrinsic::aarch64_neon_vshrds_n; 2433 s = "vsshr"; break; 2434 // Scalar Unsigned Shift Right (Immediate) 2435 case AArch64::BI__builtin_neon_vshrd_n_u64: 2436 Int = Intrinsic::aarch64_neon_vshrdu_n; 2437 s = "vushr"; break; 2438 // Scalar Signed Rounding Shift Right (Immediate) 2439 case AArch64::BI__builtin_neon_vrshrd_n_s64: 2440 Int = Intrinsic::aarch64_neon_vsrshr; 2441 s = "vsrshr"; IntTypes = VectorRet; break; 2442 // Scalar Unsigned Rounding Shift Right (Immediate) 2443 case AArch64::BI__builtin_neon_vrshrd_n_u64: 2444 Int = Intrinsic::aarch64_neon_vurshr; 2445 s = "vurshr"; IntTypes = VectorRet; break; 2446 // Scalar Signed Shift Right and Accumulate (Immediate) 2447 case AArch64::BI__builtin_neon_vsrad_n_s64: 2448 Int = Intrinsic::aarch64_neon_vsrads_n; 2449 s = "vssra"; break; 2450 // Scalar Unsigned Shift Right and Accumulate (Immediate) 2451 case AArch64::BI__builtin_neon_vsrad_n_u64: 2452 Int = Intrinsic::aarch64_neon_vsradu_n; 2453 s = "vusra"; break; 2454 // Scalar Signed Rounding Shift Right and Accumulate (Immediate) 2455 case AArch64::BI__builtin_neon_vrsrad_n_s64: 2456 Int = Intrinsic::aarch64_neon_vrsrads_n; 2457 s = "vsrsra"; break; 2458 // Scalar Unsigned Rounding Shift Right and Accumulate (Immediate) 2459 case AArch64::BI__builtin_neon_vrsrad_n_u64: 2460 Int = Intrinsic::aarch64_neon_vrsradu_n; 2461 s = "vursra"; break; 2462 // Scalar Signed/Unsigned Shift Left (Immediate) 2463 case AArch64::BI__builtin_neon_vshld_n_s64: 2464 case AArch64::BI__builtin_neon_vshld_n_u64: 2465 Int = Intrinsic::aarch64_neon_vshld_n; 2466 s = "vshl"; break; 2467 // Signed Saturating Shift Left (Immediate) 2468 case AArch64::BI__builtin_neon_vqshlb_n_s8: 2469 case AArch64::BI__builtin_neon_vqshlh_n_s16: 2470 case AArch64::BI__builtin_neon_vqshls_n_s32: 2471 case AArch64::BI__builtin_neon_vqshld_n_s64: 2472 Int = Intrinsic::aarch64_neon_vqshls_n; 2473 s = "vsqshl"; IntTypes = VectorRet; break; 2474 // Unsigned Saturating Shift Left (Immediate) 2475 case AArch64::BI__builtin_neon_vqshlb_n_u8: 2476 case AArch64::BI__builtin_neon_vqshlh_n_u16: 2477 case AArch64::BI__builtin_neon_vqshls_n_u32: 2478 case AArch64::BI__builtin_neon_vqshld_n_u64: 2479 Int = Intrinsic::aarch64_neon_vqshlu_n; 2480 s = "vuqshl"; IntTypes = VectorRet; break; 2481 // Signed Saturating Shift Left Unsigned (Immediate) 2482 case AArch64::BI__builtin_neon_vqshlub_n_s8: 2483 case AArch64::BI__builtin_neon_vqshluh_n_s16: 2484 case AArch64::BI__builtin_neon_vqshlus_n_s32: 2485 case AArch64::BI__builtin_neon_vqshlud_n_s64: 2486 Int = Intrinsic::aarch64_neon_vsqshlu; 2487 s = "vsqshlu"; IntTypes = VectorRet; break; 2488 // Shift Right And Insert (Immediate) 2489 case AArch64::BI__builtin_neon_vsrid_n_s64: 2490 case AArch64::BI__builtin_neon_vsrid_n_u64: 2491 Int = Intrinsic::aarch64_neon_vsri; 2492 s = "vsri"; IntTypes = VectorRet; break; 2493 // Shift Left And Insert (Immediate) 2494 case AArch64::BI__builtin_neon_vslid_n_s64: 2495 case AArch64::BI__builtin_neon_vslid_n_u64: 2496 Int = Intrinsic::aarch64_neon_vsli; 2497 s = "vsli"; IntTypes = VectorRet; break; 2498 // Signed Saturating Shift Right Narrow (Immediate) 2499 case AArch64::BI__builtin_neon_vqshrnh_n_s16: 2500 case AArch64::BI__builtin_neon_vqshrns_n_s32: 2501 case AArch64::BI__builtin_neon_vqshrnd_n_s64: 2502 Int = Intrinsic::aarch64_neon_vsqshrn; 2503 s = "vsqshrn"; IntTypes = VectorRet; break; 2504 // Unsigned Saturating Shift Right Narrow (Immediate) 2505 case AArch64::BI__builtin_neon_vqshrnh_n_u16: 2506 case AArch64::BI__builtin_neon_vqshrns_n_u32: 2507 case AArch64::BI__builtin_neon_vqshrnd_n_u64: 2508 Int = Intrinsic::aarch64_neon_vuqshrn; 2509 s = "vuqshrn"; IntTypes = VectorRet; break; 2510 // Signed Saturating Rounded Shift Right Narrow (Immediate) 2511 case AArch64::BI__builtin_neon_vqrshrnh_n_s16: 2512 case AArch64::BI__builtin_neon_vqrshrns_n_s32: 2513 case AArch64::BI__builtin_neon_vqrshrnd_n_s64: 2514 Int = Intrinsic::aarch64_neon_vsqrshrn; 2515 s = "vsqrshrn"; IntTypes = VectorRet; break; 2516 // Unsigned Saturating Rounded Shift Right Narrow (Immediate) 2517 case AArch64::BI__builtin_neon_vqrshrnh_n_u16: 2518 case AArch64::BI__builtin_neon_vqrshrns_n_u32: 2519 case AArch64::BI__builtin_neon_vqrshrnd_n_u64: 2520 Int = Intrinsic::aarch64_neon_vuqrshrn; 2521 s = "vuqrshrn"; IntTypes = VectorRet; break; 2522 // Signed Saturating Shift Right Unsigned Narrow (Immediate) 2523 case AArch64::BI__builtin_neon_vqshrunh_n_s16: 2524 case AArch64::BI__builtin_neon_vqshruns_n_s32: 2525 case AArch64::BI__builtin_neon_vqshrund_n_s64: 2526 Int = Intrinsic::aarch64_neon_vsqshrun; 2527 s = "vsqshrun"; IntTypes = VectorRet; break; 2528 // Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate) 2529 case AArch64::BI__builtin_neon_vqrshrunh_n_s16: 2530 case AArch64::BI__builtin_neon_vqrshruns_n_s32: 2531 case AArch64::BI__builtin_neon_vqrshrund_n_s64: 2532 Int = Intrinsic::aarch64_neon_vsqrshrun; 2533 s = "vsqrshrun"; IntTypes = VectorRet; break; 2534 // Scalar Signed Fixed-point Convert To Floating-Point (Immediate) 2535 case AArch64::BI__builtin_neon_vcvts_n_f32_s32: 2536 case AArch64::BI__builtin_neon_vcvtd_n_f64_s64: 2537 Int = Intrinsic::aarch64_neon_vcvtfxs2fp_n; 2538 s = "vcvtf"; IntTypes = ScalarRet | VectorGetArg0; break; 2539 // Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate) 2540 case AArch64::BI__builtin_neon_vcvts_n_f32_u32: 2541 case AArch64::BI__builtin_neon_vcvtd_n_f64_u64: 2542 Int = Intrinsic::aarch64_neon_vcvtfxu2fp_n; 2543 s = "vcvtf"; IntTypes = ScalarRet | VectorGetArg0; break; 2544 // Scalar Floating-point Convert To Signed Fixed-point (Immediate) 2545 case AArch64::BI__builtin_neon_vcvts_n_s32_f32: 2546 case AArch64::BI__builtin_neon_vcvtd_n_s64_f64: 2547 Int = Intrinsic::aarch64_neon_vcvtfp2fxs_n; 2548 s = "fcvtzs"; IntTypes = VectorRet | ScalarArg0; break; 2549 // Scalar Floating-point Convert To Unsigned Fixed-point (Immediate) 2550 case AArch64::BI__builtin_neon_vcvts_n_u32_f32: 2551 case AArch64::BI__builtin_neon_vcvtd_n_u64_f64: 2552 Int = Intrinsic::aarch64_neon_vcvtfp2fxu_n; 2553 s = "fcvtzu"; IntTypes = VectorRet | ScalarArg0; break; 2554 case AArch64::BI__builtin_neon_vmull_p64: 2555 Int = Intrinsic::aarch64_neon_vmull_p64; 2556 s = "vmull"; break; 2557 } 2558 2559 if (!Int) 2560 return 0; 2561 2562 // Determine the type(s) of this overloaded AArch64 intrinsic. 2563 Function *F = 0; 2564 SmallVector<llvm::Type *, 3> Tys; 2565 2566 // Return type. 2567 if (IntTypes & (ScalarRet | VectorRet)) { 2568 llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType()); 2569 if (IntTypes & ScalarRet) { 2570 // Scalar return value. 2571 Tys.push_back(Ty); 2572 } else if (IntTypes & VectorRet) { 2573 // Convert the scalar return type to one-vector element type. 2574 Tys.push_back(llvm::VectorType::get(Ty, 1)); 2575 } 2576 } 2577 2578 // Arguments. 2579 if (IntTypes & (ScalarArg0 | VectorGetArg0 | VectorCastArg0)) { 2580 const Expr *Arg = E->getArg(0); 2581 llvm::Type *Ty = CGF.ConvertType(Arg->getType()); 2582 if (IntTypes & ScalarArg0) { 2583 // Scalar argument. 2584 Tys.push_back(Ty); 2585 } else if (IntTypes & VectorGetArg0) { 2586 // Convert the scalar argument to one-vector element type. 2587 Tys.push_back(llvm::VectorType::get(Ty, 1)); 2588 } else if (IntTypes & VectorCastArg0) { 2589 // Cast the argument to vector type. 2590 Tys.push_back(cast<llvm::VectorType>(Ty)); 2591 } 2592 } 2593 2594 // The only intrinsics that require a 2nd argument are the compare intrinsics. 2595 // However, the builtins don't always have a 2nd argument (e.g., 2596 // floating-point compare to zero), so we inspect the first argument to 2597 // determine the type. 2598 if (IntTypes & (ScalarArg1 | VectorGetArg1 | VectorCastArg1)) { 2599 const Expr *Arg = E->getArg(0); 2600 llvm::Type *Ty = CGF.ConvertType(Arg->getType()); 2601 if (IntTypes & ScalarArg1) { 2602 // Scalar argument. 2603 Tys.push_back(Ty); 2604 } else if (IntTypes & VectorGetArg1) { 2605 // Convert the scalar argument to one-vector element type. 2606 Tys.push_back(llvm::VectorType::get(Ty, 1)); 2607 } else if (IntTypes & VectorCastArg1) { 2608 // Cast the argument to a vector type. 2609 Tys.push_back(cast<llvm::VectorType>(Ty)); 2610 } 2611 } else if (IntTypes & ScalarFpCmpzArg1) { 2612 // Floating-point zero argument. 2613 Tys.push_back(CGF.FloatTy); 2614 } 2615 2616 if (IntTypes) 2617 F = CGF.CGM.getIntrinsic(Int, Tys); 2618 else 2619 F = CGF.CGM.getIntrinsic(Int); 2620 2621 Value *Result = CGF.EmitNeonCall(F, Ops, s); 2622 llvm::Type *ResultType = CGF.ConvertType(E->getType()); 2623 // AArch64 intrinsic one-element vector type cast to 2624 // scalar type expected by the builtin 2625 return CGF.Builder.CreateBitCast(Result, ResultType, s); 2626 } 2627 2628 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr( 2629 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp, 2630 const CmpInst::Predicate Ip, const Twine &Name) { 2631 llvm::Type *OTy = ((llvm::User *)Op)->getOperand(0)->getType(); 2632 if (OTy->isPointerTy()) 2633 OTy = Ty; 2634 Op = Builder.CreateBitCast(Op, OTy); 2635 if (((llvm::VectorType *)OTy)->getElementType()->isFloatingPointTy()) { 2636 Op = Builder.CreateFCmp(Fp, Op, ConstantAggregateZero::get(OTy)); 2637 } else { 2638 Op = Builder.CreateICmp(Ip, Op, ConstantAggregateZero::get(OTy)); 2639 } 2640 return Builder.CreateSExt(Op, Ty, Name); 2641 } 2642 2643 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, 2644 Value *ExtOp, Value *IndexOp, 2645 llvm::Type *ResTy, unsigned IntID, 2646 const char *Name) { 2647 SmallVector<Value *, 2> TblOps; 2648 if (ExtOp) 2649 TblOps.push_back(ExtOp); 2650 2651 // Build a vector containing sequential number like (0, 1, 2, ..., 15) 2652 SmallVector<Constant*, 16> Indices; 2653 llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType()); 2654 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) { 2655 Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i)); 2656 Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i+1)); 2657 } 2658 Value *SV = llvm::ConstantVector::get(Indices); 2659 2660 int PairPos = 0, End = Ops.size() - 1; 2661 while (PairPos < End) { 2662 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 2663 Ops[PairPos+1], SV, Name)); 2664 PairPos += 2; 2665 } 2666 2667 // If there's an odd number of 64-bit lookup table, fill the high 64-bit 2668 // of the 128-bit lookup table with zero. 2669 if (PairPos == End) { 2670 Value *ZeroTbl = ConstantAggregateZero::get(TblTy); 2671 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 2672 ZeroTbl, SV, Name)); 2673 } 2674 2675 TblTy = llvm::VectorType::get(TblTy->getElementType(), 2676 2*TblTy->getNumElements()); 2677 llvm::Type *Tys[2] = { ResTy, TblTy }; 2678 2679 Function *TblF; 2680 TblOps.push_back(IndexOp); 2681 TblF = CGF.CGM.getIntrinsic(IntID, Tys); 2682 2683 return CGF.EmitNeonCall(TblF, TblOps, Name); 2684 } 2685 2686 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, 2687 unsigned BuiltinID, 2688 const CallExpr *E) { 2689 unsigned int Int = 0; 2690 const char *s = NULL; 2691 2692 unsigned TblPos; 2693 switch (BuiltinID) { 2694 default: 2695 return 0; 2696 case AArch64::BI__builtin_neon_vtbl1_v: 2697 case AArch64::BI__builtin_neon_vqtbl1_v: 2698 case AArch64::BI__builtin_neon_vqtbl1q_v: 2699 case AArch64::BI__builtin_neon_vtbl2_v: 2700 case AArch64::BI__builtin_neon_vqtbl2_v: 2701 case AArch64::BI__builtin_neon_vqtbl2q_v: 2702 case AArch64::BI__builtin_neon_vtbl3_v: 2703 case AArch64::BI__builtin_neon_vqtbl3_v: 2704 case AArch64::BI__builtin_neon_vqtbl3q_v: 2705 case AArch64::BI__builtin_neon_vtbl4_v: 2706 case AArch64::BI__builtin_neon_vqtbl4_v: 2707 case AArch64::BI__builtin_neon_vqtbl4q_v: 2708 TblPos = 0; 2709 break; 2710 case AArch64::BI__builtin_neon_vtbx1_v: 2711 case AArch64::BI__builtin_neon_vqtbx1_v: 2712 case AArch64::BI__builtin_neon_vqtbx1q_v: 2713 case AArch64::BI__builtin_neon_vtbx2_v: 2714 case AArch64::BI__builtin_neon_vqtbx2_v: 2715 case AArch64::BI__builtin_neon_vqtbx2q_v: 2716 case AArch64::BI__builtin_neon_vtbx3_v: 2717 case AArch64::BI__builtin_neon_vqtbx3_v: 2718 case AArch64::BI__builtin_neon_vqtbx3q_v: 2719 case AArch64::BI__builtin_neon_vtbx4_v: 2720 case AArch64::BI__builtin_neon_vqtbx4_v: 2721 case AArch64::BI__builtin_neon_vqtbx4q_v: 2722 TblPos = 1; 2723 break; 2724 } 2725 2726 assert(E->getNumArgs() >= 3); 2727 2728 // Get the last argument, which specifies the vector type. 2729 llvm::APSInt Result; 2730 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 2731 if (!Arg->isIntegerConstantExpr(Result, CGF.getContext())) 2732 return 0; 2733 2734 // Determine the type of this overloaded NEON intrinsic. 2735 NeonTypeFlags Type(Result.getZExtValue()); 2736 llvm::VectorType *VTy = GetNeonType(&CGF, Type); 2737 llvm::Type *Ty = VTy; 2738 if (!Ty) 2739 return 0; 2740 2741 SmallVector<Value *, 4> Ops; 2742 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { 2743 Ops.push_back(CGF.EmitScalarExpr(E->getArg(i))); 2744 } 2745 2746 Arg = E->getArg(TblPos); 2747 llvm::Type *TblTy = CGF.ConvertType(Arg->getType()); 2748 llvm::VectorType *VTblTy = cast<llvm::VectorType>(TblTy); 2749 llvm::Type *Tys[2] = { Ty, VTblTy }; 2750 unsigned nElts = VTy->getNumElements(); 2751 2752 // AArch64 scalar builtins are not overloaded, they do not have an extra 2753 // argument that specifies the vector type, need to handle each case. 2754 SmallVector<Value *, 2> TblOps; 2755 switch (BuiltinID) { 2756 case AArch64::BI__builtin_neon_vtbl1_v: { 2757 TblOps.push_back(Ops[0]); 2758 return packTBLDVectorList(CGF, TblOps, 0, Ops[1], Ty, 2759 Intrinsic::aarch64_neon_vtbl1, "vtbl1"); 2760 } 2761 case AArch64::BI__builtin_neon_vtbl2_v: { 2762 TblOps.push_back(Ops[0]); 2763 TblOps.push_back(Ops[1]); 2764 return packTBLDVectorList(CGF, TblOps, 0, Ops[2], Ty, 2765 Intrinsic::aarch64_neon_vtbl1, "vtbl1"); 2766 } 2767 case AArch64::BI__builtin_neon_vtbl3_v: { 2768 TblOps.push_back(Ops[0]); 2769 TblOps.push_back(Ops[1]); 2770 TblOps.push_back(Ops[2]); 2771 return packTBLDVectorList(CGF, TblOps, 0, Ops[3], Ty, 2772 Intrinsic::aarch64_neon_vtbl2, "vtbl2"); 2773 } 2774 case AArch64::BI__builtin_neon_vtbl4_v: { 2775 TblOps.push_back(Ops[0]); 2776 TblOps.push_back(Ops[1]); 2777 TblOps.push_back(Ops[2]); 2778 TblOps.push_back(Ops[3]); 2779 return packTBLDVectorList(CGF, TblOps, 0, Ops[4], Ty, 2780 Intrinsic::aarch64_neon_vtbl2, "vtbl2"); 2781 } 2782 case AArch64::BI__builtin_neon_vtbx1_v: { 2783 TblOps.push_back(Ops[1]); 2784 Value *TblRes = packTBLDVectorList(CGF, TblOps, 0, Ops[2], Ty, 2785 Intrinsic::aarch64_neon_vtbl1, "vtbl1"); 2786 2787 llvm::Constant *Eight = ConstantInt::get(VTy->getElementType(), 8); 2788 Value* EightV = llvm::ConstantVector::getSplat(nElts, Eight); 2789 Value *CmpRes = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV); 2790 CmpRes = CGF.Builder.CreateSExt(CmpRes, Ty); 2791 2792 SmallVector<Value *, 4> BslOps; 2793 BslOps.push_back(CmpRes); 2794 BslOps.push_back(Ops[0]); 2795 BslOps.push_back(TblRes); 2796 Function *BslF = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty); 2797 return CGF.EmitNeonCall(BslF, BslOps, "vbsl"); 2798 } 2799 case AArch64::BI__builtin_neon_vtbx2_v: { 2800 TblOps.push_back(Ops[1]); 2801 TblOps.push_back(Ops[2]); 2802 return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[3], Ty, 2803 Intrinsic::aarch64_neon_vtbx1, "vtbx1"); 2804 } 2805 case AArch64::BI__builtin_neon_vtbx3_v: { 2806 TblOps.push_back(Ops[1]); 2807 TblOps.push_back(Ops[2]); 2808 TblOps.push_back(Ops[3]); 2809 Value *TblRes = packTBLDVectorList(CGF, TblOps, 0, Ops[4], Ty, 2810 Intrinsic::aarch64_neon_vtbl2, "vtbl2"); 2811 2812 llvm::Constant *TwentyFour = ConstantInt::get(VTy->getElementType(), 24); 2813 Value* TwentyFourV = llvm::ConstantVector::getSplat(nElts, TwentyFour); 2814 Value *CmpRes = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4], 2815 TwentyFourV); 2816 CmpRes = CGF.Builder.CreateSExt(CmpRes, Ty); 2817 2818 SmallVector<Value *, 4> BslOps; 2819 BslOps.push_back(CmpRes); 2820 BslOps.push_back(Ops[0]); 2821 BslOps.push_back(TblRes); 2822 Function *BslF = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty); 2823 return CGF.EmitNeonCall(BslF, BslOps, "vbsl"); 2824 } 2825 case AArch64::BI__builtin_neon_vtbx4_v: { 2826 TblOps.push_back(Ops[1]); 2827 TblOps.push_back(Ops[2]); 2828 TblOps.push_back(Ops[3]); 2829 TblOps.push_back(Ops[4]); 2830 return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[5], Ty, 2831 Intrinsic::aarch64_neon_vtbx2, "vtbx2"); 2832 } 2833 case AArch64::BI__builtin_neon_vqtbl1_v: 2834 case AArch64::BI__builtin_neon_vqtbl1q_v: 2835 Int = Intrinsic::aarch64_neon_vtbl1; s = "vtbl1"; break; 2836 case AArch64::BI__builtin_neon_vqtbl2_v: 2837 case AArch64::BI__builtin_neon_vqtbl2q_v: { 2838 Int = Intrinsic::aarch64_neon_vtbl2; s = "vtbl2"; break; 2839 case AArch64::BI__builtin_neon_vqtbl3_v: 2840 case AArch64::BI__builtin_neon_vqtbl3q_v: 2841 Int = Intrinsic::aarch64_neon_vtbl3; s = "vtbl3"; break; 2842 case AArch64::BI__builtin_neon_vqtbl4_v: 2843 case AArch64::BI__builtin_neon_vqtbl4q_v: 2844 Int = Intrinsic::aarch64_neon_vtbl4; s = "vtbl4"; break; 2845 case AArch64::BI__builtin_neon_vqtbx1_v: 2846 case AArch64::BI__builtin_neon_vqtbx1q_v: 2847 Int = Intrinsic::aarch64_neon_vtbx1; s = "vtbx1"; break; 2848 case AArch64::BI__builtin_neon_vqtbx2_v: 2849 case AArch64::BI__builtin_neon_vqtbx2q_v: 2850 Int = Intrinsic::aarch64_neon_vtbx2; s = "vtbx2"; break; 2851 case AArch64::BI__builtin_neon_vqtbx3_v: 2852 case AArch64::BI__builtin_neon_vqtbx3q_v: 2853 Int = Intrinsic::aarch64_neon_vtbx3; s = "vtbx3"; break; 2854 case AArch64::BI__builtin_neon_vqtbx4_v: 2855 case AArch64::BI__builtin_neon_vqtbx4q_v: 2856 Int = Intrinsic::aarch64_neon_vtbx4; s = "vtbx4"; break; 2857 } 2858 } 2859 2860 if (!Int) 2861 return 0; 2862 2863 Function *F = CGF.CGM.getIntrinsic(Int, Tys); 2864 return CGF.EmitNeonCall(F, Ops, s); 2865 } 2866 2867 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, 2868 const CallExpr *E) { 2869 // Process AArch64 scalar builtins 2870 if (Value *Result = EmitAArch64ScalarBuiltinExpr(*this, BuiltinID, E)) 2871 return Result; 2872 2873 // Process AArch64 table lookup builtins 2874 if (Value *Result = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E)) 2875 return Result; 2876 2877 if (BuiltinID == AArch64::BI__clear_cache) { 2878 assert(E->getNumArgs() == 2 && 2879 "Variadic __clear_cache slipped through on AArch64"); 2880 2881 const FunctionDecl *FD = E->getDirectCallee(); 2882 SmallVector<Value *, 2> Ops; 2883 for (unsigned i = 0; i < E->getNumArgs(); i++) 2884 Ops.push_back(EmitScalarExpr(E->getArg(i))); 2885 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 2886 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 2887 StringRef Name = FD->getName(); 2888 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 2889 } 2890 2891 SmallVector<Value *, 4> Ops; 2892 llvm::Value *Align = 0; // Alignment for load/store 2893 2894 if (BuiltinID == AArch64::BI__builtin_neon_vldrq_p128) { 2895 Value *Op = EmitScalarExpr(E->getArg(0)); 2896 unsigned addressSpace = 2897 cast<llvm::PointerType>(Op->getType())->getAddressSpace(); 2898 llvm::Type *Ty = llvm::Type::getFP128PtrTy(getLLVMContext(), addressSpace); 2899 Op = Builder.CreateBitCast(Op, Ty); 2900 Op = Builder.CreateLoad(Op); 2901 Ty = llvm::Type::getIntNTy(getLLVMContext(), 128); 2902 return Builder.CreateBitCast(Op, Ty); 2903 } 2904 if (BuiltinID == AArch64::BI__builtin_neon_vstrq_p128) { 2905 Value *Op0 = EmitScalarExpr(E->getArg(0)); 2906 unsigned addressSpace = 2907 cast<llvm::PointerType>(Op0->getType())->getAddressSpace(); 2908 llvm::Type *PTy = llvm::Type::getFP128PtrTy(getLLVMContext(), addressSpace); 2909 Op0 = Builder.CreateBitCast(Op0, PTy); 2910 Value *Op1 = EmitScalarExpr(E->getArg(1)); 2911 llvm::Type *Ty = llvm::Type::getFP128Ty(getLLVMContext()); 2912 Op1 = Builder.CreateBitCast(Op1, Ty); 2913 return Builder.CreateStore(Op1, Op0); 2914 } 2915 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { 2916 if (i == 0) { 2917 switch (BuiltinID) { 2918 case AArch64::BI__builtin_neon_vst1_x2_v: 2919 case AArch64::BI__builtin_neon_vst1q_x2_v: 2920 case AArch64::BI__builtin_neon_vst1_x3_v: 2921 case AArch64::BI__builtin_neon_vst1q_x3_v: 2922 case AArch64::BI__builtin_neon_vst1_x4_v: 2923 case AArch64::BI__builtin_neon_vst1q_x4_v: 2924 // Handle ld1/st1 lane in this function a little different from ARM. 2925 case AArch64::BI__builtin_neon_vld1_lane_v: 2926 case AArch64::BI__builtin_neon_vld1q_lane_v: 2927 case AArch64::BI__builtin_neon_vst1_lane_v: 2928 case AArch64::BI__builtin_neon_vst1q_lane_v: 2929 // Get the alignment for the argument in addition to the value; 2930 // we'll use it later. 2931 std::pair<llvm::Value *, unsigned> Src = 2932 EmitPointerWithAlignment(E->getArg(0)); 2933 Ops.push_back(Src.first); 2934 Align = Builder.getInt32(Src.second); 2935 continue; 2936 } 2937 } 2938 if (i == 1) { 2939 switch (BuiltinID) { 2940 case AArch64::BI__builtin_neon_vld1_x2_v: 2941 case AArch64::BI__builtin_neon_vld1q_x2_v: 2942 case AArch64::BI__builtin_neon_vld1_x3_v: 2943 case AArch64::BI__builtin_neon_vld1q_x3_v: 2944 case AArch64::BI__builtin_neon_vld1_x4_v: 2945 case AArch64::BI__builtin_neon_vld1q_x4_v: 2946 // Handle ld1/st1 dup lane in this function a little different from ARM. 2947 case AArch64::BI__builtin_neon_vld2_dup_v: 2948 case AArch64::BI__builtin_neon_vld2q_dup_v: 2949 case AArch64::BI__builtin_neon_vld3_dup_v: 2950 case AArch64::BI__builtin_neon_vld3q_dup_v: 2951 case AArch64::BI__builtin_neon_vld4_dup_v: 2952 case AArch64::BI__builtin_neon_vld4q_dup_v: 2953 case AArch64::BI__builtin_neon_vld2_lane_v: 2954 case AArch64::BI__builtin_neon_vld2q_lane_v: 2955 // Get the alignment for the argument in addition to the value; 2956 // we'll use it later. 2957 std::pair<llvm::Value *, unsigned> Src = 2958 EmitPointerWithAlignment(E->getArg(1)); 2959 Ops.push_back(Src.first); 2960 Align = Builder.getInt32(Src.second); 2961 continue; 2962 } 2963 } 2964 Ops.push_back(EmitScalarExpr(E->getArg(i))); 2965 } 2966 2967 // Get the last argument, which specifies the vector type. 2968 llvm::APSInt Result; 2969 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 2970 if (!Arg->isIntegerConstantExpr(Result, getContext())) 2971 return 0; 2972 2973 // Determine the type of this overloaded NEON intrinsic. 2974 NeonTypeFlags Type(Result.getZExtValue()); 2975 bool usgn = Type.isUnsigned(); 2976 bool quad = Type.isQuad(); 2977 2978 llvm::VectorType *VTy = GetNeonType(this, Type); 2979 llvm::Type *Ty = VTy; 2980 if (!Ty) 2981 return 0; 2982 2983 unsigned Int; 2984 switch (BuiltinID) { 2985 default: 2986 return 0; 2987 2988 // AArch64 builtins mapping to legacy ARM v7 builtins. 2989 // FIXME: the mapped builtins listed correspond to what has been tested 2990 // in aarch64-neon-intrinsics.c so far. 2991 case AArch64::BI__builtin_neon_vuzp_v: 2992 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vuzp_v, E); 2993 case AArch64::BI__builtin_neon_vuzpq_v: 2994 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vuzpq_v, E); 2995 case AArch64::BI__builtin_neon_vzip_v: 2996 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vzip_v, E); 2997 case AArch64::BI__builtin_neon_vzipq_v: 2998 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vzipq_v, E); 2999 case AArch64::BI__builtin_neon_vtrn_v: 3000 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtrn_v, E); 3001 case AArch64::BI__builtin_neon_vtrnq_v: 3002 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtrnq_v, E); 3003 case AArch64::BI__builtin_neon_vext_v: 3004 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vext_v, E); 3005 case AArch64::BI__builtin_neon_vextq_v: 3006 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vextq_v, E); 3007 case AArch64::BI__builtin_neon_vmul_v: 3008 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmul_v, E); 3009 case AArch64::BI__builtin_neon_vmulq_v: 3010 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmulq_v, E); 3011 case AArch64::BI__builtin_neon_vabd_v: 3012 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vabd_v, E); 3013 case AArch64::BI__builtin_neon_vabdq_v: 3014 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vabdq_v, E); 3015 case AArch64::BI__builtin_neon_vfma_v: 3016 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vfma_v, E); 3017 case AArch64::BI__builtin_neon_vfmaq_v: 3018 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vfmaq_v, E); 3019 case AArch64::BI__builtin_neon_vbsl_v: 3020 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vbsl_v, E); 3021 case AArch64::BI__builtin_neon_vbslq_v: 3022 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vbslq_v, E); 3023 case AArch64::BI__builtin_neon_vrsqrts_v: 3024 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsqrts_v, E); 3025 case AArch64::BI__builtin_neon_vrsqrtsq_v: 3026 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsqrtsq_v, E); 3027 case AArch64::BI__builtin_neon_vrecps_v: 3028 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrecps_v, E); 3029 case AArch64::BI__builtin_neon_vrecpsq_v: 3030 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrecpsq_v, E); 3031 case AArch64::BI__builtin_neon_vcale_v: 3032 if (VTy->getVectorNumElements() == 1) { 3033 std::swap(Ops[0], Ops[1]); 3034 } else { 3035 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcale_v, E); 3036 } 3037 case AArch64::BI__builtin_neon_vcage_v: 3038 if (VTy->getVectorNumElements() == 1) { 3039 // Determine the types of this overloaded AArch64 intrinsic 3040 SmallVector<llvm::Type *, 3> Tys; 3041 Tys.push_back(VTy); 3042 VTy = llvm::VectorType::get(DoubleTy, 1); 3043 Tys.push_back(VTy); 3044 Tys.push_back(VTy); 3045 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vcage, Tys); 3046 return EmitNeonCall(F, Ops, "vcage"); 3047 } 3048 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcage_v, E); 3049 case AArch64::BI__builtin_neon_vcaleq_v: 3050 std::swap(Ops[0], Ops[1]); 3051 case AArch64::BI__builtin_neon_vcageq_v: { 3052 Function *F; 3053 if (VTy->getElementType()->isIntegerTy(64)) 3054 F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vacgeq); 3055 else 3056 F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq); 3057 return EmitNeonCall(F, Ops, "vcage"); 3058 } 3059 case AArch64::BI__builtin_neon_vcalt_v: 3060 if (VTy->getVectorNumElements() == 1) { 3061 std::swap(Ops[0], Ops[1]); 3062 } else { 3063 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcalt_v, E); 3064 } 3065 case AArch64::BI__builtin_neon_vcagt_v: 3066 if (VTy->getVectorNumElements() == 1) { 3067 // Determine the types of this overloaded AArch64 intrinsic 3068 SmallVector<llvm::Type *, 3> Tys; 3069 Tys.push_back(VTy); 3070 VTy = llvm::VectorType::get(DoubleTy, 1); 3071 Tys.push_back(VTy); 3072 Tys.push_back(VTy); 3073 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vcagt, Tys); 3074 return EmitNeonCall(F, Ops, "vcagt"); 3075 } 3076 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcagt_v, E); 3077 case AArch64::BI__builtin_neon_vcaltq_v: 3078 std::swap(Ops[0], Ops[1]); 3079 case AArch64::BI__builtin_neon_vcagtq_v: { 3080 Function *F; 3081 if (VTy->getElementType()->isIntegerTy(64)) 3082 F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vacgtq); 3083 else 3084 F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq); 3085 return EmitNeonCall(F, Ops, "vcagt"); 3086 } 3087 case AArch64::BI__builtin_neon_vtst_v: 3088 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtst_v, E); 3089 case AArch64::BI__builtin_neon_vtstq_v: 3090 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtstq_v, E); 3091 case AArch64::BI__builtin_neon_vhadd_v: 3092 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhadd_v, E); 3093 case AArch64::BI__builtin_neon_vhaddq_v: 3094 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhaddq_v, E); 3095 case AArch64::BI__builtin_neon_vhsub_v: 3096 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhsub_v, E); 3097 case AArch64::BI__builtin_neon_vhsubq_v: 3098 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhsubq_v, E); 3099 case AArch64::BI__builtin_neon_vrhadd_v: 3100 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrhadd_v, E); 3101 case AArch64::BI__builtin_neon_vrhaddq_v: 3102 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrhaddq_v, E); 3103 case AArch64::BI__builtin_neon_vqadd_v: 3104 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqadd_v, E); 3105 case AArch64::BI__builtin_neon_vqaddq_v: 3106 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqaddq_v, E); 3107 case AArch64::BI__builtin_neon_vqsub_v: 3108 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqsub_v, E); 3109 case AArch64::BI__builtin_neon_vqsubq_v: 3110 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqsubq_v, E); 3111 case AArch64::BI__builtin_neon_vshl_v: 3112 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshl_v, E); 3113 case AArch64::BI__builtin_neon_vshlq_v: 3114 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshlq_v, E); 3115 case AArch64::BI__builtin_neon_vqshl_v: 3116 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshl_v, E); 3117 case AArch64::BI__builtin_neon_vqshlq_v: 3118 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshlq_v, E); 3119 case AArch64::BI__builtin_neon_vrshl_v: 3120 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrshl_v, E); 3121 case AArch64::BI__builtin_neon_vrshlq_v: 3122 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrshlq_v, E); 3123 case AArch64::BI__builtin_neon_vqrshl_v: 3124 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrshl_v, E); 3125 case AArch64::BI__builtin_neon_vqrshlq_v: 3126 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrshlq_v, E); 3127 case AArch64::BI__builtin_neon_vaddhn_v: 3128 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vaddhn_v, E); 3129 case AArch64::BI__builtin_neon_vraddhn_v: 3130 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vraddhn_v, E); 3131 case AArch64::BI__builtin_neon_vsubhn_v: 3132 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vsubhn_v, E); 3133 case AArch64::BI__builtin_neon_vrsubhn_v: 3134 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsubhn_v, E); 3135 case AArch64::BI__builtin_neon_vmull_v: 3136 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmull_v, E); 3137 case AArch64::BI__builtin_neon_vqdmull_v: 3138 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmull_v, E); 3139 case AArch64::BI__builtin_neon_vqdmlal_v: 3140 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmlal_v, E); 3141 case AArch64::BI__builtin_neon_vqdmlsl_v: 3142 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmlsl_v, E); 3143 case AArch64::BI__builtin_neon_vmax_v: 3144 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmax_v, E); 3145 case AArch64::BI__builtin_neon_vmaxq_v: 3146 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmaxq_v, E); 3147 case AArch64::BI__builtin_neon_vmin_v: 3148 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmin_v, E); 3149 case AArch64::BI__builtin_neon_vminq_v: 3150 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vminq_v, E); 3151 case AArch64::BI__builtin_neon_vpmax_v: 3152 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpmax_v, E); 3153 case AArch64::BI__builtin_neon_vpmin_v: 3154 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpmin_v, E); 3155 case AArch64::BI__builtin_neon_vpadd_v: 3156 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpadd_v, E); 3157 case AArch64::BI__builtin_neon_vqdmulh_v: 3158 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmulh_v, E); 3159 case AArch64::BI__builtin_neon_vqdmulhq_v: 3160 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmulhq_v, E); 3161 case AArch64::BI__builtin_neon_vqrdmulh_v: 3162 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrdmulh_v, E); 3163 case AArch64::BI__builtin_neon_vqrdmulhq_v: 3164 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrdmulhq_v, E); 3165 3166 // Shift by immediate 3167 case AArch64::BI__builtin_neon_vshr_n_v: 3168 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshr_n_v, E); 3169 case AArch64::BI__builtin_neon_vshrq_n_v: 3170 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshrq_n_v, E); 3171 case AArch64::BI__builtin_neon_vrshr_n_v: 3172 case AArch64::BI__builtin_neon_vrshrq_n_v: 3173 Int = usgn ? Intrinsic::aarch64_neon_vurshr 3174 : Intrinsic::aarch64_neon_vsrshr; 3175 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n"); 3176 case AArch64::BI__builtin_neon_vsra_n_v: 3177 if (VTy->getElementType()->isIntegerTy(64)) { 3178 Int = usgn ? Intrinsic::aarch64_neon_vsradu_n 3179 : Intrinsic::aarch64_neon_vsrads_n; 3180 return EmitNeonCall(CGM.getIntrinsic(Int), Ops, "vsra_n"); 3181 } 3182 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vsra_n_v, E); 3183 case AArch64::BI__builtin_neon_vsraq_n_v: 3184 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vsraq_n_v, E); 3185 case AArch64::BI__builtin_neon_vrsra_n_v: 3186 if (VTy->getElementType()->isIntegerTy(64)) { 3187 Int = usgn ? Intrinsic::aarch64_neon_vrsradu_n 3188 : Intrinsic::aarch64_neon_vrsrads_n; 3189 return EmitNeonCall(CGM.getIntrinsic(Int), Ops, "vrsra_n"); 3190 } 3191 // fall through 3192 case AArch64::BI__builtin_neon_vrsraq_n_v: { 3193 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3194 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3195 Int = usgn ? Intrinsic::aarch64_neon_vurshr 3196 : Intrinsic::aarch64_neon_vsrshr; 3197 Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]); 3198 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n"); 3199 } 3200 case AArch64::BI__builtin_neon_vshl_n_v: 3201 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshl_n_v, E); 3202 case AArch64::BI__builtin_neon_vshlq_n_v: 3203 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshlq_n_v, E); 3204 case AArch64::BI__builtin_neon_vqshl_n_v: 3205 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshl_n_v, E); 3206 case AArch64::BI__builtin_neon_vqshlq_n_v: 3207 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshlq_n_v, E); 3208 case AArch64::BI__builtin_neon_vqshlu_n_v: 3209 case AArch64::BI__builtin_neon_vqshluq_n_v: 3210 Int = Intrinsic::aarch64_neon_vsqshlu; 3211 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n"); 3212 case AArch64::BI__builtin_neon_vsri_n_v: 3213 case AArch64::BI__builtin_neon_vsriq_n_v: 3214 Int = Intrinsic::aarch64_neon_vsri; 3215 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsri_n"); 3216 case AArch64::BI__builtin_neon_vsli_n_v: 3217 case AArch64::BI__builtin_neon_vsliq_n_v: 3218 Int = Intrinsic::aarch64_neon_vsli; 3219 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsli_n"); 3220 case AArch64::BI__builtin_neon_vshll_n_v: { 3221 llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy); 3222 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 3223 if (usgn) 3224 Ops[0] = Builder.CreateZExt(Ops[0], VTy); 3225 else 3226 Ops[0] = Builder.CreateSExt(Ops[0], VTy); 3227 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false); 3228 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n"); 3229 } 3230 case AArch64::BI__builtin_neon_vshrn_n_v: { 3231 llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy); 3232 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 3233 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false); 3234 if (usgn) 3235 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]); 3236 else 3237 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]); 3238 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n"); 3239 } 3240 case AArch64::BI__builtin_neon_vqshrun_n_v: 3241 Int = Intrinsic::aarch64_neon_vsqshrun; 3242 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); 3243 case AArch64::BI__builtin_neon_vrshrn_n_v: 3244 Int = Intrinsic::aarch64_neon_vrshrn; 3245 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n"); 3246 case AArch64::BI__builtin_neon_vqrshrun_n_v: 3247 Int = Intrinsic::aarch64_neon_vsqrshrun; 3248 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n"); 3249 case AArch64::BI__builtin_neon_vqshrn_n_v: 3250 Int = usgn ? Intrinsic::aarch64_neon_vuqshrn 3251 : Intrinsic::aarch64_neon_vsqshrn; 3252 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n"); 3253 case AArch64::BI__builtin_neon_vqrshrn_n_v: 3254 Int = usgn ? Intrinsic::aarch64_neon_vuqrshrn 3255 : Intrinsic::aarch64_neon_vsqrshrn; 3256 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); 3257 3258 // Convert 3259 case AArch64::BI__builtin_neon_vmovl_v: 3260 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmovl_v, E); 3261 case AArch64::BI__builtin_neon_vcvt_n_f32_v: 3262 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_n_f32_v, E); 3263 case AArch64::BI__builtin_neon_vcvtq_n_f32_v: 3264 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_n_f32_v, E); 3265 case AArch64::BI__builtin_neon_vcvt_n_f64_v: 3266 case AArch64::BI__builtin_neon_vcvtq_n_f64_v: { 3267 llvm::Type *FloatTy = 3268 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); 3269 llvm::Type *Tys[2] = { FloatTy, Ty }; 3270 Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp 3271 : Intrinsic::arm_neon_vcvtfxs2fp; 3272 Function *F = CGM.getIntrinsic(Int, Tys); 3273 return EmitNeonCall(F, Ops, "vcvt_n"); 3274 } 3275 case AArch64::BI__builtin_neon_vcvt_n_s32_v: 3276 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_n_s32_v, E); 3277 case AArch64::BI__builtin_neon_vcvtq_n_s32_v: 3278 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_n_s32_v, E); 3279 case AArch64::BI__builtin_neon_vcvt_n_u32_v: 3280 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_n_u32_v, E); 3281 case AArch64::BI__builtin_neon_vcvtq_n_u32_v: 3282 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_n_u32_v, E); 3283 case AArch64::BI__builtin_neon_vcvt_n_s64_v: 3284 case AArch64::BI__builtin_neon_vcvt_n_u64_v: 3285 case AArch64::BI__builtin_neon_vcvtq_n_s64_v: 3286 case AArch64::BI__builtin_neon_vcvtq_n_u64_v: { 3287 llvm::Type *FloatTy = 3288 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); 3289 llvm::Type *Tys[2] = { Ty, FloatTy }; 3290 Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu 3291 : Intrinsic::arm_neon_vcvtfp2fxs; 3292 Function *F = CGM.getIntrinsic(Int, Tys); 3293 return EmitNeonCall(F, Ops, "vcvt_n"); 3294 } 3295 3296 // Load/Store 3297 case AArch64::BI__builtin_neon_vld1_v: 3298 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld1_v, E); 3299 case AArch64::BI__builtin_neon_vld1q_v: 3300 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld1q_v, E); 3301 case AArch64::BI__builtin_neon_vld2_v: 3302 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld2_v, E); 3303 case AArch64::BI__builtin_neon_vld2q_v: 3304 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld2q_v, E); 3305 case AArch64::BI__builtin_neon_vld3_v: 3306 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld3_v, E); 3307 case AArch64::BI__builtin_neon_vld3q_v: 3308 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld3q_v, E); 3309 case AArch64::BI__builtin_neon_vld4_v: 3310 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld4_v, E); 3311 case AArch64::BI__builtin_neon_vld4q_v: 3312 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld4q_v, E); 3313 case AArch64::BI__builtin_neon_vst1_v: 3314 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst1_v, E); 3315 case AArch64::BI__builtin_neon_vst1q_v: 3316 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst1q_v, E); 3317 case AArch64::BI__builtin_neon_vst2_v: 3318 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst2_v, E); 3319 case AArch64::BI__builtin_neon_vst2q_v: 3320 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst2q_v, E); 3321 case AArch64::BI__builtin_neon_vst3_v: 3322 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst3_v, E); 3323 case AArch64::BI__builtin_neon_vst3q_v: 3324 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst3q_v, E); 3325 case AArch64::BI__builtin_neon_vst4_v: 3326 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4_v, E); 3327 case AArch64::BI__builtin_neon_vst4q_v: 3328 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4q_v, E); 3329 case AArch64::BI__builtin_neon_vld1_x2_v: 3330 case AArch64::BI__builtin_neon_vld1q_x2_v: 3331 case AArch64::BI__builtin_neon_vld1_x3_v: 3332 case AArch64::BI__builtin_neon_vld1q_x3_v: 3333 case AArch64::BI__builtin_neon_vld1_x4_v: 3334 case AArch64::BI__builtin_neon_vld1q_x4_v: { 3335 unsigned Int; 3336 switch (BuiltinID) { 3337 case AArch64::BI__builtin_neon_vld1_x2_v: 3338 case AArch64::BI__builtin_neon_vld1q_x2_v: 3339 Int = Intrinsic::aarch64_neon_vld1x2; 3340 break; 3341 case AArch64::BI__builtin_neon_vld1_x3_v: 3342 case AArch64::BI__builtin_neon_vld1q_x3_v: 3343 Int = Intrinsic::aarch64_neon_vld1x3; 3344 break; 3345 case AArch64::BI__builtin_neon_vld1_x4_v: 3346 case AArch64::BI__builtin_neon_vld1q_x4_v: 3347 Int = Intrinsic::aarch64_neon_vld1x4; 3348 break; 3349 } 3350 Function *F = CGM.getIntrinsic(Int, Ty); 3351 Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld1xN"); 3352 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 3353 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3354 return Builder.CreateStore(Ops[1], Ops[0]); 3355 } 3356 case AArch64::BI__builtin_neon_vst1_x2_v: 3357 case AArch64::BI__builtin_neon_vst1q_x2_v: 3358 case AArch64::BI__builtin_neon_vst1_x3_v: 3359 case AArch64::BI__builtin_neon_vst1q_x3_v: 3360 case AArch64::BI__builtin_neon_vst1_x4_v: 3361 case AArch64::BI__builtin_neon_vst1q_x4_v: { 3362 Ops.push_back(Align); 3363 unsigned Int; 3364 switch (BuiltinID) { 3365 case AArch64::BI__builtin_neon_vst1_x2_v: 3366 case AArch64::BI__builtin_neon_vst1q_x2_v: 3367 Int = Intrinsic::aarch64_neon_vst1x2; 3368 break; 3369 case AArch64::BI__builtin_neon_vst1_x3_v: 3370 case AArch64::BI__builtin_neon_vst1q_x3_v: 3371 Int = Intrinsic::aarch64_neon_vst1x3; 3372 break; 3373 case AArch64::BI__builtin_neon_vst1_x4_v: 3374 case AArch64::BI__builtin_neon_vst1q_x4_v: 3375 Int = Intrinsic::aarch64_neon_vst1x4; 3376 break; 3377 } 3378 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, ""); 3379 } 3380 case AArch64::BI__builtin_neon_vld1_lane_v: 3381 case AArch64::BI__builtin_neon_vld1q_lane_v: { 3382 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3383 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 3384 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3385 LoadInst *Ld = Builder.CreateLoad(Ops[0]); 3386 Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); 3387 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); 3388 } 3389 case AArch64::BI__builtin_neon_vld2_lane_v: 3390 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld2q_lane_v, E); 3391 case AArch64::BI__builtin_neon_vld2q_lane_v: 3392 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld2q_lane_v, E); 3393 case AArch64::BI__builtin_neon_vld3_lane_v: 3394 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld3_lane_v, E); 3395 case AArch64::BI__builtin_neon_vld3q_lane_v: 3396 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld3q_lane_v, E); 3397 case AArch64::BI__builtin_neon_vld4_lane_v: 3398 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld4_lane_v, E); 3399 case AArch64::BI__builtin_neon_vld4q_lane_v: 3400 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld4q_lane_v, E); 3401 case AArch64::BI__builtin_neon_vst1_lane_v: 3402 case AArch64::BI__builtin_neon_vst1q_lane_v: { 3403 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3404 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 3405 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 3406 StoreInst *St = 3407 Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty)); 3408 St->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); 3409 return St; 3410 } 3411 case AArch64::BI__builtin_neon_vst2_lane_v: 3412 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst2_lane_v, E); 3413 case AArch64::BI__builtin_neon_vst2q_lane_v: 3414 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst2q_lane_v, E); 3415 case AArch64::BI__builtin_neon_vst3_lane_v: 3416 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst3_lane_v, E); 3417 case AArch64::BI__builtin_neon_vst3q_lane_v: 3418 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst3q_lane_v, E); 3419 case AArch64::BI__builtin_neon_vst4_lane_v: 3420 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4_lane_v, E); 3421 case AArch64::BI__builtin_neon_vst4q_lane_v: 3422 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4q_lane_v, E); 3423 case AArch64::BI__builtin_neon_vld1_dup_v: 3424 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld1_dup_v, E); 3425 case AArch64::BI__builtin_neon_vld1q_dup_v: 3426 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld1q_dup_v, E); 3427 case AArch64::BI__builtin_neon_vld2_dup_v: 3428 case AArch64::BI__builtin_neon_vld2q_dup_v: 3429 case AArch64::BI__builtin_neon_vld3_dup_v: 3430 case AArch64::BI__builtin_neon_vld3q_dup_v: 3431 case AArch64::BI__builtin_neon_vld4_dup_v: 3432 case AArch64::BI__builtin_neon_vld4q_dup_v: { 3433 // Handle 64-bit x 1 elements as a special-case. There is no "dup" needed. 3434 if (VTy->getElementType()->getPrimitiveSizeInBits() == 64 && 3435 VTy->getNumElements() == 1) { 3436 switch (BuiltinID) { 3437 case AArch64::BI__builtin_neon_vld2_dup_v: 3438 Int = Intrinsic::arm_neon_vld2; 3439 break; 3440 case AArch64::BI__builtin_neon_vld3_dup_v: 3441 Int = Intrinsic::arm_neon_vld3; 3442 break; 3443 case AArch64::BI__builtin_neon_vld4_dup_v: 3444 Int = Intrinsic::arm_neon_vld4; 3445 break; 3446 default: 3447 llvm_unreachable("unknown vld_dup intrinsic?"); 3448 } 3449 Function *F = CGM.getIntrinsic(Int, Ty); 3450 Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup"); 3451 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 3452 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3453 return Builder.CreateStore(Ops[1], Ops[0]); 3454 } 3455 switch (BuiltinID) { 3456 case AArch64::BI__builtin_neon_vld2_dup_v: 3457 case AArch64::BI__builtin_neon_vld2q_dup_v: 3458 Int = Intrinsic::arm_neon_vld2lane; 3459 break; 3460 case AArch64::BI__builtin_neon_vld3_dup_v: 3461 case AArch64::BI__builtin_neon_vld3q_dup_v: 3462 Int = Intrinsic::arm_neon_vld3lane; 3463 break; 3464 case AArch64::BI__builtin_neon_vld4_dup_v: 3465 case AArch64::BI__builtin_neon_vld4q_dup_v: 3466 Int = Intrinsic::arm_neon_vld4lane; 3467 break; 3468 } 3469 Function *F = CGM.getIntrinsic(Int, Ty); 3470 llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType()); 3471 3472 SmallVector<Value *, 6> Args; 3473 Args.push_back(Ops[1]); 3474 Args.append(STy->getNumElements(), UndefValue::get(Ty)); 3475 3476 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 3477 Args.push_back(CI); 3478 Args.push_back(Align); 3479 3480 Ops[1] = Builder.CreateCall(F, Args, "vld_dup"); 3481 // splat lane 0 to all elts in each vector of the result. 3482 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 3483 Value *Val = Builder.CreateExtractValue(Ops[1], i); 3484 Value *Elt = Builder.CreateBitCast(Val, Ty); 3485 Elt = EmitNeonSplat(Elt, CI); 3486 Elt = Builder.CreateBitCast(Elt, Val->getType()); 3487 Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i); 3488 } 3489 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 3490 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3491 return Builder.CreateStore(Ops[1], Ops[0]); 3492 } 3493 3494 // Crypto 3495 case AArch64::BI__builtin_neon_vaeseq_v: 3496 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aese, Ty), 3497 Ops, "aese"); 3498 case AArch64::BI__builtin_neon_vaesdq_v: 3499 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aesd, Ty), 3500 Ops, "aesd"); 3501 case AArch64::BI__builtin_neon_vaesmcq_v: 3502 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aesmc, Ty), 3503 Ops, "aesmc"); 3504 case AArch64::BI__builtin_neon_vaesimcq_v: 3505 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aesimc, Ty), 3506 Ops, "aesimc"); 3507 case AArch64::BI__builtin_neon_vsha1su1q_v: 3508 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1su1, Ty), 3509 Ops, "sha1su1"); 3510 case AArch64::BI__builtin_neon_vsha256su0q_v: 3511 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256su0, Ty), 3512 Ops, "sha256su0"); 3513 case AArch64::BI__builtin_neon_vsha1su0q_v: 3514 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1su0, Ty), 3515 Ops, "sha1su0"); 3516 case AArch64::BI__builtin_neon_vsha256hq_v: 3517 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256h, Ty), 3518 Ops, "sha256h"); 3519 case AArch64::BI__builtin_neon_vsha256h2q_v: 3520 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256h2, Ty), 3521 Ops, "sha256h2"); 3522 case AArch64::BI__builtin_neon_vsha256su1q_v: 3523 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256su1, Ty), 3524 Ops, "sha256su1"); 3525 case AArch64::BI__builtin_neon_vmul_lane_v: 3526 case AArch64::BI__builtin_neon_vmul_laneq_v: { 3527 // v1f64 vmul_lane should be mapped to Neon scalar mul lane 3528 bool Quad = false; 3529 if (BuiltinID == AArch64::BI__builtin_neon_vmul_laneq_v) 3530 Quad = true; 3531 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 3532 llvm::Type *VTy = GetNeonType(this, 3533 NeonTypeFlags(NeonTypeFlags::Float64, false, Quad)); 3534 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 3535 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); 3536 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]); 3537 return Builder.CreateBitCast(Result, Ty); 3538 } 3539 3540 // AArch64-only builtins 3541 case AArch64::BI__builtin_neon_vfmaq_laneq_v: { 3542 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 3543 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3544 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3545 3546 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 3547 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3])); 3548 return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); 3549 } 3550 case AArch64::BI__builtin_neon_vfmaq_lane_v: { 3551 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 3552 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3553 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3554 3555 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 3556 llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(), 3557 VTy->getNumElements() / 2); 3558 Ops[2] = Builder.CreateBitCast(Ops[2], STy); 3559 Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), 3560 cast<ConstantInt>(Ops[3])); 3561 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); 3562 3563 return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); 3564 } 3565 case AArch64::BI__builtin_neon_vfma_lane_v: { 3566 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 3567 // v1f64 fma should be mapped to Neon scalar f64 fma 3568 if (VTy && VTy->getElementType() == DoubleTy) { 3569 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 3570 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 3571 llvm::Type *VTy = GetNeonType(this, 3572 NeonTypeFlags(NeonTypeFlags::Float64, false, false)); 3573 Ops[2] = Builder.CreateBitCast(Ops[2], VTy); 3574 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 3575 Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); 3576 Value *Result = Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); 3577 return Builder.CreateBitCast(Result, Ty); 3578 } 3579 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 3580 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3581 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3582 3583 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 3584 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3])); 3585 return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); 3586 } 3587 case AArch64::BI__builtin_neon_vfma_laneq_v: { 3588 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 3589 // v1f64 fma should be mapped to Neon scalar f64 fma 3590 if (VTy && VTy->getElementType() == DoubleTy) { 3591 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 3592 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 3593 llvm::Type *VTy = GetNeonType(this, 3594 NeonTypeFlags(NeonTypeFlags::Float64, false, true)); 3595 Ops[2] = Builder.CreateBitCast(Ops[2], VTy); 3596 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 3597 Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); 3598 Value *Result = Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); 3599 return Builder.CreateBitCast(Result, Ty); 3600 } 3601 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 3602 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3603 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3604 3605 llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(), 3606 VTy->getNumElements() * 2); 3607 Ops[2] = Builder.CreateBitCast(Ops[2], STy); 3608 Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), 3609 cast<ConstantInt>(Ops[3])); 3610 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); 3611 3612 return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); 3613 } 3614 case AArch64::BI__builtin_neon_vfms_v: 3615 case AArch64::BI__builtin_neon_vfmsq_v: { 3616 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 3617 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3618 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3619 Ops[1] = Builder.CreateFNeg(Ops[1]); 3620 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 3621 3622 // LLVM's fma intrinsic puts the accumulator in the last position, but the 3623 // AArch64 intrinsic has it first. 3624 return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); 3625 } 3626 case AArch64::BI__builtin_neon_vmaxnm_v: 3627 case AArch64::BI__builtin_neon_vmaxnmq_v: { 3628 Int = Intrinsic::aarch64_neon_vmaxnm; 3629 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm"); 3630 } 3631 case AArch64::BI__builtin_neon_vminnm_v: 3632 case AArch64::BI__builtin_neon_vminnmq_v: { 3633 Int = Intrinsic::aarch64_neon_vminnm; 3634 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm"); 3635 } 3636 case AArch64::BI__builtin_neon_vpmaxnm_v: 3637 case AArch64::BI__builtin_neon_vpmaxnmq_v: { 3638 Int = Intrinsic::aarch64_neon_vpmaxnm; 3639 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm"); 3640 } 3641 case AArch64::BI__builtin_neon_vpminnm_v: 3642 case AArch64::BI__builtin_neon_vpminnmq_v: { 3643 Int = Intrinsic::aarch64_neon_vpminnm; 3644 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm"); 3645 } 3646 case AArch64::BI__builtin_neon_vpmaxq_v: { 3647 Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs; 3648 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); 3649 } 3650 case AArch64::BI__builtin_neon_vpminq_v: { 3651 Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins; 3652 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin"); 3653 } 3654 case AArch64::BI__builtin_neon_vpaddq_v: { 3655 Int = Intrinsic::arm_neon_vpadd; 3656 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpadd"); 3657 } 3658 case AArch64::BI__builtin_neon_vmulx_v: 3659 case AArch64::BI__builtin_neon_vmulxq_v: { 3660 Int = Intrinsic::aarch64_neon_vmulx; 3661 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); 3662 } 3663 case AArch64::BI__builtin_neon_vpaddl_v: 3664 case AArch64::BI__builtin_neon_vpaddlq_v: 3665 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpaddl_v, E); 3666 case AArch64::BI__builtin_neon_vpadal_v: 3667 case AArch64::BI__builtin_neon_vpadalq_v: 3668 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpadal_v, E); 3669 case AArch64::BI__builtin_neon_vqabs_v: 3670 case AArch64::BI__builtin_neon_vqabsq_v: 3671 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqabs_v, E); 3672 case AArch64::BI__builtin_neon_vqneg_v: 3673 case AArch64::BI__builtin_neon_vqnegq_v: 3674 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqneg_v, E); 3675 case AArch64::BI__builtin_neon_vabs_v: 3676 case AArch64::BI__builtin_neon_vabsq_v: { 3677 if (VTy->getElementType()->isFloatingPointTy()) { 3678 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs"); 3679 } 3680 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vabs_v, E); 3681 } 3682 case AArch64::BI__builtin_neon_vsqadd_v: 3683 case AArch64::BI__builtin_neon_vsqaddq_v: { 3684 Int = Intrinsic::aarch64_neon_usqadd; 3685 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd"); 3686 } 3687 case AArch64::BI__builtin_neon_vuqadd_v: 3688 case AArch64::BI__builtin_neon_vuqaddq_v: { 3689 Int = Intrinsic::aarch64_neon_suqadd; 3690 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); 3691 } 3692 case AArch64::BI__builtin_neon_vcls_v: 3693 case AArch64::BI__builtin_neon_vclsq_v: 3694 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcls_v, E); 3695 case AArch64::BI__builtin_neon_vclz_v: 3696 case AArch64::BI__builtin_neon_vclzq_v: 3697 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vclz_v, E); 3698 case AArch64::BI__builtin_neon_vcnt_v: 3699 case AArch64::BI__builtin_neon_vcntq_v: 3700 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcnt_v, E); 3701 case AArch64::BI__builtin_neon_vrbit_v: 3702 case AArch64::BI__builtin_neon_vrbitq_v: 3703 Int = Intrinsic::aarch64_neon_rbit; 3704 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit"); 3705 case AArch64::BI__builtin_neon_vmovn_v: 3706 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmovn_v, E); 3707 case AArch64::BI__builtin_neon_vqmovun_v: 3708 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqmovun_v, E); 3709 case AArch64::BI__builtin_neon_vqmovn_v: 3710 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqmovn_v, E); 3711 case AArch64::BI__builtin_neon_vcvt_f16_v: 3712 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_f16_v, E); 3713 case AArch64::BI__builtin_neon_vcvt_f32_f16: 3714 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_f32_f16, E); 3715 case AArch64::BI__builtin_neon_vcvt_f32_f64: { 3716 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3717 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, false)); 3718 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt"); 3719 } 3720 case AArch64::BI__builtin_neon_vcvtx_f32_v: { 3721 llvm::Type *EltTy = FloatTy; 3722 llvm::Type *ResTy = llvm::VectorType::get(EltTy, 2); 3723 llvm::Type *Tys[2] = { ResTy, Ty }; 3724 Int = Intrinsic::aarch64_neon_vcvtxn; 3725 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtx_f32_f64"); 3726 } 3727 case AArch64::BI__builtin_neon_vcvt_f64_f32: { 3728 llvm::Type *OpTy = 3729 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, false)); 3730 Ops[0] = Builder.CreateBitCast(Ops[0], OpTy); 3731 return Builder.CreateFPExt(Ops[0], Ty, "vcvt"); 3732 } 3733 case AArch64::BI__builtin_neon_vcvt_f64_v: 3734 case AArch64::BI__builtin_neon_vcvtq_f64_v: { 3735 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3736 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); 3737 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 3738 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 3739 } 3740 case AArch64::BI__builtin_neon_vrndn_v: 3741 case AArch64::BI__builtin_neon_vrndnq_v: { 3742 Int = Intrinsic::aarch64_neon_frintn; 3743 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn"); 3744 } 3745 case AArch64::BI__builtin_neon_vrnda_v: 3746 case AArch64::BI__builtin_neon_vrndaq_v: { 3747 Int = Intrinsic::round; 3748 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda"); 3749 } 3750 case AArch64::BI__builtin_neon_vrndp_v: 3751 case AArch64::BI__builtin_neon_vrndpq_v: { 3752 Int = Intrinsic::ceil; 3753 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp"); 3754 } 3755 case AArch64::BI__builtin_neon_vrndm_v: 3756 case AArch64::BI__builtin_neon_vrndmq_v: { 3757 Int = Intrinsic::floor; 3758 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm"); 3759 } 3760 case AArch64::BI__builtin_neon_vrndx_v: 3761 case AArch64::BI__builtin_neon_vrndxq_v: { 3762 Int = Intrinsic::rint; 3763 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx"); 3764 } 3765 case AArch64::BI__builtin_neon_vrnd_v: 3766 case AArch64::BI__builtin_neon_vrndq_v: { 3767 Int = Intrinsic::trunc; 3768 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd"); 3769 } 3770 case AArch64::BI__builtin_neon_vrndi_v: 3771 case AArch64::BI__builtin_neon_vrndiq_v: { 3772 Int = Intrinsic::nearbyint; 3773 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi"); 3774 } 3775 case AArch64::BI__builtin_neon_vcvt_s32_v: 3776 case AArch64::BI__builtin_neon_vcvt_u32_v: 3777 case AArch64::BI__builtin_neon_vcvtq_s32_v: 3778 case AArch64::BI__builtin_neon_vcvtq_u32_v: 3779 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_u32_v, E); 3780 case AArch64::BI__builtin_neon_vcvt_s64_v: 3781 case AArch64::BI__builtin_neon_vcvt_u64_v: 3782 case AArch64::BI__builtin_neon_vcvtq_s64_v: 3783 case AArch64::BI__builtin_neon_vcvtq_u64_v: { 3784 llvm::Type *DoubleTy = 3785 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); 3786 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 3787 return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt") 3788 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt"); 3789 } 3790 case AArch64::BI__builtin_neon_vcvtn_s32_v: 3791 case AArch64::BI__builtin_neon_vcvtnq_s32_v: { 3792 llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements()); 3793 llvm::Type *Tys[2] = { Ty, OpTy }; 3794 Int = Intrinsic::arm_neon_vcvtns; 3795 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtns_f32"); 3796 } 3797 case AArch64::BI__builtin_neon_vcvtn_s64_v: 3798 case AArch64::BI__builtin_neon_vcvtnq_s64_v: { 3799 llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements()); 3800 llvm::Type *Tys[2] = { Ty, OpTy }; 3801 Int = Intrinsic::arm_neon_vcvtns; 3802 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtns_f64"); 3803 } 3804 case AArch64::BI__builtin_neon_vcvtn_u32_v: 3805 case AArch64::BI__builtin_neon_vcvtnq_u32_v: { 3806 llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements()); 3807 llvm::Type *Tys[2] = { Ty, OpTy }; 3808 Int = Intrinsic::arm_neon_vcvtnu; 3809 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtnu_f32"); 3810 } 3811 case AArch64::BI__builtin_neon_vcvtn_u64_v: 3812 case AArch64::BI__builtin_neon_vcvtnq_u64_v: { 3813 llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements()); 3814 llvm::Type *Tys[2] = { Ty, OpTy }; 3815 Int = Intrinsic::arm_neon_vcvtnu; 3816 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtnu_f64"); 3817 } 3818 case AArch64::BI__builtin_neon_vcvtp_s32_v: 3819 case AArch64::BI__builtin_neon_vcvtpq_s32_v: { 3820 llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements()); 3821 llvm::Type *Tys[2] = { Ty, OpTy }; 3822 Int = Intrinsic::arm_neon_vcvtps; 3823 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtps_f32"); 3824 } 3825 case AArch64::BI__builtin_neon_vcvtp_s64_v: 3826 case AArch64::BI__builtin_neon_vcvtpq_s64_v: { 3827 llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements()); 3828 llvm::Type *Tys[2] = { Ty, OpTy }; 3829 Int = Intrinsic::arm_neon_vcvtps; 3830 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtps_f64"); 3831 } 3832 case AArch64::BI__builtin_neon_vcvtp_u32_v: 3833 case AArch64::BI__builtin_neon_vcvtpq_u32_v: { 3834 llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements()); 3835 llvm::Type *Tys[2] = { Ty, OpTy }; 3836 Int = Intrinsic::arm_neon_vcvtpu; 3837 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtpu_f32"); 3838 } 3839 case AArch64::BI__builtin_neon_vcvtp_u64_v: 3840 case AArch64::BI__builtin_neon_vcvtpq_u64_v: { 3841 llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements()); 3842 llvm::Type *Tys[2] = { Ty, OpTy }; 3843 Int = Intrinsic::arm_neon_vcvtpu; 3844 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtpu_f64"); 3845 } 3846 case AArch64::BI__builtin_neon_vcvtm_s32_v: 3847 case AArch64::BI__builtin_neon_vcvtmq_s32_v: { 3848 llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements()); 3849 llvm::Type *Tys[2] = { Ty, OpTy }; 3850 Int = Intrinsic::arm_neon_vcvtms; 3851 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtms_f32"); 3852 } 3853 case AArch64::BI__builtin_neon_vcvtm_s64_v: 3854 case AArch64::BI__builtin_neon_vcvtmq_s64_v: { 3855 llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements()); 3856 llvm::Type *Tys[2] = { Ty, OpTy }; 3857 Int = Intrinsic::arm_neon_vcvtms; 3858 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtms_f64"); 3859 } 3860 case AArch64::BI__builtin_neon_vcvtm_u32_v: 3861 case AArch64::BI__builtin_neon_vcvtmq_u32_v: { 3862 llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements()); 3863 llvm::Type *Tys[2] = { Ty, OpTy }; 3864 Int = Intrinsic::arm_neon_vcvtmu; 3865 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtmu_f32"); 3866 } 3867 case AArch64::BI__builtin_neon_vcvtm_u64_v: 3868 case AArch64::BI__builtin_neon_vcvtmq_u64_v: { 3869 llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements()); 3870 llvm::Type *Tys[2] = { Ty, OpTy }; 3871 Int = Intrinsic::arm_neon_vcvtmu; 3872 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtmu_f64"); 3873 } 3874 case AArch64::BI__builtin_neon_vcvta_s32_v: 3875 case AArch64::BI__builtin_neon_vcvtaq_s32_v: { 3876 llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements()); 3877 llvm::Type *Tys[2] = { Ty, OpTy }; 3878 Int = Intrinsic::arm_neon_vcvtas; 3879 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtas_f32"); 3880 } 3881 case AArch64::BI__builtin_neon_vcvta_s64_v: 3882 case AArch64::BI__builtin_neon_vcvtaq_s64_v: { 3883 llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements()); 3884 llvm::Type *Tys[2] = { Ty, OpTy }; 3885 Int = Intrinsic::arm_neon_vcvtas; 3886 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtas_f64"); 3887 } 3888 case AArch64::BI__builtin_neon_vcvta_u32_v: 3889 case AArch64::BI__builtin_neon_vcvtaq_u32_v: { 3890 llvm::Type *OpTy = llvm::VectorType::get(FloatTy, VTy->getNumElements()); 3891 llvm::Type *Tys[2] = { Ty, OpTy }; 3892 Int = Intrinsic::arm_neon_vcvtau; 3893 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtau_f32"); 3894 } 3895 case AArch64::BI__builtin_neon_vcvta_u64_v: 3896 case AArch64::BI__builtin_neon_vcvtaq_u64_v: { 3897 llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements()); 3898 llvm::Type *Tys[2] = { Ty, OpTy }; 3899 Int = Intrinsic::arm_neon_vcvtau; 3900 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtau_f64"); 3901 } 3902 case AArch64::BI__builtin_neon_vrecpe_v: 3903 case AArch64::BI__builtin_neon_vrecpeq_v: 3904 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrecpe_v, E); 3905 case AArch64::BI__builtin_neon_vrsqrte_v: 3906 case AArch64::BI__builtin_neon_vrsqrteq_v: 3907 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsqrte_v, E); 3908 case AArch64::BI__builtin_neon_vsqrt_v: 3909 case AArch64::BI__builtin_neon_vsqrtq_v: { 3910 Int = Intrinsic::sqrt; 3911 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt"); 3912 } 3913 case AArch64::BI__builtin_neon_vcvt_f32_v: 3914 case AArch64::BI__builtin_neon_vcvtq_f32_v: 3915 return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_f32_v, E); 3916 case AArch64::BI__builtin_neon_vceqz_v: 3917 case AArch64::BI__builtin_neon_vceqzq_v: 3918 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ, 3919 ICmpInst::ICMP_EQ, "vceqz"); 3920 case AArch64::BI__builtin_neon_vcgez_v: 3921 case AArch64::BI__builtin_neon_vcgezq_v: 3922 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE, 3923 ICmpInst::ICMP_SGE, "vcgez"); 3924 case AArch64::BI__builtin_neon_vclez_v: 3925 case AArch64::BI__builtin_neon_vclezq_v: 3926 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE, 3927 ICmpInst::ICMP_SLE, "vclez"); 3928 case AArch64::BI__builtin_neon_vcgtz_v: 3929 case AArch64::BI__builtin_neon_vcgtzq_v: 3930 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT, 3931 ICmpInst::ICMP_SGT, "vcgtz"); 3932 case AArch64::BI__builtin_neon_vcltz_v: 3933 case AArch64::BI__builtin_neon_vcltzq_v: 3934 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT, 3935 ICmpInst::ICMP_SLT, "vcltz"); 3936 } 3937 } 3938 3939 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, 3940 const CallExpr *E) { 3941 if (BuiltinID == ARM::BI__clear_cache) { 3942 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 3943 const FunctionDecl *FD = E->getDirectCallee(); 3944 SmallVector<Value*, 2> Ops; 3945 for (unsigned i = 0; i < 2; i++) 3946 Ops.push_back(EmitScalarExpr(E->getArg(i))); 3947 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 3948 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 3949 StringRef Name = FD->getName(); 3950 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 3951 } 3952 3953 if (BuiltinID == ARM::BI__builtin_arm_ldrexd || 3954 (BuiltinID == ARM::BI__builtin_arm_ldrex && 3955 getContext().getTypeSize(E->getType()) == 64)) { 3956 Function *F = CGM.getIntrinsic(Intrinsic::arm_ldrexd); 3957 3958 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 3959 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 3960 "ldrexd"); 3961 3962 Value *Val0 = Builder.CreateExtractValue(Val, 1); 3963 Value *Val1 = Builder.CreateExtractValue(Val, 0); 3964 Val0 = Builder.CreateZExt(Val0, Int64Ty); 3965 Val1 = Builder.CreateZExt(Val1, Int64Ty); 3966 3967 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32); 3968 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 3969 Val = Builder.CreateOr(Val, Val1); 3970 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 3971 } 3972 3973 if (BuiltinID == ARM::BI__builtin_arm_ldrex) { 3974 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 3975 3976 QualType Ty = E->getType(); 3977 llvm::Type *RealResTy = ConvertType(Ty); 3978 llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(), 3979 getContext().getTypeSize(Ty)); 3980 LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo()); 3981 3982 Function *F = CGM.getIntrinsic(Intrinsic::arm_ldrex, LoadAddr->getType()); 3983 Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex"); 3984 3985 if (RealResTy->isPointerTy()) 3986 return Builder.CreateIntToPtr(Val, RealResTy); 3987 else { 3988 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 3989 return Builder.CreateBitCast(Val, RealResTy); 3990 } 3991 } 3992 3993 if (BuiltinID == ARM::BI__builtin_arm_strexd || 3994 (BuiltinID == ARM::BI__builtin_arm_strex && 3995 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) { 3996 Function *F = CGM.getIntrinsic(Intrinsic::arm_strexd); 3997 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, NULL); 3998 3999 Value *Tmp = CreateMemTemp(E->getArg(0)->getType()); 4000 Value *Val = EmitScalarExpr(E->getArg(0)); 4001 Builder.CreateStore(Val, Tmp); 4002 4003 Value *LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy)); 4004 Val = Builder.CreateLoad(LdPtr); 4005 4006 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 4007 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 4008 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy); 4009 return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "strexd"); 4010 } 4011 4012 if (BuiltinID == ARM::BI__builtin_arm_strex) { 4013 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 4014 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 4015 4016 QualType Ty = E->getArg(0)->getType(); 4017 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 4018 getContext().getTypeSize(Ty)); 4019 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 4020 4021 if (StoreVal->getType()->isPointerTy()) 4022 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty); 4023 else { 4024 StoreVal = Builder.CreateBitCast(StoreVal, StoreTy); 4025 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty); 4026 } 4027 4028 Function *F = CGM.getIntrinsic(Intrinsic::arm_strex, StoreAddr->getType()); 4029 return Builder.CreateCall2(F, StoreVal, StoreAddr, "strex"); 4030 } 4031 4032 if (BuiltinID == ARM::BI__builtin_arm_clrex) { 4033 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex); 4034 return Builder.CreateCall(F); 4035 } 4036 4037 if (BuiltinID == ARM::BI__builtin_arm_sevl) { 4038 Function *F = CGM.getIntrinsic(Intrinsic::arm_sevl); 4039 return Builder.CreateCall(F); 4040 } 4041 4042 // CRC32 4043 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 4044 switch (BuiltinID) { 4045 case ARM::BI__builtin_arm_crc32b: 4046 CRCIntrinsicID = Intrinsic::arm_crc32b; break; 4047 case ARM::BI__builtin_arm_crc32cb: 4048 CRCIntrinsicID = Intrinsic::arm_crc32cb; break; 4049 case ARM::BI__builtin_arm_crc32h: 4050 CRCIntrinsicID = Intrinsic::arm_crc32h; break; 4051 case ARM::BI__builtin_arm_crc32ch: 4052 CRCIntrinsicID = Intrinsic::arm_crc32ch; break; 4053 case ARM::BI__builtin_arm_crc32w: 4054 case ARM::BI__builtin_arm_crc32d: 4055 CRCIntrinsicID = Intrinsic::arm_crc32w; break; 4056 case ARM::BI__builtin_arm_crc32cw: 4057 case ARM::BI__builtin_arm_crc32cd: 4058 CRCIntrinsicID = Intrinsic::arm_crc32cw; break; 4059 } 4060 4061 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 4062 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 4063 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 4064 4065 // crc32{c,}d intrinsics are implemnted as two calls to crc32{c,}w 4066 // intrinsics, hence we need different codegen for these cases. 4067 if (BuiltinID == ARM::BI__builtin_arm_crc32d || 4068 BuiltinID == ARM::BI__builtin_arm_crc32cd) { 4069 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); 4070 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty); 4071 Value *Arg1b = Builder.CreateLShr(Arg1, C1); 4072 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty); 4073 4074 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 4075 Value *Res = Builder.CreateCall2(F, Arg0, Arg1a); 4076 return Builder.CreateCall2(F, Res, Arg1b); 4077 } else { 4078 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty); 4079 4080 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 4081 return Builder.CreateCall2(F, Arg0, Arg1); 4082 } 4083 } 4084 4085 SmallVector<Value*, 4> Ops; 4086 llvm::Value *Align = 0; 4087 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { 4088 if (i == 0) { 4089 switch (BuiltinID) { 4090 case ARM::BI__builtin_neon_vld1_v: 4091 case ARM::BI__builtin_neon_vld1q_v: 4092 case ARM::BI__builtin_neon_vld1q_lane_v: 4093 case ARM::BI__builtin_neon_vld1_lane_v: 4094 case ARM::BI__builtin_neon_vld1_dup_v: 4095 case ARM::BI__builtin_neon_vld1q_dup_v: 4096 case ARM::BI__builtin_neon_vst1_v: 4097 case ARM::BI__builtin_neon_vst1q_v: 4098 case ARM::BI__builtin_neon_vst1q_lane_v: 4099 case ARM::BI__builtin_neon_vst1_lane_v: 4100 case ARM::BI__builtin_neon_vst2_v: 4101 case ARM::BI__builtin_neon_vst2q_v: 4102 case ARM::BI__builtin_neon_vst2_lane_v: 4103 case ARM::BI__builtin_neon_vst2q_lane_v: 4104 case ARM::BI__builtin_neon_vst3_v: 4105 case ARM::BI__builtin_neon_vst3q_v: 4106 case ARM::BI__builtin_neon_vst3_lane_v: 4107 case ARM::BI__builtin_neon_vst3q_lane_v: 4108 case ARM::BI__builtin_neon_vst4_v: 4109 case ARM::BI__builtin_neon_vst4q_v: 4110 case ARM::BI__builtin_neon_vst4_lane_v: 4111 case ARM::BI__builtin_neon_vst4q_lane_v: 4112 // Get the alignment for the argument in addition to the value; 4113 // we'll use it later. 4114 std::pair<llvm::Value*, unsigned> Src = 4115 EmitPointerWithAlignment(E->getArg(0)); 4116 Ops.push_back(Src.first); 4117 Align = Builder.getInt32(Src.second); 4118 continue; 4119 } 4120 } 4121 if (i == 1) { 4122 switch (BuiltinID) { 4123 case ARM::BI__builtin_neon_vld2_v: 4124 case ARM::BI__builtin_neon_vld2q_v: 4125 case ARM::BI__builtin_neon_vld3_v: 4126 case ARM::BI__builtin_neon_vld3q_v: 4127 case ARM::BI__builtin_neon_vld4_v: 4128 case ARM::BI__builtin_neon_vld4q_v: 4129 case ARM::BI__builtin_neon_vld2_lane_v: 4130 case ARM::BI__builtin_neon_vld2q_lane_v: 4131 case ARM::BI__builtin_neon_vld3_lane_v: 4132 case ARM::BI__builtin_neon_vld3q_lane_v: 4133 case ARM::BI__builtin_neon_vld4_lane_v: 4134 case ARM::BI__builtin_neon_vld4q_lane_v: 4135 case ARM::BI__builtin_neon_vld2_dup_v: 4136 case ARM::BI__builtin_neon_vld3_dup_v: 4137 case ARM::BI__builtin_neon_vld4_dup_v: 4138 // Get the alignment for the argument in addition to the value; 4139 // we'll use it later. 4140 std::pair<llvm::Value*, unsigned> Src = 4141 EmitPointerWithAlignment(E->getArg(1)); 4142 Ops.push_back(Src.first); 4143 Align = Builder.getInt32(Src.second); 4144 continue; 4145 } 4146 } 4147 Ops.push_back(EmitScalarExpr(E->getArg(i))); 4148 } 4149 4150 // vget_lane and vset_lane are not overloaded and do not have an extra 4151 // argument that specifies the vector type. 4152 switch (BuiltinID) { 4153 default: break; 4154 case ARM::BI__builtin_neon_vget_lane_i8: 4155 case ARM::BI__builtin_neon_vget_lane_i16: 4156 case ARM::BI__builtin_neon_vget_lane_i32: 4157 case ARM::BI__builtin_neon_vget_lane_i64: 4158 case ARM::BI__builtin_neon_vget_lane_f32: 4159 case ARM::BI__builtin_neon_vgetq_lane_i8: 4160 case ARM::BI__builtin_neon_vgetq_lane_i16: 4161 case ARM::BI__builtin_neon_vgetq_lane_i32: 4162 case ARM::BI__builtin_neon_vgetq_lane_i64: 4163 case ARM::BI__builtin_neon_vgetq_lane_f32: 4164 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4165 "vget_lane"); 4166 case ARM::BI__builtin_neon_vset_lane_i8: 4167 case ARM::BI__builtin_neon_vset_lane_i16: 4168 case ARM::BI__builtin_neon_vset_lane_i32: 4169 case ARM::BI__builtin_neon_vset_lane_i64: 4170 case ARM::BI__builtin_neon_vset_lane_f32: 4171 case ARM::BI__builtin_neon_vsetq_lane_i8: 4172 case ARM::BI__builtin_neon_vsetq_lane_i16: 4173 case ARM::BI__builtin_neon_vsetq_lane_i32: 4174 case ARM::BI__builtin_neon_vsetq_lane_i64: 4175 case ARM::BI__builtin_neon_vsetq_lane_f32: 4176 Ops.push_back(EmitScalarExpr(E->getArg(2))); 4177 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 4178 } 4179 4180 // Get the last argument, which specifies the vector type. 4181 llvm::APSInt Result; 4182 const Expr *Arg = E->getArg(E->getNumArgs()-1); 4183 if (!Arg->isIntegerConstantExpr(Result, getContext())) 4184 return 0; 4185 4186 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f || 4187 BuiltinID == ARM::BI__builtin_arm_vcvtr_d) { 4188 // Determine the overloaded type of this builtin. 4189 llvm::Type *Ty; 4190 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f) 4191 Ty = FloatTy; 4192 else 4193 Ty = DoubleTy; 4194 4195 // Determine whether this is an unsigned conversion or not. 4196 bool usgn = Result.getZExtValue() == 1; 4197 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr; 4198 4199 // Call the appropriate intrinsic. 4200 Function *F = CGM.getIntrinsic(Int, Ty); 4201 return Builder.CreateCall(F, Ops, "vcvtr"); 4202 } 4203 4204 // Determine the type of this overloaded NEON intrinsic. 4205 NeonTypeFlags Type(Result.getZExtValue()); 4206 bool usgn = Type.isUnsigned(); 4207 bool quad = Type.isQuad(); 4208 bool rightShift = false; 4209 4210 llvm::VectorType *VTy = GetNeonType(this, Type); 4211 llvm::Type *Ty = VTy; 4212 if (!Ty) 4213 return 0; 4214 4215 unsigned Int; 4216 switch (BuiltinID) { 4217 default: return 0; 4218 case ARM::BI__builtin_neon_vbsl_v: 4219 case ARM::BI__builtin_neon_vbslq_v: 4220 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty), 4221 Ops, "vbsl"); 4222 case ARM::BI__builtin_neon_vabd_v: 4223 case ARM::BI__builtin_neon_vabdq_v: 4224 Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds; 4225 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd"); 4226 case ARM::BI__builtin_neon_vabs_v: 4227 case ARM::BI__builtin_neon_vabsq_v: 4228 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vabs, Ty), 4229 Ops, "vabs"); 4230 case ARM::BI__builtin_neon_vaddhn_v: { 4231 llvm::VectorType *SrcTy = 4232 llvm::VectorType::getExtendedElementVectorType(VTy); 4233 4234 // %sum = add <4 x i32> %lhs, %rhs 4235 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 4236 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy); 4237 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn"); 4238 4239 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> 4240 Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(), 4241 SrcTy->getScalarSizeInBits() / 2); 4242 ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt); 4243 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn"); 4244 4245 // %res = trunc <4 x i32> %high to <4 x i16> 4246 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn"); 4247 } 4248 case ARM::BI__builtin_neon_vcale_v: 4249 std::swap(Ops[0], Ops[1]); 4250 case ARM::BI__builtin_neon_vcage_v: { 4251 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacged); 4252 return EmitNeonCall(F, Ops, "vcage"); 4253 } 4254 case ARM::BI__builtin_neon_vcaleq_v: 4255 std::swap(Ops[0], Ops[1]); 4256 case ARM::BI__builtin_neon_vcageq_v: { 4257 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq); 4258 return EmitNeonCall(F, Ops, "vcage"); 4259 } 4260 case ARM::BI__builtin_neon_vcalt_v: 4261 std::swap(Ops[0], Ops[1]); 4262 case ARM::BI__builtin_neon_vcagt_v: { 4263 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtd); 4264 return EmitNeonCall(F, Ops, "vcagt"); 4265 } 4266 case ARM::BI__builtin_neon_vcaltq_v: 4267 std::swap(Ops[0], Ops[1]); 4268 case ARM::BI__builtin_neon_vcagtq_v: { 4269 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq); 4270 return EmitNeonCall(F, Ops, "vcagt"); 4271 } 4272 case ARM::BI__builtin_neon_vcls_v: 4273 case ARM::BI__builtin_neon_vclsq_v: { 4274 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, Ty); 4275 return EmitNeonCall(F, Ops, "vcls"); 4276 } 4277 case ARM::BI__builtin_neon_vclz_v: 4278 case ARM::BI__builtin_neon_vclzq_v: { 4279 // Generate target-independent intrinsic; also need to add second argument 4280 // for whether or not clz of zero is undefined; on ARM it isn't. 4281 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ty); 4282 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef())); 4283 return EmitNeonCall(F, Ops, "vclz"); 4284 } 4285 case ARM::BI__builtin_neon_vcnt_v: 4286 case ARM::BI__builtin_neon_vcntq_v: { 4287 // generate target-independent intrinsic 4288 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, Ty); 4289 return EmitNeonCall(F, Ops, "vctpop"); 4290 } 4291 case ARM::BI__builtin_neon_vcvt_f16_v: { 4292 assert(Type.getEltType() == NeonTypeFlags::Float16 && !quad && 4293 "unexpected vcvt_f16_v builtin"); 4294 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvtfp2hf); 4295 return EmitNeonCall(F, Ops, "vcvt"); 4296 } 4297 case ARM::BI__builtin_neon_vcvt_f32_f16: { 4298 assert(Type.getEltType() == NeonTypeFlags::Float16 && !quad && 4299 "unexpected vcvt_f32_f16 builtin"); 4300 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvthf2fp); 4301 return EmitNeonCall(F, Ops, "vcvt"); 4302 } 4303 case ARM::BI__builtin_neon_vcvt_f32_v: 4304 case ARM::BI__builtin_neon_vcvtq_f32_v: 4305 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4306 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad)); 4307 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 4308 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 4309 case ARM::BI__builtin_neon_vcvt_s32_v: 4310 case ARM::BI__builtin_neon_vcvt_u32_v: 4311 case ARM::BI__builtin_neon_vcvtq_s32_v: 4312 case ARM::BI__builtin_neon_vcvtq_u32_v: { 4313 llvm::Type *FloatTy = 4314 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad)); 4315 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy); 4316 return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt") 4317 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt"); 4318 } 4319 case ARM::BI__builtin_neon_vcvt_n_f32_v: 4320 case ARM::BI__builtin_neon_vcvtq_n_f32_v: { 4321 llvm::Type *FloatTy = 4322 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad)); 4323 llvm::Type *Tys[2] = { FloatTy, Ty }; 4324 Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp 4325 : Intrinsic::arm_neon_vcvtfxs2fp; 4326 Function *F = CGM.getIntrinsic(Int, Tys); 4327 return EmitNeonCall(F, Ops, "vcvt_n"); 4328 } 4329 case ARM::BI__builtin_neon_vcvt_n_s32_v: 4330 case ARM::BI__builtin_neon_vcvt_n_u32_v: 4331 case ARM::BI__builtin_neon_vcvtq_n_s32_v: 4332 case ARM::BI__builtin_neon_vcvtq_n_u32_v: { 4333 llvm::Type *FloatTy = 4334 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad)); 4335 llvm::Type *Tys[2] = { Ty, FloatTy }; 4336 Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu 4337 : Intrinsic::arm_neon_vcvtfp2fxs; 4338 Function *F = CGM.getIntrinsic(Int, Tys); 4339 return EmitNeonCall(F, Ops, "vcvt_n"); 4340 } 4341 case ARM::BI__builtin_neon_vext_v: 4342 case ARM::BI__builtin_neon_vextq_v: { 4343 int CV = cast<ConstantInt>(Ops[2])->getSExtValue(); 4344 SmallVector<Constant*, 16> Indices; 4345 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 4346 Indices.push_back(ConstantInt::get(Int32Ty, i+CV)); 4347 4348 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4349 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4350 Value *SV = llvm::ConstantVector::get(Indices); 4351 return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext"); 4352 } 4353 case ARM::BI__builtin_neon_vhadd_v: 4354 case ARM::BI__builtin_neon_vhaddq_v: 4355 Int = usgn ? Intrinsic::arm_neon_vhaddu : Intrinsic::arm_neon_vhadds; 4356 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhadd"); 4357 case ARM::BI__builtin_neon_vhsub_v: 4358 case ARM::BI__builtin_neon_vhsubq_v: 4359 Int = usgn ? Intrinsic::arm_neon_vhsubu : Intrinsic::arm_neon_vhsubs; 4360 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhsub"); 4361 case ARM::BI__builtin_neon_vld1_v: 4362 case ARM::BI__builtin_neon_vld1q_v: 4363 Ops.push_back(Align); 4364 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty), 4365 Ops, "vld1"); 4366 case ARM::BI__builtin_neon_vld1q_lane_v: 4367 // Handle 64-bit integer elements as a special case. Use shuffles of 4368 // one-element vectors to avoid poor code for i64 in the backend. 4369 if (VTy->getElementType()->isIntegerTy(64)) { 4370 // Extract the other lane. 4371 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4372 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue(); 4373 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane)); 4374 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 4375 // Load the value as a one-element vector. 4376 Ty = llvm::VectorType::get(VTy->getElementType(), 1); 4377 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty); 4378 Value *Ld = Builder.CreateCall2(F, Ops[0], Align); 4379 // Combine them. 4380 SmallVector<Constant*, 2> Indices; 4381 Indices.push_back(ConstantInt::get(Int32Ty, 1-Lane)); 4382 Indices.push_back(ConstantInt::get(Int32Ty, Lane)); 4383 SV = llvm::ConstantVector::get(Indices); 4384 return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane"); 4385 } 4386 // fall through 4387 case ARM::BI__builtin_neon_vld1_lane_v: { 4388 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4389 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 4390 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4391 LoadInst *Ld = Builder.CreateLoad(Ops[0]); 4392 Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); 4393 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); 4394 } 4395 case ARM::BI__builtin_neon_vld1_dup_v: 4396 case ARM::BI__builtin_neon_vld1q_dup_v: { 4397 Value *V = UndefValue::get(Ty); 4398 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 4399 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4400 LoadInst *Ld = Builder.CreateLoad(Ops[0]); 4401 Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); 4402 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 4403 Ops[0] = Builder.CreateInsertElement(V, Ld, CI); 4404 return EmitNeonSplat(Ops[0], CI); 4405 } 4406 case ARM::BI__builtin_neon_vld2_v: 4407 case ARM::BI__builtin_neon_vld2q_v: { 4408 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2, Ty); 4409 Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld2"); 4410 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 4411 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4412 return Builder.CreateStore(Ops[1], Ops[0]); 4413 } 4414 case ARM::BI__builtin_neon_vld3_v: 4415 case ARM::BI__builtin_neon_vld3q_v: { 4416 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3, Ty); 4417 Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld3"); 4418 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 4419 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4420 return Builder.CreateStore(Ops[1], Ops[0]); 4421 } 4422 case ARM::BI__builtin_neon_vld4_v: 4423 case ARM::BI__builtin_neon_vld4q_v: { 4424 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4, Ty); 4425 Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld4"); 4426 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 4427 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4428 return Builder.CreateStore(Ops[1], Ops[0]); 4429 } 4430 case ARM::BI__builtin_neon_vld2_lane_v: 4431 case ARM::BI__builtin_neon_vld2q_lane_v: { 4432 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2lane, Ty); 4433 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 4434 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 4435 Ops.push_back(Align); 4436 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane"); 4437 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 4438 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4439 return Builder.CreateStore(Ops[1], Ops[0]); 4440 } 4441 case ARM::BI__builtin_neon_vld3_lane_v: 4442 case ARM::BI__builtin_neon_vld3q_lane_v: { 4443 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3lane, Ty); 4444 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 4445 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 4446 Ops[4] = Builder.CreateBitCast(Ops[4], Ty); 4447 Ops.push_back(Align); 4448 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane"); 4449 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 4450 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4451 return Builder.CreateStore(Ops[1], Ops[0]); 4452 } 4453 case ARM::BI__builtin_neon_vld4_lane_v: 4454 case ARM::BI__builtin_neon_vld4q_lane_v: { 4455 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4lane, Ty); 4456 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 4457 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 4458 Ops[4] = Builder.CreateBitCast(Ops[4], Ty); 4459 Ops[5] = Builder.CreateBitCast(Ops[5], Ty); 4460 Ops.push_back(Align); 4461 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane"); 4462 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 4463 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4464 return Builder.CreateStore(Ops[1], Ops[0]); 4465 } 4466 case ARM::BI__builtin_neon_vld2_dup_v: 4467 case ARM::BI__builtin_neon_vld3_dup_v: 4468 case ARM::BI__builtin_neon_vld4_dup_v: { 4469 // Handle 64-bit elements as a special-case. There is no "dup" needed. 4470 if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) { 4471 switch (BuiltinID) { 4472 case ARM::BI__builtin_neon_vld2_dup_v: 4473 Int = Intrinsic::arm_neon_vld2; 4474 break; 4475 case ARM::BI__builtin_neon_vld3_dup_v: 4476 Int = Intrinsic::arm_neon_vld3; 4477 break; 4478 case ARM::BI__builtin_neon_vld4_dup_v: 4479 Int = Intrinsic::arm_neon_vld4; 4480 break; 4481 default: llvm_unreachable("unknown vld_dup intrinsic?"); 4482 } 4483 Function *F = CGM.getIntrinsic(Int, Ty); 4484 Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup"); 4485 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 4486 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4487 return Builder.CreateStore(Ops[1], Ops[0]); 4488 } 4489 switch (BuiltinID) { 4490 case ARM::BI__builtin_neon_vld2_dup_v: 4491 Int = Intrinsic::arm_neon_vld2lane; 4492 break; 4493 case ARM::BI__builtin_neon_vld3_dup_v: 4494 Int = Intrinsic::arm_neon_vld3lane; 4495 break; 4496 case ARM::BI__builtin_neon_vld4_dup_v: 4497 Int = Intrinsic::arm_neon_vld4lane; 4498 break; 4499 default: llvm_unreachable("unknown vld_dup intrinsic?"); 4500 } 4501 Function *F = CGM.getIntrinsic(Int, Ty); 4502 llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType()); 4503 4504 SmallVector<Value*, 6> Args; 4505 Args.push_back(Ops[1]); 4506 Args.append(STy->getNumElements(), UndefValue::get(Ty)); 4507 4508 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 4509 Args.push_back(CI); 4510 Args.push_back(Align); 4511 4512 Ops[1] = Builder.CreateCall(F, Args, "vld_dup"); 4513 // splat lane 0 to all elts in each vector of the result. 4514 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 4515 Value *Val = Builder.CreateExtractValue(Ops[1], i); 4516 Value *Elt = Builder.CreateBitCast(Val, Ty); 4517 Elt = EmitNeonSplat(Elt, CI); 4518 Elt = Builder.CreateBitCast(Elt, Val->getType()); 4519 Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i); 4520 } 4521 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 4522 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4523 return Builder.CreateStore(Ops[1], Ops[0]); 4524 } 4525 case ARM::BI__builtin_neon_vmax_v: 4526 case ARM::BI__builtin_neon_vmaxq_v: 4527 Int = usgn ? Intrinsic::arm_neon_vmaxu : Intrinsic::arm_neon_vmaxs; 4528 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax"); 4529 case ARM::BI__builtin_neon_vmin_v: 4530 case ARM::BI__builtin_neon_vminq_v: 4531 Int = usgn ? Intrinsic::arm_neon_vminu : Intrinsic::arm_neon_vmins; 4532 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin"); 4533 case ARM::BI__builtin_neon_vmovl_v: { 4534 llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy); 4535 Ops[0] = Builder.CreateBitCast(Ops[0], DTy); 4536 if (usgn) 4537 return Builder.CreateZExt(Ops[0], Ty, "vmovl"); 4538 return Builder.CreateSExt(Ops[0], Ty, "vmovl"); 4539 } 4540 case ARM::BI__builtin_neon_vmovn_v: { 4541 llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy); 4542 Ops[0] = Builder.CreateBitCast(Ops[0], QTy); 4543 return Builder.CreateTrunc(Ops[0], Ty, "vmovn"); 4544 } 4545 case ARM::BI__builtin_neon_vmul_v: 4546 case ARM::BI__builtin_neon_vmulq_v: 4547 assert(Type.isPoly() && "vmul builtin only supported for polynomial types"); 4548 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vmulp, Ty), 4549 Ops, "vmul"); 4550 case ARM::BI__builtin_neon_vmull_v: 4551 // FIXME: the integer vmull operations could be emitted in terms of pure 4552 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of 4553 // hoisting the exts outside loops. Until global ISel comes along that can 4554 // see through such movement this leads to bad CodeGen. So we need an 4555 // intrinsic for now. 4556 Int = usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls; 4557 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int; 4558 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); 4559 case ARM::BI__builtin_neon_vfma_v: 4560 case ARM::BI__builtin_neon_vfmaq_v: { 4561 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 4562 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4563 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4564 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 4565 4566 // NEON intrinsic puts accumulator first, unlike the LLVM fma. 4567 return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); 4568 } 4569 case ARM::BI__builtin_neon_vpadal_v: 4570 case ARM::BI__builtin_neon_vpadalq_v: { 4571 Int = usgn ? Intrinsic::arm_neon_vpadalu : Intrinsic::arm_neon_vpadals; 4572 // The source operand type has twice as many elements of half the size. 4573 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); 4574 llvm::Type *EltTy = 4575 llvm::IntegerType::get(getLLVMContext(), EltBits / 2); 4576 llvm::Type *NarrowTy = 4577 llvm::VectorType::get(EltTy, VTy->getNumElements() * 2); 4578 llvm::Type *Tys[2] = { Ty, NarrowTy }; 4579 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpadal"); 4580 } 4581 case ARM::BI__builtin_neon_vpadd_v: 4582 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vpadd, Ty), 4583 Ops, "vpadd"); 4584 case ARM::BI__builtin_neon_vpaddl_v: 4585 case ARM::BI__builtin_neon_vpaddlq_v: { 4586 Int = usgn ? Intrinsic::arm_neon_vpaddlu : Intrinsic::arm_neon_vpaddls; 4587 // The source operand type has twice as many elements of half the size. 4588 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); 4589 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2); 4590 llvm::Type *NarrowTy = 4591 llvm::VectorType::get(EltTy, VTy->getNumElements() * 2); 4592 llvm::Type *Tys[2] = { Ty, NarrowTy }; 4593 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl"); 4594 } 4595 case ARM::BI__builtin_neon_vpmax_v: 4596 Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs; 4597 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); 4598 case ARM::BI__builtin_neon_vpmin_v: 4599 Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins; 4600 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin"); 4601 case ARM::BI__builtin_neon_vqabs_v: 4602 case ARM::BI__builtin_neon_vqabsq_v: 4603 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqabs, Ty), 4604 Ops, "vqabs"); 4605 case ARM::BI__builtin_neon_vqadd_v: 4606 case ARM::BI__builtin_neon_vqaddq_v: 4607 Int = usgn ? Intrinsic::arm_neon_vqaddu : Intrinsic::arm_neon_vqadds; 4608 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqadd"); 4609 case ARM::BI__builtin_neon_vqdmlal_v: { 4610 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end()); 4611 Value *Mul = EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty), 4612 MulOps, "vqdmlal"); 4613 4614 SmallVector<Value *, 2> AddOps; 4615 AddOps.push_back(Ops[0]); 4616 AddOps.push_back(Mul); 4617 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqadds, Ty), 4618 AddOps, "vqdmlal"); 4619 } 4620 case ARM::BI__builtin_neon_vqdmlsl_v: { 4621 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end()); 4622 Value *Mul = EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty), 4623 MulOps, "vqdmlsl"); 4624 4625 SmallVector<Value *, 2> SubOps; 4626 SubOps.push_back(Ops[0]); 4627 SubOps.push_back(Mul); 4628 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqsubs, Ty), 4629 SubOps, "vqdmlsl"); 4630 } 4631 case ARM::BI__builtin_neon_vqdmulh_v: 4632 case ARM::BI__builtin_neon_vqdmulhq_v: 4633 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmulh, Ty), 4634 Ops, "vqdmulh"); 4635 case ARM::BI__builtin_neon_vqdmull_v: 4636 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty), 4637 Ops, "vqdmull"); 4638 case ARM::BI__builtin_neon_vqmovn_v: 4639 Int = usgn ? Intrinsic::arm_neon_vqmovnu : Intrinsic::arm_neon_vqmovns; 4640 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqmovn"); 4641 case ARM::BI__builtin_neon_vqmovun_v: 4642 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqmovnsu, Ty), 4643 Ops, "vqdmull"); 4644 case ARM::BI__builtin_neon_vqneg_v: 4645 case ARM::BI__builtin_neon_vqnegq_v: 4646 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqneg, Ty), 4647 Ops, "vqneg"); 4648 case ARM::BI__builtin_neon_vqrdmulh_v: 4649 case ARM::BI__builtin_neon_vqrdmulhq_v: 4650 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrdmulh, Ty), 4651 Ops, "vqrdmulh"); 4652 case ARM::BI__builtin_neon_vqrshl_v: 4653 case ARM::BI__builtin_neon_vqrshlq_v: 4654 Int = usgn ? Intrinsic::arm_neon_vqrshiftu : Intrinsic::arm_neon_vqrshifts; 4655 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshl"); 4656 case ARM::BI__builtin_neon_vqrshrn_n_v: 4657 Int = 4658 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns; 4659 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n", 4660 1, true); 4661 case ARM::BI__builtin_neon_vqrshrun_n_v: 4662 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty), 4663 Ops, "vqrshrun_n", 1, true); 4664 case ARM::BI__builtin_neon_vqshl_v: 4665 case ARM::BI__builtin_neon_vqshlq_v: 4666 Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts; 4667 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl"); 4668 case ARM::BI__builtin_neon_vqshl_n_v: 4669 case ARM::BI__builtin_neon_vqshlq_n_v: 4670 Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts; 4671 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n", 4672 1, false); 4673 case ARM::BI__builtin_neon_vqshlu_n_v: 4674 case ARM::BI__builtin_neon_vqshluq_n_v: 4675 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftsu, Ty), 4676 Ops, "vqshlu", 1, false); 4677 case ARM::BI__builtin_neon_vqshrn_n_v: 4678 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns; 4679 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n", 4680 1, true); 4681 case ARM::BI__builtin_neon_vqshrun_n_v: 4682 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty), 4683 Ops, "vqshrun_n", 1, true); 4684 case ARM::BI__builtin_neon_vqsub_v: 4685 case ARM::BI__builtin_neon_vqsubq_v: 4686 Int = usgn ? Intrinsic::arm_neon_vqsubu : Intrinsic::arm_neon_vqsubs; 4687 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqsub"); 4688 case ARM::BI__builtin_neon_vraddhn_v: 4689 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vraddhn, Ty), 4690 Ops, "vraddhn"); 4691 case ARM::BI__builtin_neon_vrecpe_v: 4692 case ARM::BI__builtin_neon_vrecpeq_v: 4693 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty), 4694 Ops, "vrecpe"); 4695 case ARM::BI__builtin_neon_vrecps_v: 4696 case ARM::BI__builtin_neon_vrecpsq_v: 4697 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecps, Ty), 4698 Ops, "vrecps"); 4699 case ARM::BI__builtin_neon_vrhadd_v: 4700 case ARM::BI__builtin_neon_vrhaddq_v: 4701 Int = usgn ? Intrinsic::arm_neon_vrhaddu : Intrinsic::arm_neon_vrhadds; 4702 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrhadd"); 4703 case ARM::BI__builtin_neon_vrshl_v: 4704 case ARM::BI__builtin_neon_vrshlq_v: 4705 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts; 4706 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshl"); 4707 case ARM::BI__builtin_neon_vrshrn_n_v: 4708 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty), 4709 Ops, "vrshrn_n", 1, true); 4710 case ARM::BI__builtin_neon_vrshr_n_v: 4711 case ARM::BI__builtin_neon_vrshrq_n_v: 4712 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts; 4713 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 1, true); 4714 case ARM::BI__builtin_neon_vrsqrte_v: 4715 case ARM::BI__builtin_neon_vrsqrteq_v: 4716 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrte, Ty), 4717 Ops, "vrsqrte"); 4718 case ARM::BI__builtin_neon_vrsqrts_v: 4719 case ARM::BI__builtin_neon_vrsqrtsq_v: 4720 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrts, Ty), 4721 Ops, "vrsqrts"); 4722 case ARM::BI__builtin_neon_vrsra_n_v: 4723 case ARM::BI__builtin_neon_vrsraq_n_v: 4724 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4725 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4726 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true); 4727 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts; 4728 Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]); 4729 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n"); 4730 case ARM::BI__builtin_neon_vrsubhn_v: 4731 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsubhn, Ty), 4732 Ops, "vrsubhn"); 4733 case ARM::BI__builtin_neon_vshl_v: 4734 case ARM::BI__builtin_neon_vshlq_v: 4735 Int = usgn ? Intrinsic::arm_neon_vshiftu : Intrinsic::arm_neon_vshifts; 4736 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshl"); 4737 case ARM::BI__builtin_neon_vshll_n_v: 4738 Int = usgn ? Intrinsic::arm_neon_vshiftlu : Intrinsic::arm_neon_vshiftls; 4739 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshll", 1); 4740 case ARM::BI__builtin_neon_vshl_n_v: 4741 case ARM::BI__builtin_neon_vshlq_n_v: 4742 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false); 4743 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1], 4744 "vshl_n"); 4745 case ARM::BI__builtin_neon_vshrn_n_v: 4746 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftn, Ty), 4747 Ops, "vshrn_n", 1, true); 4748 case ARM::BI__builtin_neon_vshr_n_v: 4749 case ARM::BI__builtin_neon_vshrq_n_v: 4750 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, usgn, "vshr_n"); 4751 case ARM::BI__builtin_neon_vsri_n_v: 4752 case ARM::BI__builtin_neon_vsriq_n_v: 4753 rightShift = true; 4754 case ARM::BI__builtin_neon_vsli_n_v: 4755 case ARM::BI__builtin_neon_vsliq_n_v: 4756 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift); 4757 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty), 4758 Ops, "vsli_n"); 4759 case ARM::BI__builtin_neon_vsra_n_v: 4760 case ARM::BI__builtin_neon_vsraq_n_v: 4761 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4762 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 4763 return Builder.CreateAdd(Ops[0], Ops[1]); 4764 case ARM::BI__builtin_neon_vst1_v: 4765 case ARM::BI__builtin_neon_vst1q_v: 4766 Ops.push_back(Align); 4767 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, Ty), 4768 Ops, ""); 4769 case ARM::BI__builtin_neon_vst1q_lane_v: 4770 // Handle 64-bit integer elements as a special case. Use a shuffle to get 4771 // a one-element vector and avoid poor code for i64 in the backend. 4772 if (VTy->getElementType()->isIntegerTy(64)) { 4773 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4774 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2])); 4775 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 4776 Ops[2] = Align; 4777 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, 4778 Ops[1]->getType()), Ops); 4779 } 4780 // fall through 4781 case ARM::BI__builtin_neon_vst1_lane_v: { 4782 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4783 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 4784 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 4785 StoreInst *St = Builder.CreateStore(Ops[1], 4786 Builder.CreateBitCast(Ops[0], Ty)); 4787 St->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); 4788 return St; 4789 } 4790 case ARM::BI__builtin_neon_vst2_v: 4791 case ARM::BI__builtin_neon_vst2q_v: 4792 Ops.push_back(Align); 4793 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2, Ty), 4794 Ops, ""); 4795 case ARM::BI__builtin_neon_vst2_lane_v: 4796 case ARM::BI__builtin_neon_vst2q_lane_v: 4797 Ops.push_back(Align); 4798 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2lane, Ty), 4799 Ops, ""); 4800 case ARM::BI__builtin_neon_vst3_v: 4801 case ARM::BI__builtin_neon_vst3q_v: 4802 Ops.push_back(Align); 4803 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3, Ty), 4804 Ops, ""); 4805 case ARM::BI__builtin_neon_vst3_lane_v: 4806 case ARM::BI__builtin_neon_vst3q_lane_v: 4807 Ops.push_back(Align); 4808 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3lane, Ty), 4809 Ops, ""); 4810 case ARM::BI__builtin_neon_vst4_v: 4811 case ARM::BI__builtin_neon_vst4q_v: 4812 Ops.push_back(Align); 4813 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4, Ty), 4814 Ops, ""); 4815 case ARM::BI__builtin_neon_vst4_lane_v: 4816 case ARM::BI__builtin_neon_vst4q_lane_v: 4817 Ops.push_back(Align); 4818 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4lane, Ty), 4819 Ops, ""); 4820 case ARM::BI__builtin_neon_vsubhn_v: { 4821 llvm::VectorType *SrcTy = 4822 llvm::VectorType::getExtendedElementVectorType(VTy); 4823 4824 // %sum = add <4 x i32> %lhs, %rhs 4825 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 4826 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy); 4827 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn"); 4828 4829 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> 4830 Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(), 4831 SrcTy->getScalarSizeInBits() / 2); 4832 ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt); 4833 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn"); 4834 4835 // %res = trunc <4 x i32> %high to <4 x i16> 4836 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn"); 4837 } 4838 case ARM::BI__builtin_neon_vtbl1_v: 4839 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1), 4840 Ops, "vtbl1"); 4841 case ARM::BI__builtin_neon_vtbl2_v: 4842 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2), 4843 Ops, "vtbl2"); 4844 case ARM::BI__builtin_neon_vtbl3_v: 4845 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3), 4846 Ops, "vtbl3"); 4847 case ARM::BI__builtin_neon_vtbl4_v: 4848 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4), 4849 Ops, "vtbl4"); 4850 case ARM::BI__builtin_neon_vtbx1_v: 4851 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1), 4852 Ops, "vtbx1"); 4853 case ARM::BI__builtin_neon_vtbx2_v: 4854 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2), 4855 Ops, "vtbx2"); 4856 case ARM::BI__builtin_neon_vtbx3_v: 4857 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3), 4858 Ops, "vtbx3"); 4859 case ARM::BI__builtin_neon_vtbx4_v: 4860 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4), 4861 Ops, "vtbx4"); 4862 case ARM::BI__builtin_neon_vtst_v: 4863 case ARM::BI__builtin_neon_vtstq_v: { 4864 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4865 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4866 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); 4867 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], 4868 ConstantAggregateZero::get(Ty)); 4869 return Builder.CreateSExt(Ops[0], Ty, "vtst"); 4870 } 4871 case ARM::BI__builtin_neon_vtrn_v: 4872 case ARM::BI__builtin_neon_vtrnq_v: { 4873 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 4874 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4875 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 4876 Value *SV = 0; 4877 4878 for (unsigned vi = 0; vi != 2; ++vi) { 4879 SmallVector<Constant*, 16> Indices; 4880 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 4881 Indices.push_back(Builder.getInt32(i+vi)); 4882 Indices.push_back(Builder.getInt32(i+e+vi)); 4883 } 4884 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); 4885 SV = llvm::ConstantVector::get(Indices); 4886 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn"); 4887 SV = Builder.CreateStore(SV, Addr); 4888 } 4889 return SV; 4890 } 4891 case ARM::BI__builtin_neon_vuzp_v: 4892 case ARM::BI__builtin_neon_vuzpq_v: { 4893 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 4894 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4895 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 4896 Value *SV = 0; 4897 4898 for (unsigned vi = 0; vi != 2; ++vi) { 4899 SmallVector<Constant*, 16> Indices; 4900 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 4901 Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi)); 4902 4903 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); 4904 SV = llvm::ConstantVector::get(Indices); 4905 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp"); 4906 SV = Builder.CreateStore(SV, Addr); 4907 } 4908 return SV; 4909 } 4910 case ARM::BI__builtin_neon_vzip_v: 4911 case ARM::BI__builtin_neon_vzipq_v: { 4912 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 4913 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4914 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 4915 Value *SV = 0; 4916 4917 for (unsigned vi = 0; vi != 2; ++vi) { 4918 SmallVector<Constant*, 16> Indices; 4919 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 4920 Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1)); 4921 Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e)); 4922 } 4923 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); 4924 SV = llvm::ConstantVector::get(Indices); 4925 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip"); 4926 SV = Builder.CreateStore(SV, Addr); 4927 } 4928 return SV; 4929 } 4930 } 4931 } 4932 4933 llvm::Value *CodeGenFunction:: 4934 BuildVector(ArrayRef<llvm::Value*> Ops) { 4935 assert((Ops.size() & (Ops.size() - 1)) == 0 && 4936 "Not a power-of-two sized vector!"); 4937 bool AllConstants = true; 4938 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i) 4939 AllConstants &= isa<Constant>(Ops[i]); 4940 4941 // If this is a constant vector, create a ConstantVector. 4942 if (AllConstants) { 4943 SmallVector<llvm::Constant*, 16> CstOps; 4944 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 4945 CstOps.push_back(cast<Constant>(Ops[i])); 4946 return llvm::ConstantVector::get(CstOps); 4947 } 4948 4949 // Otherwise, insertelement the values to build the vector. 4950 Value *Result = 4951 llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size())); 4952 4953 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 4954 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i)); 4955 4956 return Result; 4957 } 4958 4959 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, 4960 const CallExpr *E) { 4961 SmallVector<Value*, 4> Ops; 4962 4963 // Find out if any arguments are required to be integer constant expressions. 4964 unsigned ICEArguments = 0; 4965 ASTContext::GetBuiltinTypeError Error; 4966 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 4967 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 4968 4969 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { 4970 // If this is a normal argument, just emit it as a scalar. 4971 if ((ICEArguments & (1 << i)) == 0) { 4972 Ops.push_back(EmitScalarExpr(E->getArg(i))); 4973 continue; 4974 } 4975 4976 // If this is required to be a constant, constant fold it so that we know 4977 // that the generated intrinsic gets a ConstantInt. 4978 llvm::APSInt Result; 4979 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 4980 assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; 4981 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 4982 } 4983 4984 switch (BuiltinID) { 4985 default: return 0; 4986 case X86::BI__builtin_ia32_vec_init_v8qi: 4987 case X86::BI__builtin_ia32_vec_init_v4hi: 4988 case X86::BI__builtin_ia32_vec_init_v2si: 4989 return Builder.CreateBitCast(BuildVector(Ops), 4990 llvm::Type::getX86_MMXTy(getLLVMContext())); 4991 case X86::BI__builtin_ia32_vec_ext_v2si: 4992 return Builder.CreateExtractElement(Ops[0], 4993 llvm::ConstantInt::get(Ops[1]->getType(), 0)); 4994 case X86::BI__builtin_ia32_ldmxcsr: { 4995 Value *Tmp = CreateMemTemp(E->getArg(0)->getType()); 4996 Builder.CreateStore(Ops[0], Tmp); 4997 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr), 4998 Builder.CreateBitCast(Tmp, Int8PtrTy)); 4999 } 5000 case X86::BI__builtin_ia32_stmxcsr: { 5001 Value *Tmp = CreateMemTemp(E->getType()); 5002 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr), 5003 Builder.CreateBitCast(Tmp, Int8PtrTy)); 5004 return Builder.CreateLoad(Tmp, "stmxcsr"); 5005 } 5006 case X86::BI__builtin_ia32_storehps: 5007 case X86::BI__builtin_ia32_storelps: { 5008 llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty); 5009 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2); 5010 5011 // cast val v2i64 5012 Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast"); 5013 5014 // extract (0, 1) 5015 unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1; 5016 llvm::Value *Idx = llvm::ConstantInt::get(Int32Ty, Index); 5017 Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract"); 5018 5019 // cast pointer to i64 & store 5020 Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy); 5021 return Builder.CreateStore(Ops[1], Ops[0]); 5022 } 5023 case X86::BI__builtin_ia32_palignr: { 5024 unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 5025 5026 // If palignr is shifting the pair of input vectors less than 9 bytes, 5027 // emit a shuffle instruction. 5028 if (shiftVal <= 8) { 5029 SmallVector<llvm::Constant*, 8> Indices; 5030 for (unsigned i = 0; i != 8; ++i) 5031 Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i)); 5032 5033 Value* SV = llvm::ConstantVector::get(Indices); 5034 return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr"); 5035 } 5036 5037 // If palignr is shifting the pair of input vectors more than 8 but less 5038 // than 16 bytes, emit a logical right shift of the destination. 5039 if (shiftVal < 16) { 5040 // MMX has these as 1 x i64 vectors for some odd optimization reasons. 5041 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 1); 5042 5043 Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); 5044 Ops[1] = llvm::ConstantInt::get(VecTy, (shiftVal-8) * 8); 5045 5046 // create i32 constant 5047 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_mmx_psrl_q); 5048 return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr"); 5049 } 5050 5051 // If palignr is shifting the pair of vectors more than 16 bytes, emit zero. 5052 return llvm::Constant::getNullValue(ConvertType(E->getType())); 5053 } 5054 case X86::BI__builtin_ia32_palignr128: { 5055 unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 5056 5057 // If palignr is shifting the pair of input vectors less than 17 bytes, 5058 // emit a shuffle instruction. 5059 if (shiftVal <= 16) { 5060 SmallVector<llvm::Constant*, 16> Indices; 5061 for (unsigned i = 0; i != 16; ++i) 5062 Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i)); 5063 5064 Value* SV = llvm::ConstantVector::get(Indices); 5065 return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr"); 5066 } 5067 5068 // If palignr is shifting the pair of input vectors more than 16 but less 5069 // than 32 bytes, emit a logical right shift of the destination. 5070 if (shiftVal < 32) { 5071 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2); 5072 5073 Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); 5074 Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8); 5075 5076 // create i32 constant 5077 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq); 5078 return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr"); 5079 } 5080 5081 // If palignr is shifting the pair of vectors more than 32 bytes, emit zero. 5082 return llvm::Constant::getNullValue(ConvertType(E->getType())); 5083 } 5084 case X86::BI__builtin_ia32_palignr256: { 5085 unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 5086 5087 // If palignr is shifting the pair of input vectors less than 17 bytes, 5088 // emit a shuffle instruction. 5089 if (shiftVal <= 16) { 5090 SmallVector<llvm::Constant*, 32> Indices; 5091 // 256-bit palignr operates on 128-bit lanes so we need to handle that 5092 for (unsigned l = 0; l != 2; ++l) { 5093 unsigned LaneStart = l * 16; 5094 unsigned LaneEnd = (l+1) * 16; 5095 for (unsigned i = 0; i != 16; ++i) { 5096 unsigned Idx = shiftVal + i + LaneStart; 5097 if (Idx >= LaneEnd) Idx += 16; // end of lane, switch operand 5098 Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx)); 5099 } 5100 } 5101 5102 Value* SV = llvm::ConstantVector::get(Indices); 5103 return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr"); 5104 } 5105 5106 // If palignr is shifting the pair of input vectors more than 16 but less 5107 // than 32 bytes, emit a logical right shift of the destination. 5108 if (shiftVal < 32) { 5109 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 4); 5110 5111 Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); 5112 Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8); 5113 5114 // create i32 constant 5115 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_avx2_psrl_dq); 5116 return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr"); 5117 } 5118 5119 // If palignr is shifting the pair of vectors more than 32 bytes, emit zero. 5120 return llvm::Constant::getNullValue(ConvertType(E->getType())); 5121 } 5122 case X86::BI__builtin_ia32_movntps: 5123 case X86::BI__builtin_ia32_movntps256: 5124 case X86::BI__builtin_ia32_movntpd: 5125 case X86::BI__builtin_ia32_movntpd256: 5126 case X86::BI__builtin_ia32_movntdq: 5127 case X86::BI__builtin_ia32_movntdq256: 5128 case X86::BI__builtin_ia32_movnti: 5129 case X86::BI__builtin_ia32_movnti64: { 5130 llvm::MDNode *Node = llvm::MDNode::get(getLLVMContext(), 5131 Builder.getInt32(1)); 5132 5133 // Convert the type of the pointer to a pointer to the stored type. 5134 Value *BC = Builder.CreateBitCast(Ops[0], 5135 llvm::PointerType::getUnqual(Ops[1]->getType()), 5136 "cast"); 5137 StoreInst *SI = Builder.CreateStore(Ops[1], BC); 5138 SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); 5139 5140 // If the operand is an integer, we can't assume alignment. Otherwise, 5141 // assume natural alignment. 5142 QualType ArgTy = E->getArg(1)->getType(); 5143 unsigned Align; 5144 if (ArgTy->isIntegerType()) 5145 Align = 1; 5146 else 5147 Align = getContext().getTypeSizeInChars(ArgTy).getQuantity(); 5148 SI->setAlignment(Align); 5149 return SI; 5150 } 5151 // 3DNow! 5152 case X86::BI__builtin_ia32_pswapdsf: 5153 case X86::BI__builtin_ia32_pswapdsi: { 5154 const char *name = 0; 5155 Intrinsic::ID ID = Intrinsic::not_intrinsic; 5156 switch(BuiltinID) { 5157 default: llvm_unreachable("Unsupported intrinsic!"); 5158 case X86::BI__builtin_ia32_pswapdsf: 5159 case X86::BI__builtin_ia32_pswapdsi: 5160 name = "pswapd"; 5161 ID = Intrinsic::x86_3dnowa_pswapd; 5162 break; 5163 } 5164 llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext()); 5165 Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast"); 5166 llvm::Function *F = CGM.getIntrinsic(ID); 5167 return Builder.CreateCall(F, Ops, name); 5168 } 5169 case X86::BI__builtin_ia32_rdrand16_step: 5170 case X86::BI__builtin_ia32_rdrand32_step: 5171 case X86::BI__builtin_ia32_rdrand64_step: 5172 case X86::BI__builtin_ia32_rdseed16_step: 5173 case X86::BI__builtin_ia32_rdseed32_step: 5174 case X86::BI__builtin_ia32_rdseed64_step: { 5175 Intrinsic::ID ID; 5176 switch (BuiltinID) { 5177 default: llvm_unreachable("Unsupported intrinsic!"); 5178 case X86::BI__builtin_ia32_rdrand16_step: 5179 ID = Intrinsic::x86_rdrand_16; 5180 break; 5181 case X86::BI__builtin_ia32_rdrand32_step: 5182 ID = Intrinsic::x86_rdrand_32; 5183 break; 5184 case X86::BI__builtin_ia32_rdrand64_step: 5185 ID = Intrinsic::x86_rdrand_64; 5186 break; 5187 case X86::BI__builtin_ia32_rdseed16_step: 5188 ID = Intrinsic::x86_rdseed_16; 5189 break; 5190 case X86::BI__builtin_ia32_rdseed32_step: 5191 ID = Intrinsic::x86_rdseed_32; 5192 break; 5193 case X86::BI__builtin_ia32_rdseed64_step: 5194 ID = Intrinsic::x86_rdseed_64; 5195 break; 5196 } 5197 5198 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID)); 5199 Builder.CreateStore(Builder.CreateExtractValue(Call, 0), Ops[0]); 5200 return Builder.CreateExtractValue(Call, 1); 5201 } 5202 // AVX2 broadcast 5203 case X86::BI__builtin_ia32_vbroadcastsi256: { 5204 Value *VecTmp = CreateMemTemp(E->getArg(0)->getType()); 5205 Builder.CreateStore(Ops[0], VecTmp); 5206 Value *F = CGM.getIntrinsic(Intrinsic::x86_avx2_vbroadcasti128); 5207 return Builder.CreateCall(F, Builder.CreateBitCast(VecTmp, Int8PtrTy)); 5208 } 5209 } 5210 } 5211 5212 5213 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, 5214 const CallExpr *E) { 5215 SmallVector<Value*, 4> Ops; 5216 5217 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) 5218 Ops.push_back(EmitScalarExpr(E->getArg(i))); 5219 5220 Intrinsic::ID ID = Intrinsic::not_intrinsic; 5221 5222 switch (BuiltinID) { 5223 default: return 0; 5224 5225 // vec_ld, vec_lvsl, vec_lvsr 5226 case PPC::BI__builtin_altivec_lvx: 5227 case PPC::BI__builtin_altivec_lvxl: 5228 case PPC::BI__builtin_altivec_lvebx: 5229 case PPC::BI__builtin_altivec_lvehx: 5230 case PPC::BI__builtin_altivec_lvewx: 5231 case PPC::BI__builtin_altivec_lvsl: 5232 case PPC::BI__builtin_altivec_lvsr: 5233 { 5234 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); 5235 5236 Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]); 5237 Ops.pop_back(); 5238 5239 switch (BuiltinID) { 5240 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!"); 5241 case PPC::BI__builtin_altivec_lvx: 5242 ID = Intrinsic::ppc_altivec_lvx; 5243 break; 5244 case PPC::BI__builtin_altivec_lvxl: 5245 ID = Intrinsic::ppc_altivec_lvxl; 5246 break; 5247 case PPC::BI__builtin_altivec_lvebx: 5248 ID = Intrinsic::ppc_altivec_lvebx; 5249 break; 5250 case PPC::BI__builtin_altivec_lvehx: 5251 ID = Intrinsic::ppc_altivec_lvehx; 5252 break; 5253 case PPC::BI__builtin_altivec_lvewx: 5254 ID = Intrinsic::ppc_altivec_lvewx; 5255 break; 5256 case PPC::BI__builtin_altivec_lvsl: 5257 ID = Intrinsic::ppc_altivec_lvsl; 5258 break; 5259 case PPC::BI__builtin_altivec_lvsr: 5260 ID = Intrinsic::ppc_altivec_lvsr; 5261 break; 5262 } 5263 llvm::Function *F = CGM.getIntrinsic(ID); 5264 return Builder.CreateCall(F, Ops, ""); 5265 } 5266 5267 // vec_st 5268 case PPC::BI__builtin_altivec_stvx: 5269 case PPC::BI__builtin_altivec_stvxl: 5270 case PPC::BI__builtin_altivec_stvebx: 5271 case PPC::BI__builtin_altivec_stvehx: 5272 case PPC::BI__builtin_altivec_stvewx: 5273 { 5274 Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy); 5275 Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]); 5276 Ops.pop_back(); 5277 5278 switch (BuiltinID) { 5279 default: llvm_unreachable("Unsupported st intrinsic!"); 5280 case PPC::BI__builtin_altivec_stvx: 5281 ID = Intrinsic::ppc_altivec_stvx; 5282 break; 5283 case PPC::BI__builtin_altivec_stvxl: 5284 ID = Intrinsic::ppc_altivec_stvxl; 5285 break; 5286 case PPC::BI__builtin_altivec_stvebx: 5287 ID = Intrinsic::ppc_altivec_stvebx; 5288 break; 5289 case PPC::BI__builtin_altivec_stvehx: 5290 ID = Intrinsic::ppc_altivec_stvehx; 5291 break; 5292 case PPC::BI__builtin_altivec_stvewx: 5293 ID = Intrinsic::ppc_altivec_stvewx; 5294 break; 5295 } 5296 llvm::Function *F = CGM.getIntrinsic(ID); 5297 return Builder.CreateCall(F, Ops, ""); 5298 } 5299 } 5300 } 5301