1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This contains code to emit Builtin calls as LLVM code. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CodeGenFunction.h" 15 #include "CGObjCRuntime.h" 16 #include "CodeGenModule.h" 17 #include "TargetInfo.h" 18 #include "clang/AST/ASTContext.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/Basic/TargetBuiltins.h" 21 #include "clang/Basic/TargetInfo.h" 22 #include "clang/CodeGen/CGFunctionInfo.h" 23 #include "llvm/IR/DataLayout.h" 24 #include "llvm/IR/Intrinsics.h" 25 26 using namespace clang; 27 using namespace CodeGen; 28 using namespace llvm; 29 30 /// getBuiltinLibFunction - Given a builtin id for a function like 31 /// "__builtin_fabsf", return a Function* for "fabsf". 32 llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, 33 unsigned BuiltinID) { 34 assert(Context.BuiltinInfo.isLibFunction(BuiltinID)); 35 36 // Get the name, skip over the __builtin_ prefix (if necessary). 37 StringRef Name; 38 GlobalDecl D(FD); 39 40 // If the builtin has been declared explicitly with an assembler label, 41 // use the mangled name. This differs from the plain label on platforms 42 // that prefix labels. 43 if (FD->hasAttr<AsmLabelAttr>()) 44 Name = getMangledName(D); 45 else 46 Name = Context.BuiltinInfo.GetName(BuiltinID) + 10; 47 48 llvm::FunctionType *Ty = 49 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType())); 50 51 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false); 52 } 53 54 /// Emit the conversions required to turn the given value into an 55 /// integer of the given size. 
56 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V, 57 QualType T, llvm::IntegerType *IntType) { 58 V = CGF.EmitToMemory(V, T); 59 60 if (V->getType()->isPointerTy()) 61 return CGF.Builder.CreatePtrToInt(V, IntType); 62 63 assert(V->getType() == IntType); 64 return V; 65 } 66 67 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, 68 QualType T, llvm::Type *ResultType) { 69 V = CGF.EmitFromMemory(V, T); 70 71 if (ResultType->isPointerTy()) 72 return CGF.Builder.CreateIntToPtr(V, ResultType); 73 74 assert(V->getType() == ResultType); 75 return V; 76 } 77 78 /// Utility to insert an atomic instruction based on Instrinsic::ID 79 /// and the expression node. 80 static RValue EmitBinaryAtomic(CodeGenFunction &CGF, 81 llvm::AtomicRMWInst::BinOp Kind, 82 const CallExpr *E) { 83 QualType T = E->getType(); 84 assert(E->getArg(0)->getType()->isPointerType()); 85 assert(CGF.getContext().hasSameUnqualifiedType(T, 86 E->getArg(0)->getType()->getPointeeType())); 87 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 88 89 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 90 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 91 92 llvm::IntegerType *IntType = 93 llvm::IntegerType::get(CGF.getLLVMContext(), 94 CGF.getContext().getTypeSize(T)); 95 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 96 97 llvm::Value *Args[2]; 98 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 99 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 100 llvm::Type *ValueType = Args[1]->getType(); 101 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 102 103 llvm::Value *Result = 104 CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1], 105 llvm::SequentiallyConsistent); 106 Result = EmitFromInt(CGF, Result, T, ValueType); 107 return RValue::get(Result); 108 } 109 110 /// Utility to insert an atomic instruction based Instrinsic::ID and 111 /// the expression node, where the return value is the result of the 112 /// operation. 
113 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, 114 llvm::AtomicRMWInst::BinOp Kind, 115 const CallExpr *E, 116 Instruction::BinaryOps Op) { 117 QualType T = E->getType(); 118 assert(E->getArg(0)->getType()->isPointerType()); 119 assert(CGF.getContext().hasSameUnqualifiedType(T, 120 E->getArg(0)->getType()->getPointeeType())); 121 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 122 123 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 124 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 125 126 llvm::IntegerType *IntType = 127 llvm::IntegerType::get(CGF.getLLVMContext(), 128 CGF.getContext().getTypeSize(T)); 129 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 130 131 llvm::Value *Args[2]; 132 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 133 llvm::Type *ValueType = Args[1]->getType(); 134 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 135 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 136 137 llvm::Value *Result = 138 CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1], 139 llvm::SequentiallyConsistent); 140 Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]); 141 Result = EmitFromInt(CGF, Result, T, ValueType); 142 return RValue::get(Result); 143 } 144 145 /// EmitFAbs - Emit a call to fabs/fabsf/fabsl, depending on the type of ValTy, 146 /// which must be a scalar floating point type. 147 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V, QualType ValTy) { 148 const BuiltinType *ValTyP = ValTy->getAs<BuiltinType>(); 149 assert(ValTyP && "isn't scalar fp type!"); 150 151 StringRef FnName; 152 switch (ValTyP->getKind()) { 153 default: llvm_unreachable("Isn't a scalar fp type!"); 154 case BuiltinType::Float: FnName = "fabsf"; break; 155 case BuiltinType::Double: FnName = "fabs"; break; 156 case BuiltinType::LongDouble: FnName = "fabsl"; break; 157 } 158 159 // The prototype is something that takes and returns whatever V's type is. 
160 llvm::FunctionType *FT = llvm::FunctionType::get(V->getType(), V->getType(), 161 false); 162 llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction(FT, FnName); 163 164 return CGF.EmitNounwindRuntimeCall(Fn, V, "abs"); 165 } 166 167 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn, 168 const CallExpr *E, llvm::Value *calleeValue) { 169 return CGF.EmitCall(E->getCallee()->getType(), calleeValue, E->getLocStart(), 170 ReturnValueSlot(), E->arg_begin(), E->arg_end(), Fn); 171 } 172 173 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.* 174 /// depending on IntrinsicID. 175 /// 176 /// \arg CGF The current codegen function. 177 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate. 178 /// \arg X The first argument to the llvm.*.with.overflow.*. 179 /// \arg Y The second argument to the llvm.*.with.overflow.*. 180 /// \arg Carry The carry returned by the llvm.*.with.overflow.*. 181 /// \returns The result (i.e. sum/product) returned by the intrinsic. 182 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, 183 const llvm::Intrinsic::ID IntrinsicID, 184 llvm::Value *X, llvm::Value *Y, 185 llvm::Value *&Carry) { 186 // Make sure we have integers of the same width. 187 assert(X->getType() == Y->getType() && 188 "Arguments must be the same type. (Did you forget to make sure both " 189 "arguments have the same integer width?)"); 190 191 llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType()); 192 llvm::Value *Tmp = CGF.Builder.CreateCall2(Callee, X, Y); 193 Carry = CGF.Builder.CreateExtractValue(Tmp, 1); 194 return CGF.Builder.CreateExtractValue(Tmp, 0); 195 } 196 197 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, 198 unsigned BuiltinID, const CallExpr *E) { 199 // See if we can constant fold this builtin. If so, don't emit it at all. 
200 Expr::EvalResult Result; 201 if (E->EvaluateAsRValue(Result, CGM.getContext()) && 202 !Result.hasSideEffects()) { 203 if (Result.Val.isInt()) 204 return RValue::get(llvm::ConstantInt::get(getLLVMContext(), 205 Result.Val.getInt())); 206 if (Result.Val.isFloat()) 207 return RValue::get(llvm::ConstantFP::get(getLLVMContext(), 208 Result.Val.getFloat())); 209 } 210 211 switch (BuiltinID) { 212 default: break; // Handle intrinsics and libm functions below. 213 case Builtin::BI__builtin___CFStringMakeConstantString: 214 case Builtin::BI__builtin___NSStringMakeConstantString: 215 return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr)); 216 case Builtin::BI__builtin_stdarg_start: 217 case Builtin::BI__builtin_va_start: 218 case Builtin::BI__va_start: 219 case Builtin::BI__builtin_va_end: { 220 Value *ArgValue = (BuiltinID == Builtin::BI__va_start) 221 ? EmitScalarExpr(E->getArg(0)) 222 : EmitVAListRef(E->getArg(0)); 223 llvm::Type *DestType = Int8PtrTy; 224 if (ArgValue->getType() != DestType) 225 ArgValue = Builder.CreateBitCast(ArgValue, DestType, 226 ArgValue->getName().data()); 227 228 Intrinsic::ID inst = (BuiltinID == Builtin::BI__builtin_va_end) ? 
229 Intrinsic::vaend : Intrinsic::vastart; 230 return RValue::get(Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue)); 231 } 232 case Builtin::BI__builtin_va_copy: { 233 Value *DstPtr = EmitVAListRef(E->getArg(0)); 234 Value *SrcPtr = EmitVAListRef(E->getArg(1)); 235 236 llvm::Type *Type = Int8PtrTy; 237 238 DstPtr = Builder.CreateBitCast(DstPtr, Type); 239 SrcPtr = Builder.CreateBitCast(SrcPtr, Type); 240 return RValue::get(Builder.CreateCall2(CGM.getIntrinsic(Intrinsic::vacopy), 241 DstPtr, SrcPtr)); 242 } 243 case Builtin::BI__builtin_abs: 244 case Builtin::BI__builtin_labs: 245 case Builtin::BI__builtin_llabs: { 246 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 247 248 Value *NegOp = Builder.CreateNeg(ArgValue, "neg"); 249 Value *CmpResult = 250 Builder.CreateICmpSGE(ArgValue, 251 llvm::Constant::getNullValue(ArgValue->getType()), 252 "abscond"); 253 Value *Result = 254 Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs"); 255 256 return RValue::get(Result); 257 } 258 259 case Builtin::BI__builtin_conj: 260 case Builtin::BI__builtin_conjf: 261 case Builtin::BI__builtin_conjl: { 262 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 263 Value *Real = ComplexVal.first; 264 Value *Imag = ComplexVal.second; 265 Value *Zero = 266 Imag->getType()->isFPOrFPVectorTy() 267 ? 
llvm::ConstantFP::getZeroValueForNegation(Imag->getType()) 268 : llvm::Constant::getNullValue(Imag->getType()); 269 270 Imag = Builder.CreateFSub(Zero, Imag, "sub"); 271 return RValue::getComplex(std::make_pair(Real, Imag)); 272 } 273 case Builtin::BI__builtin_creal: 274 case Builtin::BI__builtin_crealf: 275 case Builtin::BI__builtin_creall: 276 case Builtin::BIcreal: 277 case Builtin::BIcrealf: 278 case Builtin::BIcreall: { 279 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 280 return RValue::get(ComplexVal.first); 281 } 282 283 case Builtin::BI__builtin_cimag: 284 case Builtin::BI__builtin_cimagf: 285 case Builtin::BI__builtin_cimagl: 286 case Builtin::BIcimag: 287 case Builtin::BIcimagf: 288 case Builtin::BIcimagl: { 289 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 290 return RValue::get(ComplexVal.second); 291 } 292 293 case Builtin::BI__builtin_ctzs: 294 case Builtin::BI__builtin_ctz: 295 case Builtin::BI__builtin_ctzl: 296 case Builtin::BI__builtin_ctzll: { 297 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 298 299 llvm::Type *ArgType = ArgValue->getType(); 300 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 301 302 llvm::Type *ResultType = ConvertType(E->getType()); 303 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); 304 Value *Result = Builder.CreateCall2(F, ArgValue, ZeroUndef); 305 if (Result->getType() != ResultType) 306 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 307 "cast"); 308 return RValue::get(Result); 309 } 310 case Builtin::BI__builtin_clzs: 311 case Builtin::BI__builtin_clz: 312 case Builtin::BI__builtin_clzl: 313 case Builtin::BI__builtin_clzll: { 314 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 315 316 llvm::Type *ArgType = ArgValue->getType(); 317 Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); 318 319 llvm::Type *ResultType = ConvertType(E->getType()); 320 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); 321 Value *Result = 
Builder.CreateCall2(F, ArgValue, ZeroUndef); 322 if (Result->getType() != ResultType) 323 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 324 "cast"); 325 return RValue::get(Result); 326 } 327 case Builtin::BI__builtin_ffs: 328 case Builtin::BI__builtin_ffsl: 329 case Builtin::BI__builtin_ffsll: { 330 // ffs(x) -> x ? cttz(x) + 1 : 0 331 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 332 333 llvm::Type *ArgType = ArgValue->getType(); 334 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 335 336 llvm::Type *ResultType = ConvertType(E->getType()); 337 Value *Tmp = Builder.CreateAdd(Builder.CreateCall2(F, ArgValue, 338 Builder.getTrue()), 339 llvm::ConstantInt::get(ArgType, 1)); 340 Value *Zero = llvm::Constant::getNullValue(ArgType); 341 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero"); 342 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs"); 343 if (Result->getType() != ResultType) 344 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 345 "cast"); 346 return RValue::get(Result); 347 } 348 case Builtin::BI__builtin_parity: 349 case Builtin::BI__builtin_parityl: 350 case Builtin::BI__builtin_parityll: { 351 // parity(x) -> ctpop(x) & 1 352 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 353 354 llvm::Type *ArgType = ArgValue->getType(); 355 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 356 357 llvm::Type *ResultType = ConvertType(E->getType()); 358 Value *Tmp = Builder.CreateCall(F, ArgValue); 359 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1)); 360 if (Result->getType() != ResultType) 361 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 362 "cast"); 363 return RValue::get(Result); 364 } 365 case Builtin::BI__builtin_popcount: 366 case Builtin::BI__builtin_popcountl: 367 case Builtin::BI__builtin_popcountll: { 368 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 369 370 llvm::Type *ArgType = ArgValue->getType(); 371 Value *F = 
CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 372 373 llvm::Type *ResultType = ConvertType(E->getType()); 374 Value *Result = Builder.CreateCall(F, ArgValue); 375 if (Result->getType() != ResultType) 376 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 377 "cast"); 378 return RValue::get(Result); 379 } 380 case Builtin::BI__builtin_expect: { 381 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 382 llvm::Type *ArgType = ArgValue->getType(); 383 384 Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType); 385 Value *ExpectedValue = EmitScalarExpr(E->getArg(1)); 386 387 Value *Result = Builder.CreateCall2(FnExpect, ArgValue, ExpectedValue, 388 "expval"); 389 return RValue::get(Result); 390 } 391 case Builtin::BI__builtin_bswap16: 392 case Builtin::BI__builtin_bswap32: 393 case Builtin::BI__builtin_bswap64: { 394 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 395 llvm::Type *ArgType = ArgValue->getType(); 396 Value *F = CGM.getIntrinsic(Intrinsic::bswap, ArgType); 397 return RValue::get(Builder.CreateCall(F, ArgValue)); 398 } 399 case Builtin::BI__builtin_object_size: { 400 // We rely on constant folding to deal with expressions with side effects. 401 assert(!E->getArg(0)->HasSideEffects(getContext()) && 402 "should have been constant folded"); 403 404 // We pass this builtin onto the optimizer so that it can 405 // figure out the object size in more complex cases. 406 llvm::Type *ResType = ConvertType(E->getType()); 407 408 // LLVM only supports 0 and 2, make sure that we pass along that 409 // as a boolean. 410 Value *Ty = EmitScalarExpr(E->getArg(1)); 411 ConstantInt *CI = dyn_cast<ConstantInt>(Ty); 412 assert(CI); 413 uint64_t val = CI->getZExtValue(); 414 CI = ConstantInt::get(Builder.getInt1Ty(), (val & 0x2) >> 1); 415 // FIXME: Get right address space. 
416 llvm::Type *Tys[] = { ResType, Builder.getInt8PtrTy(0) }; 417 Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys); 418 return RValue::get(Builder.CreateCall2(F, EmitScalarExpr(E->getArg(0)),CI)); 419 } 420 case Builtin::BI__builtin_prefetch: { 421 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); 422 // FIXME: Technically these constants should of type 'int', yes? 423 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) : 424 llvm::ConstantInt::get(Int32Ty, 0); 425 Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : 426 llvm::ConstantInt::get(Int32Ty, 3); 427 Value *Data = llvm::ConstantInt::get(Int32Ty, 1); 428 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 429 return RValue::get(Builder.CreateCall4(F, Address, RW, Locality, Data)); 430 } 431 case Builtin::BI__builtin_readcyclecounter: { 432 Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); 433 return RValue::get(Builder.CreateCall(F)); 434 } 435 case Builtin::BI__builtin___clear_cache: { 436 Value *Begin = EmitScalarExpr(E->getArg(0)); 437 Value *End = EmitScalarExpr(E->getArg(1)); 438 Value *F = CGM.getIntrinsic(Intrinsic::clear_cache); 439 return RValue::get(Builder.CreateCall2(F, Begin, End)); 440 } 441 case Builtin::BI__builtin_trap: { 442 Value *F = CGM.getIntrinsic(Intrinsic::trap); 443 return RValue::get(Builder.CreateCall(F)); 444 } 445 case Builtin::BI__debugbreak: { 446 Value *F = CGM.getIntrinsic(Intrinsic::debugtrap); 447 return RValue::get(Builder.CreateCall(F)); 448 } 449 case Builtin::BI__builtin_unreachable: { 450 if (SanOpts->Unreachable) { 451 SanitizerScope SanScope(this); 452 EmitCheck(Builder.getFalse(), "builtin_unreachable", 453 EmitCheckSourceLocation(E->getExprLoc()), 454 ArrayRef<llvm::Value *>(), CRK_Unrecoverable); 455 } else 456 Builder.CreateUnreachable(); 457 458 // We do need to preserve an insertion point. 
459 EmitBlock(createBasicBlock("unreachable.cont")); 460 461 return RValue::get(nullptr); 462 } 463 464 case Builtin::BI__builtin_powi: 465 case Builtin::BI__builtin_powif: 466 case Builtin::BI__builtin_powil: { 467 Value *Base = EmitScalarExpr(E->getArg(0)); 468 Value *Exponent = EmitScalarExpr(E->getArg(1)); 469 llvm::Type *ArgType = Base->getType(); 470 Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType); 471 return RValue::get(Builder.CreateCall2(F, Base, Exponent)); 472 } 473 474 case Builtin::BI__builtin_isgreater: 475 case Builtin::BI__builtin_isgreaterequal: 476 case Builtin::BI__builtin_isless: 477 case Builtin::BI__builtin_islessequal: 478 case Builtin::BI__builtin_islessgreater: 479 case Builtin::BI__builtin_isunordered: { 480 // Ordered comparisons: we know the arguments to these are matching scalar 481 // floating point values. 482 Value *LHS = EmitScalarExpr(E->getArg(0)); 483 Value *RHS = EmitScalarExpr(E->getArg(1)); 484 485 switch (BuiltinID) { 486 default: llvm_unreachable("Unknown ordered comparison"); 487 case Builtin::BI__builtin_isgreater: 488 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp"); 489 break; 490 case Builtin::BI__builtin_isgreaterequal: 491 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp"); 492 break; 493 case Builtin::BI__builtin_isless: 494 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp"); 495 break; 496 case Builtin::BI__builtin_islessequal: 497 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp"); 498 break; 499 case Builtin::BI__builtin_islessgreater: 500 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp"); 501 break; 502 case Builtin::BI__builtin_isunordered: 503 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp"); 504 break; 505 } 506 // ZExt bool to int type. 
507 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType()))); 508 } 509 case Builtin::BI__builtin_isnan: { 510 Value *V = EmitScalarExpr(E->getArg(0)); 511 V = Builder.CreateFCmpUNO(V, V, "cmp"); 512 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 513 } 514 515 case Builtin::BI__builtin_isinf: { 516 // isinf(x) --> fabs(x) == infinity 517 Value *V = EmitScalarExpr(E->getArg(0)); 518 V = EmitFAbs(*this, V, E->getArg(0)->getType()); 519 520 V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf"); 521 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 522 } 523 524 // TODO: BI__builtin_isinf_sign 525 // isinf_sign(x) -> isinf(x) ? (signbit(x) ? -1 : 1) : 0 526 527 case Builtin::BI__builtin_isnormal: { 528 // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min 529 Value *V = EmitScalarExpr(E->getArg(0)); 530 Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); 531 532 Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType()); 533 Value *IsLessThanInf = 534 Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); 535 APFloat Smallest = APFloat::getSmallestNormalized( 536 getContext().getFloatTypeSemantics(E->getArg(0)->getType())); 537 Value *IsNormal = 538 Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest), 539 "isnormal"); 540 V = Builder.CreateAnd(Eq, IsLessThanInf, "and"); 541 V = Builder.CreateAnd(V, IsNormal, "and"); 542 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 543 } 544 545 case Builtin::BI__builtin_isfinite: { 546 // isfinite(x) --> x == x && fabs(x) != infinity; 547 Value *V = EmitScalarExpr(E->getArg(0)); 548 Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); 549 550 Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType()); 551 Value *IsNotInf = 552 Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); 553 554 V = Builder.CreateAnd(Eq, IsNotInf, "and"); 555 return 
RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 556 } 557 558 case Builtin::BI__builtin_fpclassify: { 559 Value *V = EmitScalarExpr(E->getArg(5)); 560 llvm::Type *Ty = ConvertType(E->getArg(5)->getType()); 561 562 // Create Result 563 BasicBlock *Begin = Builder.GetInsertBlock(); 564 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn); 565 Builder.SetInsertPoint(End); 566 PHINode *Result = 567 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4, 568 "fpclassify_result"); 569 570 // if (V==0) return FP_ZERO 571 Builder.SetInsertPoint(Begin); 572 Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty), 573 "iszero"); 574 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4)); 575 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn); 576 Builder.CreateCondBr(IsZero, End, NotZero); 577 Result->addIncoming(ZeroLiteral, Begin); 578 579 // if (V != V) return FP_NAN 580 Builder.SetInsertPoint(NotZero); 581 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp"); 582 Value *NanLiteral = EmitScalarExpr(E->getArg(0)); 583 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn); 584 Builder.CreateCondBr(IsNan, End, NotNan); 585 Result->addIncoming(NanLiteral, NotZero); 586 587 // if (fabs(V) == infinity) return FP_INFINITY 588 Builder.SetInsertPoint(NotNan); 589 Value *VAbs = EmitFAbs(*this, V, E->getArg(5)->getType()); 590 Value *IsInf = 591 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()), 592 "isinf"); 593 Value *InfLiteral = EmitScalarExpr(E->getArg(1)); 594 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn); 595 Builder.CreateCondBr(IsInf, End, NotInf); 596 Result->addIncoming(InfLiteral, NotNan); 597 598 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL 599 Builder.SetInsertPoint(NotInf); 600 APFloat Smallest = APFloat::getSmallestNormalized( 601 getContext().getFloatTypeSemantics(E->getArg(5)->getType())); 602 Value *IsNormal = 603 
Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest), 604 "isnormal"); 605 Value *NormalResult = 606 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)), 607 EmitScalarExpr(E->getArg(3))); 608 Builder.CreateBr(End); 609 Result->addIncoming(NormalResult, NotInf); 610 611 // return Result 612 Builder.SetInsertPoint(End); 613 return RValue::get(Result); 614 } 615 616 case Builtin::BIalloca: 617 case Builtin::BI_alloca: 618 case Builtin::BI__builtin_alloca: { 619 Value *Size = EmitScalarExpr(E->getArg(0)); 620 return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size)); 621 } 622 case Builtin::BIbzero: 623 case Builtin::BI__builtin_bzero: { 624 std::pair<llvm::Value*, unsigned> Dest = 625 EmitPointerWithAlignment(E->getArg(0)); 626 Value *SizeVal = EmitScalarExpr(E->getArg(1)); 627 Builder.CreateMemSet(Dest.first, Builder.getInt8(0), SizeVal, 628 Dest.second, false); 629 return RValue::get(Dest.first); 630 } 631 case Builtin::BImemcpy: 632 case Builtin::BI__builtin_memcpy: { 633 std::pair<llvm::Value*, unsigned> Dest = 634 EmitPointerWithAlignment(E->getArg(0)); 635 std::pair<llvm::Value*, unsigned> Src = 636 EmitPointerWithAlignment(E->getArg(1)); 637 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 638 unsigned Align = std::min(Dest.second, Src.second); 639 Builder.CreateMemCpy(Dest.first, Src.first, SizeVal, Align, false); 640 return RValue::get(Dest.first); 641 } 642 643 case Builtin::BI__builtin___memcpy_chk: { 644 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2. 
645 llvm::APSInt Size, DstSize; 646 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 647 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 648 break; 649 if (Size.ugt(DstSize)) 650 break; 651 std::pair<llvm::Value*, unsigned> Dest = 652 EmitPointerWithAlignment(E->getArg(0)); 653 std::pair<llvm::Value*, unsigned> Src = 654 EmitPointerWithAlignment(E->getArg(1)); 655 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 656 unsigned Align = std::min(Dest.second, Src.second); 657 Builder.CreateMemCpy(Dest.first, Src.first, SizeVal, Align, false); 658 return RValue::get(Dest.first); 659 } 660 661 case Builtin::BI__builtin_objc_memmove_collectable: { 662 Value *Address = EmitScalarExpr(E->getArg(0)); 663 Value *SrcAddr = EmitScalarExpr(E->getArg(1)); 664 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 665 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, 666 Address, SrcAddr, SizeVal); 667 return RValue::get(Address); 668 } 669 670 case Builtin::BI__builtin___memmove_chk: { 671 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2. 
672 llvm::APSInt Size, DstSize; 673 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 674 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 675 break; 676 if (Size.ugt(DstSize)) 677 break; 678 std::pair<llvm::Value*, unsigned> Dest = 679 EmitPointerWithAlignment(E->getArg(0)); 680 std::pair<llvm::Value*, unsigned> Src = 681 EmitPointerWithAlignment(E->getArg(1)); 682 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 683 unsigned Align = std::min(Dest.second, Src.second); 684 Builder.CreateMemMove(Dest.first, Src.first, SizeVal, Align, false); 685 return RValue::get(Dest.first); 686 } 687 688 case Builtin::BImemmove: 689 case Builtin::BI__builtin_memmove: { 690 std::pair<llvm::Value*, unsigned> Dest = 691 EmitPointerWithAlignment(E->getArg(0)); 692 std::pair<llvm::Value*, unsigned> Src = 693 EmitPointerWithAlignment(E->getArg(1)); 694 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 695 unsigned Align = std::min(Dest.second, Src.second); 696 Builder.CreateMemMove(Dest.first, Src.first, SizeVal, Align, false); 697 return RValue::get(Dest.first); 698 } 699 case Builtin::BImemset: 700 case Builtin::BI__builtin_memset: { 701 std::pair<llvm::Value*, unsigned> Dest = 702 EmitPointerWithAlignment(E->getArg(0)); 703 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 704 Builder.getInt8Ty()); 705 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 706 Builder.CreateMemSet(Dest.first, ByteVal, SizeVal, Dest.second, false); 707 return RValue::get(Dest.first); 708 } 709 case Builtin::BI__builtin___memset_chk: { 710 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2. 
711 llvm::APSInt Size, DstSize; 712 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 713 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 714 break; 715 if (Size.ugt(DstSize)) 716 break; 717 std::pair<llvm::Value*, unsigned> Dest = 718 EmitPointerWithAlignment(E->getArg(0)); 719 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 720 Builder.getInt8Ty()); 721 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 722 Builder.CreateMemSet(Dest.first, ByteVal, SizeVal, Dest.second, false); 723 return RValue::get(Dest.first); 724 } 725 case Builtin::BI__builtin_dwarf_cfa: { 726 // The offset in bytes from the first argument to the CFA. 727 // 728 // Why on earth is this in the frontend? Is there any reason at 729 // all that the backend can't reasonably determine this while 730 // lowering llvm.eh.dwarf.cfa()? 731 // 732 // TODO: If there's a satisfactory reason, add a target hook for 733 // this instead of hard-coding 0, which is correct for most targets. 
734 int32_t Offset = 0; 735 736 Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa); 737 return RValue::get(Builder.CreateCall(F, 738 llvm::ConstantInt::get(Int32Ty, Offset))); 739 } 740 case Builtin::BI__builtin_return_address: { 741 Value *Depth = EmitScalarExpr(E->getArg(0)); 742 Depth = Builder.CreateIntCast(Depth, Int32Ty, false); 743 Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); 744 return RValue::get(Builder.CreateCall(F, Depth)); 745 } 746 case Builtin::BI__builtin_frame_address: { 747 Value *Depth = EmitScalarExpr(E->getArg(0)); 748 Depth = Builder.CreateIntCast(Depth, Int32Ty, false); 749 Value *F = CGM.getIntrinsic(Intrinsic::frameaddress); 750 return RValue::get(Builder.CreateCall(F, Depth)); 751 } 752 case Builtin::BI__builtin_extract_return_addr: { 753 Value *Address = EmitScalarExpr(E->getArg(0)); 754 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address); 755 return RValue::get(Result); 756 } 757 case Builtin::BI__builtin_frob_return_addr: { 758 Value *Address = EmitScalarExpr(E->getArg(0)); 759 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address); 760 return RValue::get(Result); 761 } 762 case Builtin::BI__builtin_dwarf_sp_column: { 763 llvm::IntegerType *Ty 764 = cast<llvm::IntegerType>(ConvertType(E->getType())); 765 int Column = getTargetHooks().getDwarfEHStackPointer(CGM); 766 if (Column == -1) { 767 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column"); 768 return RValue::get(llvm::UndefValue::get(Ty)); 769 } 770 return RValue::get(llvm::ConstantInt::get(Ty, Column, true)); 771 } 772 case Builtin::BI__builtin_init_dwarf_reg_size_table: { 773 Value *Address = EmitScalarExpr(E->getArg(0)); 774 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address)) 775 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table"); 776 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType()))); 777 } 778 case Builtin::BI__builtin_eh_return: { 779 Value *Int = EmitScalarExpr(E->getArg(0)); 780 Value *Ptr = 
EmitScalarExpr(E->getArg(1)); 781 782 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType()); 783 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) && 784 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants"); 785 Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32 786 ? Intrinsic::eh_return_i32 787 : Intrinsic::eh_return_i64); 788 Builder.CreateCall2(F, Int, Ptr); 789 Builder.CreateUnreachable(); 790 791 // We do need to preserve an insertion point. 792 EmitBlock(createBasicBlock("builtin_eh_return.cont")); 793 794 return RValue::get(nullptr); 795 } 796 case Builtin::BI__builtin_unwind_init: { 797 Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init); 798 return RValue::get(Builder.CreateCall(F)); 799 } 800 case Builtin::BI__builtin_extend_pointer: { 801 // Extends a pointer to the size of an _Unwind_Word, which is 802 // uint64_t on all platforms. Generally this gets poked into a 803 // register and eventually used as an address, so if the 804 // addressing registers are wider than pointers and the platform 805 // doesn't implicitly ignore high-order bits when doing 806 // addressing, we need to make sure we zext / sext based on 807 // the platform's expectations. 808 // 809 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html 810 811 // Cast the pointer to intptr_t. 812 Value *Ptr = EmitScalarExpr(E->getArg(0)); 813 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast"); 814 815 // If that's 64 bits, we're done. 816 if (IntPtrTy->getBitWidth() == 64) 817 return RValue::get(Result); 818 819 // Otherwise, ask the codegen data what to do. 820 if (getTargetHooks().extendPointerWithSExt()) 821 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext")); 822 else 823 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext")); 824 } 825 case Builtin::BI__builtin_setjmp: { 826 // Buffer is a void**. 
827 Value *Buf = EmitScalarExpr(E->getArg(0)); 828 829 // Store the frame pointer to the setjmp buffer. 830 Value *FrameAddr = 831 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 832 ConstantInt::get(Int32Ty, 0)); 833 Builder.CreateStore(FrameAddr, Buf); 834 835 // Store the stack pointer to the setjmp buffer. 836 Value *StackAddr = 837 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave)); 838 Value *StackSaveSlot = 839 Builder.CreateGEP(Buf, ConstantInt::get(Int32Ty, 2)); 840 Builder.CreateStore(StackAddr, StackSaveSlot); 841 842 // Call LLVM's EH setjmp, which is lightweight. 843 Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); 844 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 845 return RValue::get(Builder.CreateCall(F, Buf)); 846 } 847 case Builtin::BI__builtin_longjmp: { 848 Value *Buf = EmitScalarExpr(E->getArg(0)); 849 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 850 851 // Call LLVM's EH longjmp, which is lightweight. 852 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf); 853 854 // longjmp doesn't return; mark this as unreachable. 855 Builder.CreateUnreachable(); 856 857 // We do need to preserve an insertion point. 
858 EmitBlock(createBasicBlock("longjmp.cont")); 859 860 return RValue::get(nullptr); 861 } 862 case Builtin::BI__sync_fetch_and_add: 863 case Builtin::BI__sync_fetch_and_sub: 864 case Builtin::BI__sync_fetch_and_or: 865 case Builtin::BI__sync_fetch_and_and: 866 case Builtin::BI__sync_fetch_and_xor: 867 case Builtin::BI__sync_add_and_fetch: 868 case Builtin::BI__sync_sub_and_fetch: 869 case Builtin::BI__sync_and_and_fetch: 870 case Builtin::BI__sync_or_and_fetch: 871 case Builtin::BI__sync_xor_and_fetch: 872 case Builtin::BI__sync_val_compare_and_swap: 873 case Builtin::BI__sync_bool_compare_and_swap: 874 case Builtin::BI__sync_lock_test_and_set: 875 case Builtin::BI__sync_lock_release: 876 case Builtin::BI__sync_swap: 877 llvm_unreachable("Shouldn't make it through sema"); 878 case Builtin::BI__sync_fetch_and_add_1: 879 case Builtin::BI__sync_fetch_and_add_2: 880 case Builtin::BI__sync_fetch_and_add_4: 881 case Builtin::BI__sync_fetch_and_add_8: 882 case Builtin::BI__sync_fetch_and_add_16: 883 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E); 884 case Builtin::BI__sync_fetch_and_sub_1: 885 case Builtin::BI__sync_fetch_and_sub_2: 886 case Builtin::BI__sync_fetch_and_sub_4: 887 case Builtin::BI__sync_fetch_and_sub_8: 888 case Builtin::BI__sync_fetch_and_sub_16: 889 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E); 890 case Builtin::BI__sync_fetch_and_or_1: 891 case Builtin::BI__sync_fetch_and_or_2: 892 case Builtin::BI__sync_fetch_and_or_4: 893 case Builtin::BI__sync_fetch_and_or_8: 894 case Builtin::BI__sync_fetch_and_or_16: 895 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E); 896 case Builtin::BI__sync_fetch_and_and_1: 897 case Builtin::BI__sync_fetch_and_and_2: 898 case Builtin::BI__sync_fetch_and_and_4: 899 case Builtin::BI__sync_fetch_and_and_8: 900 case Builtin::BI__sync_fetch_and_and_16: 901 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E); 902 case Builtin::BI__sync_fetch_and_xor_1: 903 case 
Builtin::BI__sync_fetch_and_xor_2: 904 case Builtin::BI__sync_fetch_and_xor_4: 905 case Builtin::BI__sync_fetch_and_xor_8: 906 case Builtin::BI__sync_fetch_and_xor_16: 907 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E); 908 909 // Clang extensions: not overloaded yet. 910 case Builtin::BI__sync_fetch_and_min: 911 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E); 912 case Builtin::BI__sync_fetch_and_max: 913 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E); 914 case Builtin::BI__sync_fetch_and_umin: 915 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E); 916 case Builtin::BI__sync_fetch_and_umax: 917 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E); 918 919 case Builtin::BI__sync_add_and_fetch_1: 920 case Builtin::BI__sync_add_and_fetch_2: 921 case Builtin::BI__sync_add_and_fetch_4: 922 case Builtin::BI__sync_add_and_fetch_8: 923 case Builtin::BI__sync_add_and_fetch_16: 924 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E, 925 llvm::Instruction::Add); 926 case Builtin::BI__sync_sub_and_fetch_1: 927 case Builtin::BI__sync_sub_and_fetch_2: 928 case Builtin::BI__sync_sub_and_fetch_4: 929 case Builtin::BI__sync_sub_and_fetch_8: 930 case Builtin::BI__sync_sub_and_fetch_16: 931 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E, 932 llvm::Instruction::Sub); 933 case Builtin::BI__sync_and_and_fetch_1: 934 case Builtin::BI__sync_and_and_fetch_2: 935 case Builtin::BI__sync_and_and_fetch_4: 936 case Builtin::BI__sync_and_and_fetch_8: 937 case Builtin::BI__sync_and_and_fetch_16: 938 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E, 939 llvm::Instruction::And); 940 case Builtin::BI__sync_or_and_fetch_1: 941 case Builtin::BI__sync_or_and_fetch_2: 942 case Builtin::BI__sync_or_and_fetch_4: 943 case Builtin::BI__sync_or_and_fetch_8: 944 case Builtin::BI__sync_or_and_fetch_16: 945 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E, 946 llvm::Instruction::Or); 947 case 
Builtin::BI__sync_xor_and_fetch_1: 948 case Builtin::BI__sync_xor_and_fetch_2: 949 case Builtin::BI__sync_xor_and_fetch_4: 950 case Builtin::BI__sync_xor_and_fetch_8: 951 case Builtin::BI__sync_xor_and_fetch_16: 952 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E, 953 llvm::Instruction::Xor); 954 955 case Builtin::BI__sync_val_compare_and_swap_1: 956 case Builtin::BI__sync_val_compare_and_swap_2: 957 case Builtin::BI__sync_val_compare_and_swap_4: 958 case Builtin::BI__sync_val_compare_and_swap_8: 959 case Builtin::BI__sync_val_compare_and_swap_16: { 960 QualType T = E->getType(); 961 llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0)); 962 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 963 964 llvm::IntegerType *IntType = 965 llvm::IntegerType::get(getLLVMContext(), 966 getContext().getTypeSize(T)); 967 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 968 969 Value *Args[3]; 970 Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType); 971 Args[1] = EmitScalarExpr(E->getArg(1)); 972 llvm::Type *ValueType = Args[1]->getType(); 973 Args[1] = EmitToInt(*this, Args[1], T, IntType); 974 Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType); 975 976 Value *Result = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2], 977 llvm::SequentiallyConsistent, 978 llvm::SequentiallyConsistent); 979 Result = Builder.CreateExtractValue(Result, 0); 980 Result = EmitFromInt(*this, Result, T, ValueType); 981 return RValue::get(Result); 982 } 983 984 case Builtin::BI__sync_bool_compare_and_swap_1: 985 case Builtin::BI__sync_bool_compare_and_swap_2: 986 case Builtin::BI__sync_bool_compare_and_swap_4: 987 case Builtin::BI__sync_bool_compare_and_swap_8: 988 case Builtin::BI__sync_bool_compare_and_swap_16: { 989 QualType T = E->getArg(1)->getType(); 990 llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0)); 991 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 992 993 llvm::IntegerType *IntType = 994 
llvm::IntegerType::get(getLLVMContext(), 995 getContext().getTypeSize(T)); 996 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 997 998 Value *Args[3]; 999 Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType); 1000 Args[1] = EmitToInt(*this, EmitScalarExpr(E->getArg(1)), T, IntType); 1001 Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType); 1002 1003 Value *Pair = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2], 1004 llvm::SequentiallyConsistent, 1005 llvm::SequentiallyConsistent); 1006 Value *Result = Builder.CreateExtractValue(Pair, 1); 1007 // zext bool to int. 1008 Result = Builder.CreateZExt(Result, ConvertType(E->getType())); 1009 return RValue::get(Result); 1010 } 1011 1012 case Builtin::BI__sync_swap_1: 1013 case Builtin::BI__sync_swap_2: 1014 case Builtin::BI__sync_swap_4: 1015 case Builtin::BI__sync_swap_8: 1016 case Builtin::BI__sync_swap_16: 1017 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1018 1019 case Builtin::BI__sync_lock_test_and_set_1: 1020 case Builtin::BI__sync_lock_test_and_set_2: 1021 case Builtin::BI__sync_lock_test_and_set_4: 1022 case Builtin::BI__sync_lock_test_and_set_8: 1023 case Builtin::BI__sync_lock_test_and_set_16: 1024 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1025 1026 case Builtin::BI__sync_lock_release_1: 1027 case Builtin::BI__sync_lock_release_2: 1028 case Builtin::BI__sync_lock_release_4: 1029 case Builtin::BI__sync_lock_release_8: 1030 case Builtin::BI__sync_lock_release_16: { 1031 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1032 QualType ElTy = E->getArg(0)->getType()->getPointeeType(); 1033 CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); 1034 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 1035 StoreSize.getQuantity() * 8); 1036 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 1037 llvm::StoreInst *Store = 1038 Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr); 1039 Store->setAlignment(StoreSize.getQuantity()); 
1040 Store->setAtomic(llvm::Release); 1041 return RValue::get(nullptr); 1042 } 1043 1044 case Builtin::BI__sync_synchronize: { 1045 // We assume this is supposed to correspond to a C++0x-style 1046 // sequentially-consistent fence (i.e. this is only usable for 1047 // synchonization, not device I/O or anything like that). This intrinsic 1048 // is really badly designed in the sense that in theory, there isn't 1049 // any way to safely use it... but in practice, it mostly works 1050 // to use it with non-atomic loads and stores to get acquire/release 1051 // semantics. 1052 Builder.CreateFence(llvm::SequentiallyConsistent); 1053 return RValue::get(nullptr); 1054 } 1055 1056 case Builtin::BI__c11_atomic_is_lock_free: 1057 case Builtin::BI__atomic_is_lock_free: { 1058 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the 1059 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since 1060 // _Atomic(T) is always properly-aligned. 1061 const char *LibCallName = "__atomic_is_lock_free"; 1062 CallArgList Args; 1063 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))), 1064 getContext().getSizeType()); 1065 if (BuiltinID == Builtin::BI__atomic_is_lock_free) 1066 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))), 1067 getContext().VoidPtrTy); 1068 else 1069 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)), 1070 getContext().VoidPtrTy); 1071 const CGFunctionInfo &FuncInfo = 1072 CGM.getTypes().arrangeFreeFunctionCall(E->getType(), Args, 1073 FunctionType::ExtInfo(), 1074 RequiredArgs::All); 1075 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); 1076 llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName); 1077 return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args); 1078 } 1079 1080 case Builtin::BI__atomic_test_and_set: { 1081 // Look at the argument type to determine whether this is a volatile 1082 // operation. The parameter type is always volatile. 
1083 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 1084 bool Volatile = 1085 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 1086 1087 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1088 unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); 1089 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 1090 Value *NewVal = Builder.getInt8(1); 1091 Value *Order = EmitScalarExpr(E->getArg(1)); 1092 if (isa<llvm::ConstantInt>(Order)) { 1093 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1094 AtomicRMWInst *Result = nullptr; 1095 switch (ord) { 1096 case 0: // memory_order_relaxed 1097 default: // invalid order 1098 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1099 Ptr, NewVal, 1100 llvm::Monotonic); 1101 break; 1102 case 1: // memory_order_consume 1103 case 2: // memory_order_acquire 1104 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1105 Ptr, NewVal, 1106 llvm::Acquire); 1107 break; 1108 case 3: // memory_order_release 1109 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1110 Ptr, NewVal, 1111 llvm::Release); 1112 break; 1113 case 4: // memory_order_acq_rel 1114 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1115 Ptr, NewVal, 1116 llvm::AcquireRelease); 1117 break; 1118 case 5: // memory_order_seq_cst 1119 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1120 Ptr, NewVal, 1121 llvm::SequentiallyConsistent); 1122 break; 1123 } 1124 Result->setVolatile(Volatile); 1125 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 1126 } 1127 1128 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1129 1130 llvm::BasicBlock *BBs[5] = { 1131 createBasicBlock("monotonic", CurFn), 1132 createBasicBlock("acquire", CurFn), 1133 createBasicBlock("release", CurFn), 1134 createBasicBlock("acqrel", CurFn), 1135 createBasicBlock("seqcst", CurFn) 1136 }; 1137 llvm::AtomicOrdering Orders[5] = { 1138 llvm::Monotonic, llvm::Acquire, 
llvm::Release, 1139 llvm::AcquireRelease, llvm::SequentiallyConsistent 1140 }; 1141 1142 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1143 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 1144 1145 Builder.SetInsertPoint(ContBB); 1146 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set"); 1147 1148 for (unsigned i = 0; i < 5; ++i) { 1149 Builder.SetInsertPoint(BBs[i]); 1150 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1151 Ptr, NewVal, Orders[i]); 1152 RMW->setVolatile(Volatile); 1153 Result->addIncoming(RMW, BBs[i]); 1154 Builder.CreateBr(ContBB); 1155 } 1156 1157 SI->addCase(Builder.getInt32(0), BBs[0]); 1158 SI->addCase(Builder.getInt32(1), BBs[1]); 1159 SI->addCase(Builder.getInt32(2), BBs[1]); 1160 SI->addCase(Builder.getInt32(3), BBs[2]); 1161 SI->addCase(Builder.getInt32(4), BBs[3]); 1162 SI->addCase(Builder.getInt32(5), BBs[4]); 1163 1164 Builder.SetInsertPoint(ContBB); 1165 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 1166 } 1167 1168 case Builtin::BI__atomic_clear: { 1169 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 1170 bool Volatile = 1171 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 1172 1173 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1174 unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); 1175 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 1176 Value *NewVal = Builder.getInt8(0); 1177 Value *Order = EmitScalarExpr(E->getArg(1)); 1178 if (isa<llvm::ConstantInt>(Order)) { 1179 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1180 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 1181 Store->setAlignment(1); 1182 switch (ord) { 1183 case 0: // memory_order_relaxed 1184 default: // invalid order 1185 Store->setOrdering(llvm::Monotonic); 1186 break; 1187 case 3: // memory_order_release 1188 Store->setOrdering(llvm::Release); 1189 break; 1190 case 5: // memory_order_seq_cst 1191 
Store->setOrdering(llvm::SequentiallyConsistent); 1192 break; 1193 } 1194 return RValue::get(nullptr); 1195 } 1196 1197 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1198 1199 llvm::BasicBlock *BBs[3] = { 1200 createBasicBlock("monotonic", CurFn), 1201 createBasicBlock("release", CurFn), 1202 createBasicBlock("seqcst", CurFn) 1203 }; 1204 llvm::AtomicOrdering Orders[3] = { 1205 llvm::Monotonic, llvm::Release, llvm::SequentiallyConsistent 1206 }; 1207 1208 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1209 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 1210 1211 for (unsigned i = 0; i < 3; ++i) { 1212 Builder.SetInsertPoint(BBs[i]); 1213 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 1214 Store->setAlignment(1); 1215 Store->setOrdering(Orders[i]); 1216 Builder.CreateBr(ContBB); 1217 } 1218 1219 SI->addCase(Builder.getInt32(0), BBs[0]); 1220 SI->addCase(Builder.getInt32(3), BBs[1]); 1221 SI->addCase(Builder.getInt32(5), BBs[2]); 1222 1223 Builder.SetInsertPoint(ContBB); 1224 return RValue::get(nullptr); 1225 } 1226 1227 case Builtin::BI__atomic_thread_fence: 1228 case Builtin::BI__atomic_signal_fence: 1229 case Builtin::BI__c11_atomic_thread_fence: 1230 case Builtin::BI__c11_atomic_signal_fence: { 1231 llvm::SynchronizationScope Scope; 1232 if (BuiltinID == Builtin::BI__atomic_signal_fence || 1233 BuiltinID == Builtin::BI__c11_atomic_signal_fence) 1234 Scope = llvm::SingleThread; 1235 else 1236 Scope = llvm::CrossThread; 1237 Value *Order = EmitScalarExpr(E->getArg(0)); 1238 if (isa<llvm::ConstantInt>(Order)) { 1239 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1240 switch (ord) { 1241 case 0: // memory_order_relaxed 1242 default: // invalid order 1243 break; 1244 case 1: // memory_order_consume 1245 case 2: // memory_order_acquire 1246 Builder.CreateFence(llvm::Acquire, Scope); 1247 break; 1248 case 3: // memory_order_release 1249 Builder.CreateFence(llvm::Release, Scope); 1250 
break; 1251 case 4: // memory_order_acq_rel 1252 Builder.CreateFence(llvm::AcquireRelease, Scope); 1253 break; 1254 case 5: // memory_order_seq_cst 1255 Builder.CreateFence(llvm::SequentiallyConsistent, Scope); 1256 break; 1257 } 1258 return RValue::get(nullptr); 1259 } 1260 1261 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB; 1262 AcquireBB = createBasicBlock("acquire", CurFn); 1263 ReleaseBB = createBasicBlock("release", CurFn); 1264 AcqRelBB = createBasicBlock("acqrel", CurFn); 1265 SeqCstBB = createBasicBlock("seqcst", CurFn); 1266 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1267 1268 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1269 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB); 1270 1271 Builder.SetInsertPoint(AcquireBB); 1272 Builder.CreateFence(llvm::Acquire, Scope); 1273 Builder.CreateBr(ContBB); 1274 SI->addCase(Builder.getInt32(1), AcquireBB); 1275 SI->addCase(Builder.getInt32(2), AcquireBB); 1276 1277 Builder.SetInsertPoint(ReleaseBB); 1278 Builder.CreateFence(llvm::Release, Scope); 1279 Builder.CreateBr(ContBB); 1280 SI->addCase(Builder.getInt32(3), ReleaseBB); 1281 1282 Builder.SetInsertPoint(AcqRelBB); 1283 Builder.CreateFence(llvm::AcquireRelease, Scope); 1284 Builder.CreateBr(ContBB); 1285 SI->addCase(Builder.getInt32(4), AcqRelBB); 1286 1287 Builder.SetInsertPoint(SeqCstBB); 1288 Builder.CreateFence(llvm::SequentiallyConsistent, Scope); 1289 Builder.CreateBr(ContBB); 1290 SI->addCase(Builder.getInt32(5), SeqCstBB); 1291 1292 Builder.SetInsertPoint(ContBB); 1293 return RValue::get(nullptr); 1294 } 1295 1296 // Library functions with special handling. 
1297 case Builtin::BIsqrt: 1298 case Builtin::BIsqrtf: 1299 case Builtin::BIsqrtl: { 1300 // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only 1301 // in finite- or unsafe-math mode (the intrinsic has different semantics 1302 // for handling negative numbers compared to the library function, so 1303 // -fmath-errno=0 is not enough). 1304 if (!FD->hasAttr<ConstAttr>()) 1305 break; 1306 if (!(CGM.getCodeGenOpts().UnsafeFPMath || 1307 CGM.getCodeGenOpts().NoNaNsFPMath)) 1308 break; 1309 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 1310 llvm::Type *ArgType = Arg0->getType(); 1311 Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType); 1312 return RValue::get(Builder.CreateCall(F, Arg0)); 1313 } 1314 1315 case Builtin::BIpow: 1316 case Builtin::BIpowf: 1317 case Builtin::BIpowl: { 1318 // Transform a call to pow* into a @llvm.pow.* intrinsic call. 1319 if (!FD->hasAttr<ConstAttr>()) 1320 break; 1321 Value *Base = EmitScalarExpr(E->getArg(0)); 1322 Value *Exponent = EmitScalarExpr(E->getArg(1)); 1323 llvm::Type *ArgType = Base->getType(); 1324 Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType); 1325 return RValue::get(Builder.CreateCall2(F, Base, Exponent)); 1326 } 1327 1328 case Builtin::BIfma: 1329 case Builtin::BIfmaf: 1330 case Builtin::BIfmal: 1331 case Builtin::BI__builtin_fma: 1332 case Builtin::BI__builtin_fmaf: 1333 case Builtin::BI__builtin_fmal: { 1334 // Rewrite fma to intrinsic. 
1335 Value *FirstArg = EmitScalarExpr(E->getArg(0)); 1336 llvm::Type *ArgType = FirstArg->getType(); 1337 Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType); 1338 return RValue::get(Builder.CreateCall3(F, FirstArg, 1339 EmitScalarExpr(E->getArg(1)), 1340 EmitScalarExpr(E->getArg(2)))); 1341 } 1342 1343 case Builtin::BI__builtin_signbit: 1344 case Builtin::BI__builtin_signbitf: 1345 case Builtin::BI__builtin_signbitl: { 1346 LLVMContext &C = CGM.getLLVMContext(); 1347 1348 Value *Arg = EmitScalarExpr(E->getArg(0)); 1349 llvm::Type *ArgTy = Arg->getType(); 1350 if (ArgTy->isPPC_FP128Ty()) 1351 break; // FIXME: I'm not sure what the right implementation is here. 1352 int ArgWidth = ArgTy->getPrimitiveSizeInBits(); 1353 llvm::Type *ArgIntTy = llvm::IntegerType::get(C, ArgWidth); 1354 Value *BCArg = Builder.CreateBitCast(Arg, ArgIntTy); 1355 Value *ZeroCmp = llvm::Constant::getNullValue(ArgIntTy); 1356 Value *Result = Builder.CreateICmpSLT(BCArg, ZeroCmp); 1357 return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType()))); 1358 } 1359 case Builtin::BI__builtin_annotation: { 1360 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0)); 1361 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation, 1362 AnnVal->getType()); 1363 1364 // Get the annotation string, go through casts. Sema requires this to be a 1365 // non-wide string literal, potentially casted, so the cast<> is safe. 
1366 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts(); 1367 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString(); 1368 return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc())); 1369 } 1370 case Builtin::BI__builtin_addcb: 1371 case Builtin::BI__builtin_addcs: 1372 case Builtin::BI__builtin_addc: 1373 case Builtin::BI__builtin_addcl: 1374 case Builtin::BI__builtin_addcll: 1375 case Builtin::BI__builtin_subcb: 1376 case Builtin::BI__builtin_subcs: 1377 case Builtin::BI__builtin_subc: 1378 case Builtin::BI__builtin_subcl: 1379 case Builtin::BI__builtin_subcll: { 1380 1381 // We translate all of these builtins from expressions of the form: 1382 // int x = ..., y = ..., carryin = ..., carryout, result; 1383 // result = __builtin_addc(x, y, carryin, &carryout); 1384 // 1385 // to LLVM IR of the form: 1386 // 1387 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) 1388 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0 1389 // %carry1 = extractvalue {i32, i1} %tmp1, 1 1390 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1, 1391 // i32 %carryin) 1392 // %result = extractvalue {i32, i1} %tmp2, 0 1393 // %carry2 = extractvalue {i32, i1} %tmp2, 1 1394 // %tmp3 = or i1 %carry1, %carry2 1395 // %tmp4 = zext i1 %tmp3 to i32 1396 // store i32 %tmp4, i32* %carryout 1397 1398 // Scalarize our inputs. 1399 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 1400 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 1401 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2)); 1402 std::pair<llvm::Value*, unsigned> CarryOutPtr = 1403 EmitPointerWithAlignment(E->getArg(3)); 1404 1405 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow. 
1406 llvm::Intrinsic::ID IntrinsicId; 1407 switch (BuiltinID) { 1408 default: llvm_unreachable("Unknown multiprecision builtin id."); 1409 case Builtin::BI__builtin_addcb: 1410 case Builtin::BI__builtin_addcs: 1411 case Builtin::BI__builtin_addc: 1412 case Builtin::BI__builtin_addcl: 1413 case Builtin::BI__builtin_addcll: 1414 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 1415 break; 1416 case Builtin::BI__builtin_subcb: 1417 case Builtin::BI__builtin_subcs: 1418 case Builtin::BI__builtin_subc: 1419 case Builtin::BI__builtin_subcl: 1420 case Builtin::BI__builtin_subcll: 1421 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 1422 break; 1423 } 1424 1425 // Construct our resulting LLVM IR expression. 1426 llvm::Value *Carry1; 1427 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId, 1428 X, Y, Carry1); 1429 llvm::Value *Carry2; 1430 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId, 1431 Sum1, Carryin, Carry2); 1432 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2), 1433 X->getType()); 1434 llvm::StoreInst *CarryOutStore = Builder.CreateStore(CarryOut, 1435 CarryOutPtr.first); 1436 CarryOutStore->setAlignment(CarryOutPtr.second); 1437 return RValue::get(Sum2); 1438 } 1439 case Builtin::BI__builtin_uadd_overflow: 1440 case Builtin::BI__builtin_uaddl_overflow: 1441 case Builtin::BI__builtin_uaddll_overflow: 1442 case Builtin::BI__builtin_usub_overflow: 1443 case Builtin::BI__builtin_usubl_overflow: 1444 case Builtin::BI__builtin_usubll_overflow: 1445 case Builtin::BI__builtin_umul_overflow: 1446 case Builtin::BI__builtin_umull_overflow: 1447 case Builtin::BI__builtin_umulll_overflow: 1448 case Builtin::BI__builtin_sadd_overflow: 1449 case Builtin::BI__builtin_saddl_overflow: 1450 case Builtin::BI__builtin_saddll_overflow: 1451 case Builtin::BI__builtin_ssub_overflow: 1452 case Builtin::BI__builtin_ssubl_overflow: 1453 case Builtin::BI__builtin_ssubll_overflow: 1454 case Builtin::BI__builtin_smul_overflow: 1455 case 
Builtin::BI__builtin_smull_overflow: 1456 case Builtin::BI__builtin_smulll_overflow: { 1457 1458 // We translate all of these builtins directly to the relevant llvm IR node. 1459 1460 // Scalarize our inputs. 1461 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 1462 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 1463 std::pair<llvm::Value *, unsigned> SumOutPtr = 1464 EmitPointerWithAlignment(E->getArg(2)); 1465 1466 // Decide which of the overflow intrinsics we are lowering to: 1467 llvm::Intrinsic::ID IntrinsicId; 1468 switch (BuiltinID) { 1469 default: llvm_unreachable("Unknown security overflow builtin id."); 1470 case Builtin::BI__builtin_uadd_overflow: 1471 case Builtin::BI__builtin_uaddl_overflow: 1472 case Builtin::BI__builtin_uaddll_overflow: 1473 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 1474 break; 1475 case Builtin::BI__builtin_usub_overflow: 1476 case Builtin::BI__builtin_usubl_overflow: 1477 case Builtin::BI__builtin_usubll_overflow: 1478 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 1479 break; 1480 case Builtin::BI__builtin_umul_overflow: 1481 case Builtin::BI__builtin_umull_overflow: 1482 case Builtin::BI__builtin_umulll_overflow: 1483 IntrinsicId = llvm::Intrinsic::umul_with_overflow; 1484 break; 1485 case Builtin::BI__builtin_sadd_overflow: 1486 case Builtin::BI__builtin_saddl_overflow: 1487 case Builtin::BI__builtin_saddll_overflow: 1488 IntrinsicId = llvm::Intrinsic::sadd_with_overflow; 1489 break; 1490 case Builtin::BI__builtin_ssub_overflow: 1491 case Builtin::BI__builtin_ssubl_overflow: 1492 case Builtin::BI__builtin_ssubll_overflow: 1493 IntrinsicId = llvm::Intrinsic::ssub_with_overflow; 1494 break; 1495 case Builtin::BI__builtin_smul_overflow: 1496 case Builtin::BI__builtin_smull_overflow: 1497 case Builtin::BI__builtin_smulll_overflow: 1498 IntrinsicId = llvm::Intrinsic::smul_with_overflow; 1499 break; 1500 } 1501 1502 1503 llvm::Value *Carry; 1504 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry); 
1505 llvm::StoreInst *SumOutStore = Builder.CreateStore(Sum, SumOutPtr.first); 1506 SumOutStore->setAlignment(SumOutPtr.second); 1507 1508 return RValue::get(Carry); 1509 } 1510 case Builtin::BI__builtin_addressof: 1511 return RValue::get(EmitLValue(E->getArg(0)).getAddress()); 1512 case Builtin::BI__builtin_operator_new: 1513 return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 1514 E->getArg(0), false); 1515 case Builtin::BI__builtin_operator_delete: 1516 return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 1517 E->getArg(0), true); 1518 case Builtin::BI__noop: 1519 // __noop always evaluates to an integer literal zero. 1520 return RValue::get(ConstantInt::get(IntTy, 0)); 1521 case Builtin::BI__assume: 1522 // Until LLVM supports assumptions at the IR level, this becomes nothing. 1523 return RValue::get(nullptr); 1524 case Builtin::BI_InterlockedExchange: 1525 case Builtin::BI_InterlockedExchangePointer: 1526 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1527 case Builtin::BI_InterlockedCompareExchangePointer: { 1528 llvm::Type *RTy; 1529 llvm::IntegerType *IntType = 1530 IntegerType::get(getLLVMContext(), 1531 getContext().getTypeSize(E->getType())); 1532 llvm::Type *IntPtrType = IntType->getPointerTo(); 1533 1534 llvm::Value *Destination = 1535 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType); 1536 1537 llvm::Value *Exchange = EmitScalarExpr(E->getArg(1)); 1538 RTy = Exchange->getType(); 1539 Exchange = Builder.CreatePtrToInt(Exchange, IntType); 1540 1541 llvm::Value *Comparand = 1542 Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType); 1543 1544 auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, 1545 SequentiallyConsistent, 1546 SequentiallyConsistent); 1547 Result->setVolatile(true); 1548 1549 return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result, 1550 0), 1551 RTy)); 1552 } 1553 case Builtin::BI_InterlockedCompareExchange: { 
1554 AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg( 1555 EmitScalarExpr(E->getArg(0)), 1556 EmitScalarExpr(E->getArg(2)), 1557 EmitScalarExpr(E->getArg(1)), 1558 SequentiallyConsistent, 1559 SequentiallyConsistent); 1560 CXI->setVolatile(true); 1561 return RValue::get(Builder.CreateExtractValue(CXI, 0)); 1562 } 1563 case Builtin::BI_InterlockedIncrement: { 1564 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 1565 AtomicRMWInst::Add, 1566 EmitScalarExpr(E->getArg(0)), 1567 ConstantInt::get(Int32Ty, 1), 1568 llvm::SequentiallyConsistent); 1569 RMWI->setVolatile(true); 1570 return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(Int32Ty, 1))); 1571 } 1572 case Builtin::BI_InterlockedDecrement: { 1573 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 1574 AtomicRMWInst::Sub, 1575 EmitScalarExpr(E->getArg(0)), 1576 ConstantInt::get(Int32Ty, 1), 1577 llvm::SequentiallyConsistent); 1578 RMWI->setVolatile(true); 1579 return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(Int32Ty, 1))); 1580 } 1581 case Builtin::BI_InterlockedExchangeAdd: { 1582 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 1583 AtomicRMWInst::Add, 1584 EmitScalarExpr(E->getArg(0)), 1585 EmitScalarExpr(E->getArg(1)), 1586 llvm::SequentiallyConsistent); 1587 RMWI->setVolatile(true); 1588 return RValue::get(RMWI); 1589 } 1590 } 1591 1592 // If this is an alias for a lib function (e.g. __builtin_sin), emit 1593 // the call using the normal call path, but using the unmangled 1594 // version of the function name. 1595 if (getContext().BuiltinInfo.isLibFunction(BuiltinID)) 1596 return emitLibraryCall(*this, FD, E, 1597 CGM.getBuiltinLibFunction(FD, BuiltinID)); 1598 1599 // If this is a predefined lib function (e.g. malloc), emit the call 1600 // using exactly the normal call path. 1601 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID)) 1602 return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee())); 1603 1604 // See if we have a target specific intrinsic. 
1605 const char *Name = getContext().BuiltinInfo.GetName(BuiltinID); 1606 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic; 1607 if (const char *Prefix = 1608 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch())) { 1609 IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name); 1610 // NOTE we dont need to perform a compatibility flag check here since the 1611 // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the 1612 // MS builtins via ALL_MS_LANGUAGES and are filtered earlier. 1613 if (IntrinsicID == Intrinsic::not_intrinsic) 1614 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix, Name); 1615 } 1616 1617 if (IntrinsicID != Intrinsic::not_intrinsic) { 1618 SmallVector<Value*, 16> Args; 1619 1620 // Find out if any arguments are required to be integer constant 1621 // expressions. 1622 unsigned ICEArguments = 0; 1623 ASTContext::GetBuiltinTypeError Error; 1624 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 1625 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 1626 1627 Function *F = CGM.getIntrinsic(IntrinsicID); 1628 llvm::FunctionType *FTy = F->getFunctionType(); 1629 1630 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { 1631 Value *ArgValue; 1632 // If this is a normal argument, just emit it as a scalar. 1633 if ((ICEArguments & (1 << i)) == 0) { 1634 ArgValue = EmitScalarExpr(E->getArg(i)); 1635 } else { 1636 // If this is required to be a constant, constant fold it so that we 1637 // know that the generated intrinsic gets a ConstantInt. 1638 llvm::APSInt Result; 1639 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext()); 1640 assert(IsConst && "Constant arg isn't actually constant?"); 1641 (void)IsConst; 1642 ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result); 1643 } 1644 1645 // If the intrinsic arg type is different from the builtin arg type 1646 // we need to do a bit cast. 
1647 llvm::Type *PTy = FTy->getParamType(i); 1648 if (PTy != ArgValue->getType()) { 1649 assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) && 1650 "Must be able to losslessly bit cast to param"); 1651 ArgValue = Builder.CreateBitCast(ArgValue, PTy); 1652 } 1653 1654 Args.push_back(ArgValue); 1655 } 1656 1657 Value *V = Builder.CreateCall(F, Args); 1658 QualType BuiltinRetType = E->getType(); 1659 1660 llvm::Type *RetTy = VoidTy; 1661 if (!BuiltinRetType->isVoidType()) 1662 RetTy = ConvertType(BuiltinRetType); 1663 1664 if (RetTy != V->getType()) { 1665 assert(V->getType()->canLosslesslyBitCastTo(RetTy) && 1666 "Must be able to losslessly bit cast result type"); 1667 V = Builder.CreateBitCast(V, RetTy); 1668 } 1669 1670 return RValue::get(V); 1671 } 1672 1673 // See if we have a target specific builtin that needs to be lowered. 1674 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E)) 1675 return RValue::get(V); 1676 1677 ErrorUnsupported(E, "builtin function"); 1678 1679 // Unknown builtin, for now just dump it out and return undef. 
return GetUndefRValue(E->getType());
}

/// Route a builtin call to the per-architecture emitter selected by the
/// architecture of the target triple.
///
/// \param BuiltinID  the builtin being called; forwarded unchanged.
/// \param E          the call expression; forwarded unchanged.
/// \returns the value produced by the selected Emit*BuiltinExpr routine, or
/// nullptr when the architecture has no emitter listed in the switch.
Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
                                              const CallExpr *E) {
  switch (getTarget().getTriple().getArch()) {
  case llvm::Triple::arm:
  case llvm::Triple::armeb:
  case llvm::Triple::thumb:
  case llvm::Triple::thumbeb:
    return EmitARMBuiltinExpr(BuiltinID, E);
  case llvm::Triple::aarch64:
  case llvm::Triple::aarch64_be:
    return EmitAArch64BuiltinExpr(BuiltinID, E);
  case llvm::Triple::x86:
  case llvm::Triple::x86_64:
    return EmitX86BuiltinExpr(BuiltinID, E);
  case llvm::Triple::ppc:
  case llvm::Triple::ppc64:
  case llvm::Triple::ppc64le:
    return EmitPPCBuiltinExpr(BuiltinID, E);
  case llvm::Triple::r600:
    return EmitR600BuiltinExpr(BuiltinID, E);
  default:
    return nullptr;
  }
}

/// Build the LLVM vector type described by a set of NEON type flags.
///
/// The element type follows TypeFlags.getEltType(); note that Float16 uses
/// the same Int16Ty element as Int16/Poly16. The lane count is the base count
/// written in each case, left-shifted by the value of TypeFlags.isQuad().
///
/// \param CGF        supplies the cached element types (Int8Ty, FloatTy, ...).
/// \param TypeFlags  element kind and quad flag.
/// \param V1Ty       when true, a one-element vector is returned and the quad
///                   flag is ignored. Poly128 ignores both and is always
///                   16 x Int8Ty.
static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
                                     NeonTypeFlags TypeFlags,
                                     bool V1Ty=false) {
  int IsQuad = TypeFlags.isQuad();
  switch (TypeFlags.getEltType()) {
  case NeonTypeFlags::Int8:
  case NeonTypeFlags::Poly8:
    return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
  case NeonTypeFlags::Int16:
  case NeonTypeFlags::Poly16:
  case NeonTypeFlags::Float16:
    return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::Int32:
    return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
  case NeonTypeFlags::Int64:
  case NeonTypeFlags::Poly64:
    return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
  case NeonTypeFlags::Poly128:
    // FIXME: i128 and f128 are not fully supported in Clang and LLVM; much of
    // the i128 and f128 API is missing, so v16i8 is used to represent poly128
    // and gets pattern matched.
    return llvm::VectorType::get(CGF->Int8Ty, 16);
  case NeonTypeFlags::Float32:
    return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
  case NeonTypeFlags::Float64:
    return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
  }
  // Every case above returns, so this is only reached for an element kind
  // that the switch does not list.
  llvm_unreachable("Unknown vector element type!");
}

/// Emit a shufflevector of V with itself whose mask is the constant C
/// repeated once per element of V. V must have vector type (enforced by
/// cast<>). The resulting instruction is named "lane".
Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
  unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
  Value* SV = llvm::ConstantVector::getSplat(nElts, C);
  return Builder.CreateShuffleVector(V, V, SV, "lane");
}

/// Emit a call to F after coercing each operand to the type of the
/// corresponding formal argument of F.
///
/// Ops is rewritten in place, one entry per formal argument of F, so it must
/// hold at least that many values. When \p shift is non-zero, the operand at
/// index \p shift is converted to a constant shift vector by
/// EmitNeonShiftVector (negated if \p rightshift); every other operand is
/// bitcast to the argument type. Because the test is `shift > 0`, operand 0
/// is never treated as the shift amount.
///
/// \param name  name given to the emitted bitcasts and to the call.
/// \returns the call instruction.
Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
                                     const char *name,
                                     unsigned shift, bool rightshift) {
  unsigned j = 0;
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j)
    if (shift > 0 && shift == j)
      Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
    else
      Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);

  return Builder.CreateCall(F, Ops, name);
}

/// Turn a constant scalar shift amount into a constant vector of type Ty
/// holding that amount (or its negation, when \p neg is true) in every lane.
///
/// V must be a ConstantInt and Ty a vector type; both are enforced by cast<>.
/// The amount is read with getSExtValue() and held in an int.
Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
                                            bool neg) {
  int SV = cast<ConstantInt>(V)->getSExtValue();

  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
  llvm::Constant *C = ConstantInt::get(VTy->getElementType(), neg ? -SV : SV);
  return llvm::ConstantVector::getSplat(VTy->getNumElements(), C);
}

/// \brief Right-shift a vector by a constant.
///
/// Vec is bitcast to Ty before shifting. Shift must be a ConstantInt and Ty a
/// vector type (both enforced by cast<>).
///
/// \param usgn  true emits a logical shift (lshr), false an arithmetic shift
///              (ashr).
/// \param name  name given to the emitted shift instruction.
/// \returns the shifted vector; for an unsigned shift by exactly the element
/// width, an all-zero constant vector is returned and no shift is emitted.
Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
                                          llvm::Type *Ty, bool usgn,
                                          const char *name) {
  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);

  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
  int EltSize = VTy->getScalarSizeInBits();

  Vec = Builder.CreateBitCast(Vec, Ty);

  // lshr/ashr are undefined when the shift amount is equal to the vector
  // element size.
  if (ShiftAmt == EltSize) {
    if (usgn) {
      // Right-shifting an unsigned value by its size yields 0.
      llvm::Constant *Zero = ConstantInt::get(VTy->getElementType(), 0);
      return llvm::ConstantVector::getSplat(VTy->getNumElements(), Zero);
    } else {
      // Right-shifting a signed value by its size is equivalent
      // to a shift of size-1.
      --ShiftAmt;
      Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
    }
  }

  // Splat the (possibly adjusted) amount across all lanes, not negated.
  Shift = EmitNeonShiftVector(Shift, Ty, false);
  if (usgn)
    return Builder.CreateLShr(Vec, Shift, name);
  else
    return Builder.CreateAShr(Vec, Shift, name);
}

/// EmitPointerWithAlignment - Given an expression with a pointer type, emit
/// the pointer and find the alignment to associate with it. Parentheses are
/// ignored and implicit casts are looked through.
///
/// The cases, in the order they are tried:
///  - implicit CK_BitCast / CK_NoOp whose operand is itself a pointer:
///    recurse on the operand, bitcast the resulting pointer to the type of
///    this expression, and keep the operand's alignment;
///  - implicit array-to-pointer decay, or unary address-of: emit the operand
///    as an lvalue and use the lvalue's alignment, falling back to the
///    alignment of the operand's type (or 1 if that type is incomplete) when
///    the lvalue reports 0;
///  - anything else: emit the expression as a scalar and use the alignment of
///    the pointee type, or 1 if the pointee type is incomplete.
///
/// \returns the emitted pointer paired with the alignment, expressed as the
/// quantity of a CharUnits value.
std::pair<llvm::Value*, unsigned>
CodeGenFunction::EmitPointerWithAlignment(const Expr *Addr) {
  assert(Addr->getType()->isPointerType());
  Addr = Addr->IgnoreParens();
  if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Addr)) {
    if ((ICE->getCastKind() == CK_BitCast || ICE->getCastKind() == CK_NoOp) &&
        ICE->getSubExpr()->getType()->isPointerType()) {
      std::pair<llvm::Value*, unsigned> Ptr =
          EmitPointerWithAlignment(ICE->getSubExpr());
      Ptr.first = Builder.CreateBitCast(Ptr.first,
                                        ConvertType(Addr->getType()));
      return Ptr;
    } else if (ICE->getCastKind() == CK_ArrayToPointerDecay) {
      LValue LV = EmitLValue(ICE->getSubExpr());
      unsigned Align = LV.getAlignment().getQuantity();
      if (!Align) {
        // FIXME: Once LValues are fixed to always set alignment,
        // zap this code.
        QualType PtTy = ICE->getSubExpr()->getType();
        if (!PtTy->isIncompleteType())
          Align = getContext().getTypeAlignInChars(PtTy).getQuantity();
        else
          Align = 1;
      }
      return std::make_pair(LV.getAddress(), Align);
    }
  }
  if (const UnaryOperator *UO = dyn_cast<UnaryOperator>(Addr)) {
    if (UO->getOpcode() == UO_AddrOf) {
      LValue LV = EmitLValue(UO->getSubExpr());
      unsigned Align = LV.getAlignment().getQuantity();
      if (!Align) {
        // FIXME: Once LValues are fixed to always set alignment,
        // zap this code.
        QualType PtTy = UO->getSubExpr()->getType();
        if (!PtTy->isIncompleteType())
          Align = getContext().getTypeAlignInChars(PtTy).getQuantity();
        else
          Align = 1;
      }
      return std::make_pair(LV.getAddress(), Align);
    }
  }

  // No lvalue to consult: derive the alignment from the pointee type alone.
  unsigned Align = 1;
  QualType PtTy = Addr->getType()->getPointeeType();
  if (!PtTy->isIncompleteType())
    Align = getContext().getTypeAlignInChars(PtTy).getQuantity();

  return std::make_pair(EmitScalarExpr(Addr), Align);
}

/// Bit flags that are OR-ed together to form the TypeModifier field of a
/// NeonIntrinsicInfo entry. The first nine enumerators each occupy one bit;
/// the remaining four are named combinations of those bits.
///
/// NOTE(review): the code that interprets these bits is not in this part of
/// the file; consult it before relying on what any individual flag does.
enum {
  AddRetType = (1 << 0),
  Add1ArgType = (1 << 1),
  Add2ArgTypes = (1 << 2),

  VectorizeRetType = (1 << 3),
  VectorizeArgTypes = (1 << 4),

  InventFloatType = (1 << 5),
  UnsignedAlts = (1 << 6),

  Use64BitVectors = (1 << 7),
  Use128BitVectors = (1 << 8),

  // Named combinations of the single-bit flags above.
  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
  VectorRet = AddRetType | VectorizeRetType,
  VectorRetGetArgs01 =
      AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
  FpCmpzModifiers =
      AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
};

/// One row of a NEON builtin table. Rows are written with the NEONMAP0/1/2
/// macros, which fill the fields positionally, so the field order here must
/// not change.
struct NeonIntrinsicInfo {
  // Builtin this row describes (NEON::BI__builtin_neon_<name>).
  unsigned BuiltinID;
  // Primary LLVM intrinsic ID; 0 in rows written with NEONMAP0.
  unsigned LLVMIntrinsic;
  // Second intrinsic ID; 0 unless the row is written with NEONMAP2.
  unsigned AltLLVMIntrinsic;
  // Stringized builtin name base, as produced by the NEONMAP macros.
  const char *NameHint;
  // OR of the type-modifier flags declared in the enum above.
  unsigned TypeModifier;

  // Orders a row against a bare builtin ID. Presumably this supports an
  // ordered search of the tables; the search itself is not in view here.
  bool operator<(unsigned RHSBuiltinID) const {
    return BuiltinID < RHSBuiltinID;
  }
};

// Row builders for the NeonIntrinsicInfo tables below. Each expands to a
// braced initializer in field order: builtin ID, primary intrinsic, alternate
// intrinsic, name hint (the stringized NameBase), type modifier.

// NEONMAP0: row with no LLVM intrinsic (both intrinsic fields 0) and a type
// modifier of 0.
#define NEONMAP0(NameBase) \
  { NEON::BI__builtin_neon_ ## NameBase, 0, 0, #NameBase, 0 }

// NEONMAP1: row with a single LLVM intrinsic; the alternate field is 0.
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  { NEON:: BI__builtin_neon_ ## NameBase, \
    Intrinsic::LLVMIntrinsic, 0, #NameBase, TypeModifier }

// NEONMAP2: row with both a primary and an alternate LLVM intrinsic.
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
  { NEON:: BI__builtin_neon_ ## NameBase, \
    Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
    #NameBase, TypeModifier }

// NEON builtin table whose rows name arm_neon_* (and a few generic)
// intrinsics. Rows are listed in builtin-name order.
// NOTE(review): NeonIntrinsicInfo::operator< suggests the table is searched
// as a sorted range; the lookup is not in view here, so confirm the ordering
// requirement before inserting or moving rows.
static NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vabs_v, arm_neon_vabs, 0),
  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
  NEONMAP0(vaddhn_v),
  NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
  NEONMAP1(vaeseq_v, arm_neon_aese, 0),
  NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
  NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vcage_v, arm_neon_vacge, 0),
  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcale_v, arm_neon_vacge, 0),
  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_v, arm_neon_vcvtfp2hf, 0),
  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvt_s32_v),
  NEONMAP0(vcvt_s64_v),
  NEONMAP0(vcvt_u32_v),
  NEONMAP0(vcvt_u64_v),
  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_s32_v),
  NEONMAP0(vcvtq_s64_v),
  NEONMAP0(vcvtq_u32_v),
  NEONMAP0(vcvtq_u64_v),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP0(vld1_dup_v),
  NEONMAP1(vld1_v, arm_neon_vld1, 0),
  NEONMAP0(vld1q_dup_v),
  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2_v, arm_neon_vld2, 0),
  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3_v, arm_neon_vld3, 0),
  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4_v, arm_neon_vld4, 0),
  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP0(vmull_v),
  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
  NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
  NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
  NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
  NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP1(vst1_v, arm_neon_vst1, 0),
  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2_v, arm_neon_vst2, 0),
  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3_v, arm_neon_vst3, 0),
  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4_v, arm_neon_vst4, 0),
  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtrn_v),
  NEONMAP0(vtrnq_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
  NEONMAP0(vuzp_v),
  NEONMAP0(vuzpq_v),
  NEONMAP0(vzip_v),
  NEONMAP0(vzipq_v)
};

// NEON builtin table whose rows name aarch64_neon_* / aarch64_crypto_* (and a
// few generic) intrinsics. Same row format as ARMSIMDIntrinsicMap, and rows
// are likewise listed in builtin-name order.
// NOTE(review): confirm the ordering requirement against the lookup code
// (not in view here) before inserting or moving rows.
static NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
  NEONMAP0(vaddhn_v),
  NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_v, aarch64_neon_vcvtfp2hf, 0),
  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
  NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
  NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
};

static NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv,
AddRetType | Add1ArgType), 2223 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), 2224 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), 2225 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), 2226 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), 2227 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), 2228 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), 2229 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType), 2230 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), 2231 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType), 2232 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), 2233 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 2234 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType), 2235 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType), 2236 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), 2237 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), 2238 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType), 2239 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType), 2240 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), 2241 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), 2242 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType), 2243 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType), 2244 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType), 2245 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType), 2246 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), 2247 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), 2248 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), 2249 
NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), 2250 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType), 2251 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), 2252 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType), 2253 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), 2254 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType), 2255 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 2256 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType), 2257 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 2258 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 2259 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), 2260 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 2261 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), 2262 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), 2263 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), 2264 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), 2265 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), 2266 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0), 2267 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 2268 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 2269 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 2270 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 2271 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), 2272 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), 2273 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 2274 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 2275 
NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), 2276 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), 2277 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 2278 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 2279 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 2280 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 2281 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), 2282 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), 2283 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 2284 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), 2285 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), 2286 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), 2287 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0), 2288 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType), 2289 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType), 2290 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 2291 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 2292 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 2293 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 2294 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 2295 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 2296 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 2297 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 2298 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), 2299 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 2300 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), 2301 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType), 2302 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, 
Vectorize1ArgType | Use64BitVectors), 2303 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType), 2304 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), 2305 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), 2306 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType), 2307 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType), 2308 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), 2309 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), 2310 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType), 2311 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType), 2312 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors), 2313 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType), 2314 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors), 2315 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0), 2316 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType), 2317 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType), 2318 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), 2319 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), 2320 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), 2321 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), 2322 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType), 2323 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), 2324 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), 2325 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), 2326 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType), 2327 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), 2328 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType), 2329 NEONMAP1(vqrdmulhh_s16, 
aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors), 2330 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType), 2331 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), 2332 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), 2333 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType), 2334 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType), 2335 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), 2336 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), 2337 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType), 2338 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType), 2339 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType), 2340 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType), 2341 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), 2342 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), 2343 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), 2344 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), 2345 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType), 2346 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), 2347 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), 2348 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 2349 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 2350 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 2351 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 2352 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType), 2353 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType), 2354 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 2355 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, 
Vectorize1ArgType | Use64BitVectors), 2356 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 2357 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 2358 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType), 2359 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType), 2360 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType), 2361 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType), 2362 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors), 2363 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors), 2364 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType), 2365 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType), 2366 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType), 2367 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), 2368 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), 2369 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), 2370 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), 2371 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType), 2372 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), 2373 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), 2374 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors), 2375 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), 2376 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType), 2377 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType), 2378 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors), 2379 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), 2380 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType), 2381 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType), 2382 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType), 
2383 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType), 2384 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType), 2385 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType), 2386 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType), 2387 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType), 2388 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType), 2389 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType), 2390 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType), 2391 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType), 2392 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0), 2393 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0), 2394 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0), 2395 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0), 2396 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType), 2397 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType), 2398 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType), 2399 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType), 2400 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), 2401 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType), 2402 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), 2403 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType), 2404 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType), 2405 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType), 2406 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), 2407 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType), 2408 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), 2409 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType), 2410 }; 2411 2412 #undef NEONMAP0 2413 #undef NEONMAP1 2414 #undef NEONMAP2 2415 2416 static bool NEONSIMDIntrinsicsProvenSorted = false; 2417 2418 static bool AArch64SIMDIntrinsicsProvenSorted = false; 2419 static bool AArch64SISDIntrinsicsProvenSorted = 
false; 2420 2421 2422 static const NeonIntrinsicInfo * 2423 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap, 2424 unsigned BuiltinID, bool &MapProvenSorted) { 2425 2426 #ifndef NDEBUG 2427 if (!MapProvenSorted) { 2428 // FIXME: use std::is_sorted once C++11 is allowed 2429 for (unsigned i = 0; i < IntrinsicMap.size() - 1; ++i) 2430 assert(IntrinsicMap[i].BuiltinID <= IntrinsicMap[i + 1].BuiltinID); 2431 MapProvenSorted = true; 2432 } 2433 #endif 2434 2435 const NeonIntrinsicInfo *Builtin = 2436 std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID); 2437 2438 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID) 2439 return Builtin; 2440 2441 return nullptr; 2442 } 2443 2444 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID, 2445 unsigned Modifier, 2446 llvm::Type *ArgType, 2447 const CallExpr *E) { 2448 int VectorSize = 0; 2449 if (Modifier & Use64BitVectors) 2450 VectorSize = 64; 2451 else if (Modifier & Use128BitVectors) 2452 VectorSize = 128; 2453 2454 // Return type. 2455 SmallVector<llvm::Type *, 3> Tys; 2456 if (Modifier & AddRetType) { 2457 llvm::Type *Ty = ConvertType(E->getCallReturnType()); 2458 if (Modifier & VectorizeRetType) 2459 Ty = llvm::VectorType::get( 2460 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1); 2461 2462 Tys.push_back(Ty); 2463 } 2464 2465 // Arguments. 2466 if (Modifier & VectorizeArgTypes) { 2467 int Elts = VectorSize ? 
VectorSize / ArgType->getPrimitiveSizeInBits() : 1; 2468 ArgType = llvm::VectorType::get(ArgType, Elts); 2469 } 2470 2471 if (Modifier & (Add1ArgType | Add2ArgTypes)) 2472 Tys.push_back(ArgType); 2473 2474 if (Modifier & Add2ArgTypes) 2475 Tys.push_back(ArgType); 2476 2477 if (Modifier & InventFloatType) 2478 Tys.push_back(FloatTy); 2479 2480 return CGM.getIntrinsic(IntrinsicID, Tys); 2481 } 2482 2483 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, 2484 const NeonIntrinsicInfo &SISDInfo, 2485 SmallVectorImpl<Value *> &Ops, 2486 const CallExpr *E) { 2487 unsigned BuiltinID = SISDInfo.BuiltinID; 2488 unsigned int Int = SISDInfo.LLVMIntrinsic; 2489 unsigned Modifier = SISDInfo.TypeModifier; 2490 const char *s = SISDInfo.NameHint; 2491 2492 switch (BuiltinID) { 2493 case NEON::BI__builtin_neon_vcled_s64: 2494 case NEON::BI__builtin_neon_vcled_u64: 2495 case NEON::BI__builtin_neon_vcles_f32: 2496 case NEON::BI__builtin_neon_vcled_f64: 2497 case NEON::BI__builtin_neon_vcltd_s64: 2498 case NEON::BI__builtin_neon_vcltd_u64: 2499 case NEON::BI__builtin_neon_vclts_f32: 2500 case NEON::BI__builtin_neon_vcltd_f64: 2501 case NEON::BI__builtin_neon_vcales_f32: 2502 case NEON::BI__builtin_neon_vcaled_f64: 2503 case NEON::BI__builtin_neon_vcalts_f32: 2504 case NEON::BI__builtin_neon_vcaltd_f64: 2505 // Only one direction of comparisons actually exist, cmle is actually a cmge 2506 // with swapped operands. The table gives us the right intrinsic but we 2507 // still need to do the swap. 2508 std::swap(Ops[0], Ops[1]); 2509 break; 2510 } 2511 2512 assert(Int && "Generic code assumes a valid intrinsic"); 2513 2514 // Determine the type(s) of this overloaded AArch64 intrinsic. 
2515 const Expr *Arg = E->getArg(0); 2516 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType()); 2517 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E); 2518 2519 int j = 0; 2520 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0); 2521 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); 2522 ai != ae; ++ai, ++j) { 2523 llvm::Type *ArgTy = ai->getType(); 2524 if (Ops[j]->getType()->getPrimitiveSizeInBits() == 2525 ArgTy->getPrimitiveSizeInBits()) 2526 continue; 2527 2528 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy()); 2529 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate 2530 // it before inserting. 2531 Ops[j] = 2532 CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType()); 2533 Ops[j] = 2534 CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0); 2535 } 2536 2537 Value *Result = CGF.EmitNeonCall(F, Ops, s); 2538 llvm::Type *ResultType = CGF.ConvertType(E->getType()); 2539 if (ResultType->getPrimitiveSizeInBits() < 2540 Result->getType()->getPrimitiveSizeInBits()) 2541 return CGF.Builder.CreateExtractElement(Result, C0); 2542 2543 return CGF.Builder.CreateBitCast(Result, ResultType, s); 2544 } 2545 2546 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( 2547 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, 2548 const char *NameHint, unsigned Modifier, const CallExpr *E, 2549 SmallVectorImpl<llvm::Value *> &Ops, llvm::Value *Align) { 2550 // Get the last argument, which specifies the vector type. 2551 llvm::APSInt NeonTypeConst; 2552 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 2553 if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext())) 2554 return nullptr; 2555 2556 // Determine the type of this overloaded NEON intrinsic. 
NeonTypeFlags Type(NeonTypeConst.getZExtValue());
  bool Usgn = Type.isUnsigned();
  bool Quad = Type.isQuad();

  llvm::VectorType *VTy = GetNeonType(this, Type);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  // Int is the intrinsic used by the generic paths below. For table entries
  // flagged UnsignedAlts, the alternate intrinsic is chosen when the element
  // type is not unsigned.
  unsigned Int = LLVMIntrinsic;
  if ((Modifier & UnsignedAlts) && !Usgn)
    Int = AltLLVMIntrinsic;

  // Builtins that need custom lowering are handled here. Anything that breaks
  // out of the switch (or is not listed) is emitted by the generic code that
  // follows it.
  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_vabs_v:
  case NEON::BI__builtin_neon_vabsq_v:
    // Floating-point vectors use the generic fabs intrinsic; all other
    // element types use the intrinsic from the table.
    if (VTy->getElementType()->isFloatingPointTy())
      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
  case NEON::BI__builtin_neon_vaddhn_v: {
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %sum = add <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");

    // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(),
                                       SrcTy->getScalarSizeInBits() / 2);
    ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
  }
  case NEON::BI__builtin_neon_vcale_v:
  case NEON::BI__builtin_neon_vcaleq_v:
  case NEON::BI__builtin_neon_vcalt_v:
  case NEON::BI__builtin_neon_vcaltq_v:
    std::swap(Ops[0], Ops[1]);
    // No break: falls through to the vcage/vcagt lowering below, now with the
    // operands swapped.
  case NEON::BI__builtin_neon_vcage_v:
  case NEON::BI__builtin_neon_vcageq_v:
  case NEON::BI__builtin_neon_vcagt_v:
  case NEON::BI__builtin_neon_vcagtq_v: {
    // The intrinsic is overloaded on the result vector type and on a
    // floating-point vector with the same element width and lane count.
    llvm::Type *VecFlt = llvm::VectorType::get(
        VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
        VTy->getNumElements());
    llvm::Type *Tys[] = { VTy, VecFlt };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vclz_v:
  case NEON::BI__builtin_neon_vclzq_v:
    // We generate target-independent intrinsic, which needs a second argument
    // for whether or not clz of zero is undefined; on ARM it isn't.
    Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
    break;
  case NEON::BI__builtin_neon_vcvt_f32_v:
  case NEON::BI__builtin_neon_vcvtq_f32_v:
    // Integer vector -> float vector, emitted as a plain uitofp/sitofp.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_n_f32_v:
  case NEON::BI__builtin_neon_vcvt_n_f64_v:
  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
    // The floating-point vector type is chosen to match the integer element
    // width (64-bit -> Float64, otherwise Float32).
    bool Double =
      (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64);
    // NOTE(review): this local hides the FloatTy that is used unqualified
    // elsewhere in this function (e.g. in the vcage case above).
    llvm::Type *FloatTy =
        GetNeonType(this, NeonTypeFlags(Double ? NeonTypeFlags::Float64
                                               : NeonTypeFlags::Float32,
                                        false, Quad));
    llvm::Type *Tys[2] = { FloatTy, Ty };
    Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_n_s32_v:
  case NEON::BI__builtin_neon_vcvt_n_u32_v:
  case NEON::BI__builtin_neon_vcvt_n_s64_v:
  case NEON::BI__builtin_neon_vcvt_n_u64_v:
  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
    bool Double =
      (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64);
    // NOTE(review): shadows the outer FloatTy, as in the case above.
    llvm::Type *FloatTy =
        GetNeonType(this, NeonTypeFlags(Double ? NeonTypeFlags::Float64
                                               : NeonTypeFlags::Float32,
                                        false, Quad));
    llvm::Type *Tys[2] = { Ty, FloatTy };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_s32_v:
  case NEON::BI__builtin_neon_vcvt_u32_v:
  case NEON::BI__builtin_neon_vcvt_s64_v:
  case NEON::BI__builtin_neon_vcvt_u64_v:
  case NEON::BI__builtin_neon_vcvtq_s32_v:
  case NEON::BI__builtin_neon_vcvtq_u32_v:
  case NEON::BI__builtin_neon_vcvtq_s64_v:
  case NEON::BI__builtin_neon_vcvtq_u64_v: {
    // Float vector -> integer vector, emitted as a plain fptoui/fptosi.
    bool Double =
      (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64);
    // NOTE(review): shadows the outer FloatTy, as in the cases above.
    llvm::Type *FloatTy =
        GetNeonType(this, NeonTypeFlags(Double ? NeonTypeFlags::Float64
                                               : NeonTypeFlags::Float32,
                                        false, Quad));
    Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
    return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v:
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v:
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v:
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    // These conversions go through the table's intrinsic, overloaded on the
    // integer result type and the matching floating-point input type.
    bool Double =
      (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64);
    llvm::Type *InTy =
      GetNeonType(this,
                  NeonTypeFlags(Double ? NeonTypeFlags::Float64
                                : NeonTypeFlags::Float32, false, Quad));
    llvm::Type *Tys[2] = { Ty, InTy };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vext_v:
  case NEON::BI__builtin_neon_vextq_v: {
    // Emitted as a shuffle whose mask is the run of consecutive lane indices
    // CV, CV+1, ... over the Ops[0]/Ops[1] pair.
    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
    SmallVector<Constant*, 16> Indices;
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
      Indices.push_back(ConstantInt::get(Int32Ty, i+CV));

    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Value *SV = llvm::ConstantVector::get(Indices);
    return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
  }
  case NEON::BI__builtin_neon_vfma_v:
  case NEON::BI__builtin_neon_vfmaq_v: {
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);

    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v:
    // The alignment is passed to the intrinsic as a trailing operand.
    Ops.push_back(Align);
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vld1");
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v:
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v:
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v: {
    // Call the load intrinsic on Ops[1] and store its result through Ops[0],
    // after casting Ops[0] to a pointer to the result type.
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Ty);
    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    // Load a single element, insert it into lane 0 of an undef vector and
    // splat that lane.
    Value *V = UndefValue::get(Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    LoadInst *Ld = Builder.CreateLoad(Ops[0]);
    Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
    llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v:
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v:
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Ty);
    // Cast the operands from index 2 up to, but not including, the last one
    // to the vector type; the last operand is left untouched.
    for (unsigned I = 2; I < Ops.size() - 1; ++I)
      Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
    Ops.push_back(Align);
    // Everything except Ops[0] is passed to the intrinsic; the result is then
    // stored through Ops[0].
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vmovl_v: {
    // Widen from the half-width element vector with zext/sext.
    llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
    if (Usgn)
      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
  }
  case NEON::BI__builtin_neon_vmovn_v: {
    // Narrow from the double-width element vector with trunc.
    llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
  }
  case NEON::BI__builtin_neon_vmull_v:
    // FIXME: the integer vmull operations could be emitted in terms of pure
    // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
    // hoisting the exts outside loops. Until global ISel comes along that can
    // see through such movement this leads to bad CodeGen. So we need an
    // intrinsic for now.
    Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
    Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  case NEON::BI__builtin_neon_vpadal_v:
  case NEON::BI__builtin_neon_vpadalq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy =
      llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vpaddl_v:
  case NEON::BI__builtin_neon_vpaddlq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
  }
  case NEON::BI__builtin_neon_vqdmlal_v:
  case NEON::BI__builtin_neon_vqdmlsl_v: {
    // Two calls: LLVMIntrinsic on all operands after the first, then
    // AltLLVMIntrinsic on Ops[0] and that product.
    SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
    Value *Mul = EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty),
                              MulOps, "vqdmlal");

    SmallVector<Value *, 2> AccumOps;
    AccumOps.push_back(Ops[0]);
    AccumOps.push_back(Mul);
    return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty),
                        AccumOps, NameHint);
  }
  case NEON::BI__builtin_neon_vqshl_n_v:
  case NEON::BI__builtin_neon_vqshlq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
                        1, false);
  case NEON::BI__builtin_neon_vqshlu_n_v:
  case NEON::BI__builtin_neon_vqshluq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
                        1, false);
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:
  case NEON::BI__builtin_neon_vrsqrte_v:
  case NEON::BI__builtin_neon_vrsqrteq_v:
    // Floating-point types use the primary intrinsic, others the alternate.
    Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);

  case NEON::BI__builtin_neon_vrshr_n_v:
  case NEON::BI__builtin_neon_vrshrq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
                        1, true);
  case NEON::BI__builtin_neon_vshl_n_v:
  case NEON::BI__builtin_neon_vshlq_n_v:
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
    return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
                             "vshl_n");
  case NEON::BI__builtin_neon_vshll_n_v: {
    // Extend from the half-width element vector, then shift left.
    llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    if (Usgn)
      Ops[0] = Builder.CreateZExt(Ops[0], VTy);
    else
      Ops[0] = Builder.CreateSExt(Ops[0], VTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
    return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
  }
  case NEON::BI__builtin_neon_vshrn_n_v: {
    // Shift right in the double-width element vector, then truncate.
    llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
    if (Usgn)
      Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
    return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
  }
  case NEON::BI__builtin_neon_vshr_n_v:
  case NEON::BI__builtin_neon_vshrq_n_v:
    return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v:
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v:
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v:
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v:
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v:
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v:
    // The alignment is passed to the intrinsic as a trailing operand.
    Ops.push_back(Align);
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "");
  case NEON::BI__builtin_neon_vsubhn_v: {
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %diff = sub <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");

    // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(),
                                       SrcTy->getScalarSizeInBits() / 2);
    ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
  }
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    // Two shuffles of Ops[1]/Ops[2] are emitted (vi = 0 and vi = 1); each is
    // stored to the vi-th vector slot starting at Ops[0]. The value returned
    // is the second store.
    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(Builder.getInt32(i+vi));
        Indices.push_back(Builder.getInt32(i+e+vi));
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
      SV = Builder.CreateStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vtst_v:
  case NEON::BI__builtin_neon_vtstq_v: {
    // Lane-wise (a & b) != 0, sign-extended back to the vector type.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                ConstantAggregateZero::get(Ty));
    return Builder.CreateSExt(Ops[0], Ty, "vtst");
  }
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    // Same two-result store pattern as vtrn; the mask picks every second lane
    // of the Ops[1]/Ops[2] pair, starting at lane vi.
    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
      SV = Builder.CreateStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    // Same two-result store pattern as vtrn; the mask alternates lane k of
    // Ops[1] with lane k of Ops[2], where k covers the low half of the lanes
    // for vi = 0 and the high half for vi = 1.
    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
        Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
      SV = Builder.CreateStore(SV, Addr);
    }
    return SV;
  }
  }

  assert(Int && "Expected valid intrinsic number");

  // Determine the type(s) of this overloaded AArch64 intrinsic.
2987 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E); 2988 2989 Value *Result = EmitNeonCall(F, Ops, NameHint); 2990 llvm::Type *ResultType = ConvertType(E->getType()); 2991 // AArch64 intrinsic one-element vector type cast to 2992 // scalar type expected by the builtin 2993 return Builder.CreateBitCast(Result, ResultType, NameHint); 2994 } 2995 2996 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr( 2997 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp, 2998 const CmpInst::Predicate Ip, const Twine &Name) { 2999 llvm::Type *OTy = Op->getType(); 3000 3001 // FIXME: this is utterly horrific. We should not be looking at previous 3002 // codegen context to find out what needs doing. Unfortunately TableGen 3003 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32 3004 // (etc). 3005 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op)) 3006 OTy = BI->getOperand(0)->getType(); 3007 3008 Op = Builder.CreateBitCast(Op, OTy); 3009 if (OTy->getScalarType()->isFloatingPointTy()) { 3010 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy)); 3011 } else { 3012 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy)); 3013 } 3014 return Builder.CreateSExt(Op, Ty, Name); 3015 } 3016 3017 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, 3018 Value *ExtOp, Value *IndexOp, 3019 llvm::Type *ResTy, unsigned IntID, 3020 const char *Name) { 3021 SmallVector<Value *, 2> TblOps; 3022 if (ExtOp) 3023 TblOps.push_back(ExtOp); 3024 3025 // Build a vector containing sequential number like (0, 1, 2, ..., 15) 3026 SmallVector<Constant*, 16> Indices; 3027 llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType()); 3028 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) { 3029 Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i)); 3030 Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i+1)); 3031 } 3032 Value *SV = llvm::ConstantVector::get(Indices); 3033 3034 int PairPos = 0, End = Ops.size() - 1; 
3035 while (PairPos < End) { 3036 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 3037 Ops[PairPos+1], SV, Name)); 3038 PairPos += 2; 3039 } 3040 3041 // If there's an odd number of 64-bit lookup table, fill the high 64-bit 3042 // of the 128-bit lookup table with zero. 3043 if (PairPos == End) { 3044 Value *ZeroTbl = ConstantAggregateZero::get(TblTy); 3045 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 3046 ZeroTbl, SV, Name)); 3047 } 3048 3049 Function *TblF; 3050 TblOps.push_back(IndexOp); 3051 TblF = CGF.CGM.getIntrinsic(IntID, ResTy); 3052 3053 return CGF.EmitNeonCall(TblF, TblOps, Name); 3054 } 3055 3056 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, 3057 const CallExpr *E) { 3058 unsigned HintID = static_cast<unsigned>(-1); 3059 switch (BuiltinID) { 3060 default: break; 3061 case ARM::BI__builtin_arm_nop: 3062 HintID = 0; 3063 break; 3064 case ARM::BI__builtin_arm_yield: 3065 case ARM::BI__yield: 3066 HintID = 1; 3067 break; 3068 case ARM::BI__builtin_arm_wfe: 3069 case ARM::BI__wfe: 3070 HintID = 2; 3071 break; 3072 case ARM::BI__builtin_arm_wfi: 3073 case ARM::BI__wfi: 3074 HintID = 3; 3075 break; 3076 case ARM::BI__builtin_arm_sev: 3077 case ARM::BI__sev: 3078 HintID = 4; 3079 break; 3080 case ARM::BI__builtin_arm_sevl: 3081 case ARM::BI__sevl: 3082 HintID = 5; 3083 break; 3084 } 3085 3086 if (HintID != static_cast<unsigned>(-1)) { 3087 Function *F = CGM.getIntrinsic(Intrinsic::arm_hint); 3088 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID)); 3089 } 3090 3091 if (BuiltinID == ARM::BI__builtin_arm_rbit) { 3092 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_rbit), 3093 EmitScalarExpr(E->getArg(0)), 3094 "rbit"); 3095 } 3096 3097 if (BuiltinID == ARM::BI__clear_cache) { 3098 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 3099 const FunctionDecl *FD = E->getDirectCallee(); 3100 SmallVector<Value*, 2> Ops; 3101 for (unsigned i = 0; i < 2; i++) 3102 
Ops.push_back(EmitScalarExpr(E->getArg(i))); 3103 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 3104 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 3105 StringRef Name = FD->getName(); 3106 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 3107 } 3108 3109 if (BuiltinID == ARM::BI__builtin_arm_ldrexd || 3110 ((BuiltinID == ARM::BI__builtin_arm_ldrex || 3111 BuiltinID == ARM::BI__builtin_arm_ldaex) && 3112 getContext().getTypeSize(E->getType()) == 64) || 3113 BuiltinID == ARM::BI__ldrexd) { 3114 Function *F; 3115 3116 switch (BuiltinID) { 3117 default: llvm_unreachable("unexpected builtin"); 3118 case ARM::BI__builtin_arm_ldaex: 3119 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd); 3120 break; 3121 case ARM::BI__builtin_arm_ldrexd: 3122 case ARM::BI__builtin_arm_ldrex: 3123 case ARM::BI__ldrexd: 3124 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd); 3125 break; 3126 } 3127 3128 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 3129 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 3130 "ldrexd"); 3131 3132 Value *Val0 = Builder.CreateExtractValue(Val, 1); 3133 Value *Val1 = Builder.CreateExtractValue(Val, 0); 3134 Val0 = Builder.CreateZExt(Val0, Int64Ty); 3135 Val1 = Builder.CreateZExt(Val1, Int64Ty); 3136 3137 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32); 3138 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 3139 Val = Builder.CreateOr(Val, Val1); 3140 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 3141 } 3142 3143 if (BuiltinID == ARM::BI__builtin_arm_ldrex || 3144 BuiltinID == ARM::BI__builtin_arm_ldaex) { 3145 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 3146 3147 QualType Ty = E->getType(); 3148 llvm::Type *RealResTy = ConvertType(Ty); 3149 llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(), 3150 getContext().getTypeSize(Ty)); 3151 LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo()); 3152 3153 Function *F = 
CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex 3154 ? Intrinsic::arm_ldaex 3155 : Intrinsic::arm_ldrex, 3156 LoadAddr->getType()); 3157 Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex"); 3158 3159 if (RealResTy->isPointerTy()) 3160 return Builder.CreateIntToPtr(Val, RealResTy); 3161 else { 3162 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 3163 return Builder.CreateBitCast(Val, RealResTy); 3164 } 3165 } 3166 3167 if (BuiltinID == ARM::BI__builtin_arm_strexd || 3168 ((BuiltinID == ARM::BI__builtin_arm_stlex || 3169 BuiltinID == ARM::BI__builtin_arm_strex) && 3170 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) { 3171 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex 3172 ? Intrinsic::arm_stlexd 3173 : Intrinsic::arm_strexd); 3174 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, NULL); 3175 3176 Value *Tmp = CreateMemTemp(E->getArg(0)->getType()); 3177 Value *Val = EmitScalarExpr(E->getArg(0)); 3178 Builder.CreateStore(Val, Tmp); 3179 3180 Value *LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy)); 3181 Val = Builder.CreateLoad(LdPtr); 3182 3183 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 3184 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 3185 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy); 3186 return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "strexd"); 3187 } 3188 3189 if (BuiltinID == ARM::BI__builtin_arm_strex || 3190 BuiltinID == ARM::BI__builtin_arm_stlex) { 3191 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 3192 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 3193 3194 QualType Ty = E->getArg(0)->getType(); 3195 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 3196 getContext().getTypeSize(Ty)); 3197 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 3198 3199 if (StoreVal->getType()->isPointerTy()) 3200 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty); 3201 else { 3202 StoreVal = 
Builder.CreateBitCast(StoreVal, StoreTy); 3203 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty); 3204 } 3205 3206 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex 3207 ? Intrinsic::arm_stlex 3208 : Intrinsic::arm_strex, 3209 StoreAddr->getType()); 3210 return Builder.CreateCall2(F, StoreVal, StoreAddr, "strex"); 3211 } 3212 3213 if (BuiltinID == ARM::BI__builtin_arm_clrex) { 3214 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex); 3215 return Builder.CreateCall(F); 3216 } 3217 3218 // CRC32 3219 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 3220 switch (BuiltinID) { 3221 case ARM::BI__builtin_arm_crc32b: 3222 CRCIntrinsicID = Intrinsic::arm_crc32b; break; 3223 case ARM::BI__builtin_arm_crc32cb: 3224 CRCIntrinsicID = Intrinsic::arm_crc32cb; break; 3225 case ARM::BI__builtin_arm_crc32h: 3226 CRCIntrinsicID = Intrinsic::arm_crc32h; break; 3227 case ARM::BI__builtin_arm_crc32ch: 3228 CRCIntrinsicID = Intrinsic::arm_crc32ch; break; 3229 case ARM::BI__builtin_arm_crc32w: 3230 case ARM::BI__builtin_arm_crc32d: 3231 CRCIntrinsicID = Intrinsic::arm_crc32w; break; 3232 case ARM::BI__builtin_arm_crc32cw: 3233 case ARM::BI__builtin_arm_crc32cd: 3234 CRCIntrinsicID = Intrinsic::arm_crc32cw; break; 3235 } 3236 3237 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 3238 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 3239 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 3240 3241 // crc32{c,}d intrinsics are implemnted as two calls to crc32{c,}w 3242 // intrinsics, hence we need different codegen for these cases. 
3243 if (BuiltinID == ARM::BI__builtin_arm_crc32d || 3244 BuiltinID == ARM::BI__builtin_arm_crc32cd) { 3245 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); 3246 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty); 3247 Value *Arg1b = Builder.CreateLShr(Arg1, C1); 3248 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty); 3249 3250 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 3251 Value *Res = Builder.CreateCall2(F, Arg0, Arg1a); 3252 return Builder.CreateCall2(F, Res, Arg1b); 3253 } else { 3254 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty); 3255 3256 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 3257 return Builder.CreateCall2(F, Arg0, Arg1); 3258 } 3259 } 3260 3261 SmallVector<Value*, 4> Ops; 3262 llvm::Value *Align = nullptr; 3263 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { 3264 if (i == 0) { 3265 switch (BuiltinID) { 3266 case NEON::BI__builtin_neon_vld1_v: 3267 case NEON::BI__builtin_neon_vld1q_v: 3268 case NEON::BI__builtin_neon_vld1q_lane_v: 3269 case NEON::BI__builtin_neon_vld1_lane_v: 3270 case NEON::BI__builtin_neon_vld1_dup_v: 3271 case NEON::BI__builtin_neon_vld1q_dup_v: 3272 case NEON::BI__builtin_neon_vst1_v: 3273 case NEON::BI__builtin_neon_vst1q_v: 3274 case NEON::BI__builtin_neon_vst1q_lane_v: 3275 case NEON::BI__builtin_neon_vst1_lane_v: 3276 case NEON::BI__builtin_neon_vst2_v: 3277 case NEON::BI__builtin_neon_vst2q_v: 3278 case NEON::BI__builtin_neon_vst2_lane_v: 3279 case NEON::BI__builtin_neon_vst2q_lane_v: 3280 case NEON::BI__builtin_neon_vst3_v: 3281 case NEON::BI__builtin_neon_vst3q_v: 3282 case NEON::BI__builtin_neon_vst3_lane_v: 3283 case NEON::BI__builtin_neon_vst3q_lane_v: 3284 case NEON::BI__builtin_neon_vst4_v: 3285 case NEON::BI__builtin_neon_vst4q_v: 3286 case NEON::BI__builtin_neon_vst4_lane_v: 3287 case NEON::BI__builtin_neon_vst4q_lane_v: 3288 // Get the alignment for the argument in addition to the value; 3289 // we'll use it later. 
3290 std::pair<llvm::Value*, unsigned> Src = 3291 EmitPointerWithAlignment(E->getArg(0)); 3292 Ops.push_back(Src.first); 3293 Align = Builder.getInt32(Src.second); 3294 continue; 3295 } 3296 } 3297 if (i == 1) { 3298 switch (BuiltinID) { 3299 case NEON::BI__builtin_neon_vld2_v: 3300 case NEON::BI__builtin_neon_vld2q_v: 3301 case NEON::BI__builtin_neon_vld3_v: 3302 case NEON::BI__builtin_neon_vld3q_v: 3303 case NEON::BI__builtin_neon_vld4_v: 3304 case NEON::BI__builtin_neon_vld4q_v: 3305 case NEON::BI__builtin_neon_vld2_lane_v: 3306 case NEON::BI__builtin_neon_vld2q_lane_v: 3307 case NEON::BI__builtin_neon_vld3_lane_v: 3308 case NEON::BI__builtin_neon_vld3q_lane_v: 3309 case NEON::BI__builtin_neon_vld4_lane_v: 3310 case NEON::BI__builtin_neon_vld4q_lane_v: 3311 case NEON::BI__builtin_neon_vld2_dup_v: 3312 case NEON::BI__builtin_neon_vld3_dup_v: 3313 case NEON::BI__builtin_neon_vld4_dup_v: 3314 // Get the alignment for the argument in addition to the value; 3315 // we'll use it later. 3316 std::pair<llvm::Value*, unsigned> Src = 3317 EmitPointerWithAlignment(E->getArg(1)); 3318 Ops.push_back(Src.first); 3319 Align = Builder.getInt32(Src.second); 3320 continue; 3321 } 3322 } 3323 Ops.push_back(EmitScalarExpr(E->getArg(i))); 3324 } 3325 3326 switch (BuiltinID) { 3327 default: break; 3328 // vget_lane and vset_lane are not overloaded and do not have an extra 3329 // argument that specifies the vector type. 
3330 case NEON::BI__builtin_neon_vget_lane_i8: 3331 case NEON::BI__builtin_neon_vget_lane_i16: 3332 case NEON::BI__builtin_neon_vget_lane_i32: 3333 case NEON::BI__builtin_neon_vget_lane_i64: 3334 case NEON::BI__builtin_neon_vget_lane_f32: 3335 case NEON::BI__builtin_neon_vgetq_lane_i8: 3336 case NEON::BI__builtin_neon_vgetq_lane_i16: 3337 case NEON::BI__builtin_neon_vgetq_lane_i32: 3338 case NEON::BI__builtin_neon_vgetq_lane_i64: 3339 case NEON::BI__builtin_neon_vgetq_lane_f32: 3340 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 3341 "vget_lane"); 3342 case NEON::BI__builtin_neon_vset_lane_i8: 3343 case NEON::BI__builtin_neon_vset_lane_i16: 3344 case NEON::BI__builtin_neon_vset_lane_i32: 3345 case NEON::BI__builtin_neon_vset_lane_i64: 3346 case NEON::BI__builtin_neon_vset_lane_f32: 3347 case NEON::BI__builtin_neon_vsetq_lane_i8: 3348 case NEON::BI__builtin_neon_vsetq_lane_i16: 3349 case NEON::BI__builtin_neon_vsetq_lane_i32: 3350 case NEON::BI__builtin_neon_vsetq_lane_i64: 3351 case NEON::BI__builtin_neon_vsetq_lane_f32: 3352 Ops.push_back(EmitScalarExpr(E->getArg(2))); 3353 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 3354 3355 // Non-polymorphic crypto instructions also not overloaded 3356 case NEON::BI__builtin_neon_vsha1h_u32: 3357 Ops.push_back(EmitScalarExpr(E->getArg(0))); 3358 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops, 3359 "vsha1h"); 3360 case NEON::BI__builtin_neon_vsha1cq_u32: 3361 Ops.push_back(EmitScalarExpr(E->getArg(2))); 3362 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops, 3363 "vsha1h"); 3364 case NEON::BI__builtin_neon_vsha1pq_u32: 3365 Ops.push_back(EmitScalarExpr(E->getArg(2))); 3366 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops, 3367 "vsha1h"); 3368 case NEON::BI__builtin_neon_vsha1mq_u32: 3369 Ops.push_back(EmitScalarExpr(E->getArg(2))); 3370 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops, 3371 
"vsha1h"); 3372 } 3373 3374 // Get the last argument, which specifies the vector type. 3375 llvm::APSInt Result; 3376 const Expr *Arg = E->getArg(E->getNumArgs()-1); 3377 if (!Arg->isIntegerConstantExpr(Result, getContext())) 3378 return nullptr; 3379 3380 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f || 3381 BuiltinID == ARM::BI__builtin_arm_vcvtr_d) { 3382 // Determine the overloaded type of this builtin. 3383 llvm::Type *Ty; 3384 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f) 3385 Ty = FloatTy; 3386 else 3387 Ty = DoubleTy; 3388 3389 // Determine whether this is an unsigned conversion or not. 3390 bool usgn = Result.getZExtValue() == 1; 3391 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr; 3392 3393 // Call the appropriate intrinsic. 3394 Function *F = CGM.getIntrinsic(Int, Ty); 3395 return Builder.CreateCall(F, Ops, "vcvtr"); 3396 } 3397 3398 // Determine the type of this overloaded NEON intrinsic. 3399 NeonTypeFlags Type(Result.getZExtValue()); 3400 bool usgn = Type.isUnsigned(); 3401 bool rightShift = false; 3402 3403 llvm::VectorType *VTy = GetNeonType(this, Type); 3404 llvm::Type *Ty = VTy; 3405 if (!Ty) 3406 return nullptr; 3407 3408 // Many NEON builtins have identical semantics and uses in ARM and 3409 // AArch64. Emit these in a single function. 3410 ArrayRef<NeonIntrinsicInfo> IntrinsicMap(ARMSIMDIntrinsicMap); 3411 const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( 3412 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted); 3413 if (Builtin) 3414 return EmitCommonNeonBuiltinExpr( 3415 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 3416 Builtin->NameHint, Builtin->TypeModifier, E, Ops, Align); 3417 3418 unsigned Int; 3419 switch (BuiltinID) { 3420 default: return nullptr; 3421 case NEON::BI__builtin_neon_vld1q_lane_v: 3422 // Handle 64-bit integer elements as a special case. Use shuffles of 3423 // one-element vectors to avoid poor code for i64 in the backend. 
3424 if (VTy->getElementType()->isIntegerTy(64)) { 3425 // Extract the other lane. 3426 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3427 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue(); 3428 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane)); 3429 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 3430 // Load the value as a one-element vector. 3431 Ty = llvm::VectorType::get(VTy->getElementType(), 1); 3432 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty); 3433 Value *Ld = Builder.CreateCall2(F, Ops[0], Align); 3434 // Combine them. 3435 SmallVector<Constant*, 2> Indices; 3436 Indices.push_back(ConstantInt::get(Int32Ty, 1-Lane)); 3437 Indices.push_back(ConstantInt::get(Int32Ty, Lane)); 3438 SV = llvm::ConstantVector::get(Indices); 3439 return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane"); 3440 } 3441 // fall through 3442 case NEON::BI__builtin_neon_vld1_lane_v: { 3443 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3444 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 3445 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3446 LoadInst *Ld = Builder.CreateLoad(Ops[0]); 3447 Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); 3448 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); 3449 } 3450 case NEON::BI__builtin_neon_vld2_dup_v: 3451 case NEON::BI__builtin_neon_vld3_dup_v: 3452 case NEON::BI__builtin_neon_vld4_dup_v: { 3453 // Handle 64-bit elements as a special-case. There is no "dup" needed. 
3454 if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) { 3455 switch (BuiltinID) { 3456 case NEON::BI__builtin_neon_vld2_dup_v: 3457 Int = Intrinsic::arm_neon_vld2; 3458 break; 3459 case NEON::BI__builtin_neon_vld3_dup_v: 3460 Int = Intrinsic::arm_neon_vld3; 3461 break; 3462 case NEON::BI__builtin_neon_vld4_dup_v: 3463 Int = Intrinsic::arm_neon_vld4; 3464 break; 3465 default: llvm_unreachable("unknown vld_dup intrinsic?"); 3466 } 3467 Function *F = CGM.getIntrinsic(Int, Ty); 3468 Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup"); 3469 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 3470 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3471 return Builder.CreateStore(Ops[1], Ops[0]); 3472 } 3473 switch (BuiltinID) { 3474 case NEON::BI__builtin_neon_vld2_dup_v: 3475 Int = Intrinsic::arm_neon_vld2lane; 3476 break; 3477 case NEON::BI__builtin_neon_vld3_dup_v: 3478 Int = Intrinsic::arm_neon_vld3lane; 3479 break; 3480 case NEON::BI__builtin_neon_vld4_dup_v: 3481 Int = Intrinsic::arm_neon_vld4lane; 3482 break; 3483 default: llvm_unreachable("unknown vld_dup intrinsic?"); 3484 } 3485 Function *F = CGM.getIntrinsic(Int, Ty); 3486 llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType()); 3487 3488 SmallVector<Value*, 6> Args; 3489 Args.push_back(Ops[1]); 3490 Args.append(STy->getNumElements(), UndefValue::get(Ty)); 3491 3492 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 3493 Args.push_back(CI); 3494 Args.push_back(Align); 3495 3496 Ops[1] = Builder.CreateCall(F, Args, "vld_dup"); 3497 // splat lane 0 to all elts in each vector of the result. 
3498 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 3499 Value *Val = Builder.CreateExtractValue(Ops[1], i); 3500 Value *Elt = Builder.CreateBitCast(Val, Ty); 3501 Elt = EmitNeonSplat(Elt, CI); 3502 Elt = Builder.CreateBitCast(Elt, Val->getType()); 3503 Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i); 3504 } 3505 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 3506 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3507 return Builder.CreateStore(Ops[1], Ops[0]); 3508 } 3509 case NEON::BI__builtin_neon_vqrshrn_n_v: 3510 Int = 3511 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns; 3512 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n", 3513 1, true); 3514 case NEON::BI__builtin_neon_vqrshrun_n_v: 3515 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty), 3516 Ops, "vqrshrun_n", 1, true); 3517 case NEON::BI__builtin_neon_vqshrn_n_v: 3518 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns; 3519 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n", 3520 1, true); 3521 case NEON::BI__builtin_neon_vqshrun_n_v: 3522 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty), 3523 Ops, "vqshrun_n", 1, true); 3524 case NEON::BI__builtin_neon_vrecpe_v: 3525 case NEON::BI__builtin_neon_vrecpeq_v: 3526 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty), 3527 Ops, "vrecpe"); 3528 case NEON::BI__builtin_neon_vrshrn_n_v: 3529 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty), 3530 Ops, "vrshrn_n", 1, true); 3531 case NEON::BI__builtin_neon_vrsra_n_v: 3532 case NEON::BI__builtin_neon_vrsraq_n_v: 3533 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3534 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3535 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true); 3536 Int = usgn ? 
Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts; 3537 Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]); 3538 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n"); 3539 case NEON::BI__builtin_neon_vsri_n_v: 3540 case NEON::BI__builtin_neon_vsriq_n_v: 3541 rightShift = true; 3542 case NEON::BI__builtin_neon_vsli_n_v: 3543 case NEON::BI__builtin_neon_vsliq_n_v: 3544 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift); 3545 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty), 3546 Ops, "vsli_n"); 3547 case NEON::BI__builtin_neon_vsra_n_v: 3548 case NEON::BI__builtin_neon_vsraq_n_v: 3549 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3550 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 3551 return Builder.CreateAdd(Ops[0], Ops[1]); 3552 case NEON::BI__builtin_neon_vst1q_lane_v: 3553 // Handle 64-bit integer elements as a special case. Use a shuffle to get 3554 // a one-element vector and avoid poor code for i64 in the backend. 
3555 if (VTy->getElementType()->isIntegerTy(64)) { 3556 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3557 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2])); 3558 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 3559 Ops[2] = Align; 3560 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, 3561 Ops[1]->getType()), Ops); 3562 } 3563 // fall through 3564 case NEON::BI__builtin_neon_vst1_lane_v: { 3565 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3566 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 3567 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 3568 StoreInst *St = Builder.CreateStore(Ops[1], 3569 Builder.CreateBitCast(Ops[0], Ty)); 3570 St->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); 3571 return St; 3572 } 3573 case NEON::BI__builtin_neon_vtbl1_v: 3574 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1), 3575 Ops, "vtbl1"); 3576 case NEON::BI__builtin_neon_vtbl2_v: 3577 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2), 3578 Ops, "vtbl2"); 3579 case NEON::BI__builtin_neon_vtbl3_v: 3580 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3), 3581 Ops, "vtbl3"); 3582 case NEON::BI__builtin_neon_vtbl4_v: 3583 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4), 3584 Ops, "vtbl4"); 3585 case NEON::BI__builtin_neon_vtbx1_v: 3586 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1), 3587 Ops, "vtbx1"); 3588 case NEON::BI__builtin_neon_vtbx2_v: 3589 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2), 3590 Ops, "vtbx2"); 3591 case NEON::BI__builtin_neon_vtbx3_v: 3592 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3), 3593 Ops, "vtbx3"); 3594 case NEON::BI__builtin_neon_vtbx4_v: 3595 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4), 3596 Ops, "vtbx4"); 3597 } 3598 } 3599 3600 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, 3601 const CallExpr *E, 3602 SmallVectorImpl<Value 
*> &Ops) { 3603 unsigned int Int = 0; 3604 const char *s = nullptr; 3605 3606 switch (BuiltinID) { 3607 default: 3608 return nullptr; 3609 case NEON::BI__builtin_neon_vtbl1_v: 3610 case NEON::BI__builtin_neon_vqtbl1_v: 3611 case NEON::BI__builtin_neon_vqtbl1q_v: 3612 case NEON::BI__builtin_neon_vtbl2_v: 3613 case NEON::BI__builtin_neon_vqtbl2_v: 3614 case NEON::BI__builtin_neon_vqtbl2q_v: 3615 case NEON::BI__builtin_neon_vtbl3_v: 3616 case NEON::BI__builtin_neon_vqtbl3_v: 3617 case NEON::BI__builtin_neon_vqtbl3q_v: 3618 case NEON::BI__builtin_neon_vtbl4_v: 3619 case NEON::BI__builtin_neon_vqtbl4_v: 3620 case NEON::BI__builtin_neon_vqtbl4q_v: 3621 break; 3622 case NEON::BI__builtin_neon_vtbx1_v: 3623 case NEON::BI__builtin_neon_vqtbx1_v: 3624 case NEON::BI__builtin_neon_vqtbx1q_v: 3625 case NEON::BI__builtin_neon_vtbx2_v: 3626 case NEON::BI__builtin_neon_vqtbx2_v: 3627 case NEON::BI__builtin_neon_vqtbx2q_v: 3628 case NEON::BI__builtin_neon_vtbx3_v: 3629 case NEON::BI__builtin_neon_vqtbx3_v: 3630 case NEON::BI__builtin_neon_vqtbx3q_v: 3631 case NEON::BI__builtin_neon_vtbx4_v: 3632 case NEON::BI__builtin_neon_vqtbx4_v: 3633 case NEON::BI__builtin_neon_vqtbx4q_v: 3634 break; 3635 } 3636 3637 assert(E->getNumArgs() >= 3); 3638 3639 // Get the last argument, which specifies the vector type. 3640 llvm::APSInt Result; 3641 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 3642 if (!Arg->isIntegerConstantExpr(Result, CGF.getContext())) 3643 return nullptr; 3644 3645 // Determine the type of this overloaded NEON intrinsic. 3646 NeonTypeFlags Type(Result.getZExtValue()); 3647 llvm::VectorType *VTy = GetNeonType(&CGF, Type); 3648 llvm::Type *Ty = VTy; 3649 if (!Ty) 3650 return nullptr; 3651 3652 unsigned nElts = VTy->getNumElements(); 3653 3654 CodeGen::CGBuilderTy &Builder = CGF.Builder; 3655 3656 // AArch64 scalar builtins are not overloaded, they do not have an extra 3657 // argument that specifies the vector type, need to handle each case. 
3658 SmallVector<Value *, 2> TblOps; 3659 switch (BuiltinID) { 3660 case NEON::BI__builtin_neon_vtbl1_v: { 3661 TblOps.push_back(Ops[0]); 3662 return packTBLDVectorList(CGF, TblOps, nullptr, Ops[1], Ty, 3663 Intrinsic::aarch64_neon_tbl1, "vtbl1"); 3664 } 3665 case NEON::BI__builtin_neon_vtbl2_v: { 3666 TblOps.push_back(Ops[0]); 3667 TblOps.push_back(Ops[1]); 3668 return packTBLDVectorList(CGF, TblOps, nullptr, Ops[2], Ty, 3669 Intrinsic::aarch64_neon_tbl1, "vtbl1"); 3670 } 3671 case NEON::BI__builtin_neon_vtbl3_v: { 3672 TblOps.push_back(Ops[0]); 3673 TblOps.push_back(Ops[1]); 3674 TblOps.push_back(Ops[2]); 3675 return packTBLDVectorList(CGF, TblOps, nullptr, Ops[3], Ty, 3676 Intrinsic::aarch64_neon_tbl2, "vtbl2"); 3677 } 3678 case NEON::BI__builtin_neon_vtbl4_v: { 3679 TblOps.push_back(Ops[0]); 3680 TblOps.push_back(Ops[1]); 3681 TblOps.push_back(Ops[2]); 3682 TblOps.push_back(Ops[3]); 3683 return packTBLDVectorList(CGF, TblOps, nullptr, Ops[4], Ty, 3684 Intrinsic::aarch64_neon_tbl2, "vtbl2"); 3685 } 3686 case NEON::BI__builtin_neon_vtbx1_v: { 3687 TblOps.push_back(Ops[1]); 3688 Value *TblRes = packTBLDVectorList(CGF, TblOps, nullptr, Ops[2], Ty, 3689 Intrinsic::aarch64_neon_tbl1, "vtbl1"); 3690 3691 llvm::Constant *Eight = ConstantInt::get(VTy->getElementType(), 8); 3692 Value* EightV = llvm::ConstantVector::getSplat(nElts, Eight); 3693 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV); 3694 CmpRes = Builder.CreateSExt(CmpRes, Ty); 3695 3696 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]); 3697 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 3698 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 3699 } 3700 case NEON::BI__builtin_neon_vtbx2_v: { 3701 TblOps.push_back(Ops[1]); 3702 TblOps.push_back(Ops[2]); 3703 return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[3], Ty, 3704 Intrinsic::aarch64_neon_tbx1, "vtbx1"); 3705 } 3706 case NEON::BI__builtin_neon_vtbx3_v: { 3707 TblOps.push_back(Ops[1]); 
3708 TblOps.push_back(Ops[2]); 3709 TblOps.push_back(Ops[3]); 3710 Value *TblRes = packTBLDVectorList(CGF, TblOps, nullptr, Ops[4], Ty, 3711 Intrinsic::aarch64_neon_tbl2, "vtbl2"); 3712 3713 llvm::Constant *TwentyFour = ConstantInt::get(VTy->getElementType(), 24); 3714 Value* TwentyFourV = llvm::ConstantVector::getSplat(nElts, TwentyFour); 3715 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4], 3716 TwentyFourV); 3717 CmpRes = Builder.CreateSExt(CmpRes, Ty); 3718 3719 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]); 3720 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 3721 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 3722 } 3723 case NEON::BI__builtin_neon_vtbx4_v: { 3724 TblOps.push_back(Ops[1]); 3725 TblOps.push_back(Ops[2]); 3726 TblOps.push_back(Ops[3]); 3727 TblOps.push_back(Ops[4]); 3728 return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[5], Ty, 3729 Intrinsic::aarch64_neon_tbx2, "vtbx2"); 3730 } 3731 case NEON::BI__builtin_neon_vqtbl1_v: 3732 case NEON::BI__builtin_neon_vqtbl1q_v: 3733 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break; 3734 case NEON::BI__builtin_neon_vqtbl2_v: 3735 case NEON::BI__builtin_neon_vqtbl2q_v: { 3736 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break; 3737 case NEON::BI__builtin_neon_vqtbl3_v: 3738 case NEON::BI__builtin_neon_vqtbl3q_v: 3739 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break; 3740 case NEON::BI__builtin_neon_vqtbl4_v: 3741 case NEON::BI__builtin_neon_vqtbl4q_v: 3742 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break; 3743 case NEON::BI__builtin_neon_vqtbx1_v: 3744 case NEON::BI__builtin_neon_vqtbx1q_v: 3745 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break; 3746 case NEON::BI__builtin_neon_vqtbx2_v: 3747 case NEON::BI__builtin_neon_vqtbx2q_v: 3748 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break; 3749 case NEON::BI__builtin_neon_vqtbx3_v: 3750 case NEON::BI__builtin_neon_vqtbx3q_v: 3751 Int = Intrinsic::aarch64_neon_tbx3; s 
= "vtbx3"; break; 3752 case NEON::BI__builtin_neon_vqtbx4_v: 3753 case NEON::BI__builtin_neon_vqtbx4q_v: 3754 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break; 3755 } 3756 } 3757 3758 if (!Int) 3759 return nullptr; 3760 3761 Function *F = CGF.CGM.getIntrinsic(Int, Ty); 3762 return CGF.EmitNeonCall(F, Ops, s); 3763 } 3764 3765 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) { 3766 llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4); 3767 Op = Builder.CreateBitCast(Op, Int16Ty); 3768 Value *V = UndefValue::get(VTy); 3769 llvm::Constant *CI = ConstantInt::get(SizeTy, 0); 3770 Op = Builder.CreateInsertElement(V, Op, CI); 3771 return Op; 3772 } 3773 3774 Value *CodeGenFunction::vectorWrapScalar8(Value *Op) { 3775 llvm::Type *VTy = llvm::VectorType::get(Int8Ty, 8); 3776 Op = Builder.CreateBitCast(Op, Int8Ty); 3777 Value *V = UndefValue::get(VTy); 3778 llvm::Constant *CI = ConstantInt::get(SizeTy, 0); 3779 Op = Builder.CreateInsertElement(V, Op, CI); 3780 return Op; 3781 } 3782 3783 Value *CodeGenFunction:: 3784 emitVectorWrappedScalar8Intrinsic(unsigned Int, SmallVectorImpl<Value*> &Ops, 3785 const char *Name) { 3786 // i8 is not a legal types for AArch64, so we can't just use 3787 // a normal overloaded intrinsic call for these scalar types. Instead 3788 // we'll build 64-bit vectors w/ lane zero being our input values and 3789 // perform the operation on that. The back end can pattern match directly 3790 // to the scalar instruction. 
3791 Ops[0] = vectorWrapScalar8(Ops[0]); 3792 Ops[1] = vectorWrapScalar8(Ops[1]); 3793 llvm::Type *VTy = llvm::VectorType::get(Int8Ty, 8); 3794 Value *V = EmitNeonCall(CGM.getIntrinsic(Int, VTy), Ops, Name); 3795 Constant *CI = ConstantInt::get(SizeTy, 0); 3796 return Builder.CreateExtractElement(V, CI, "lane0"); 3797 } 3798 3799 Value *CodeGenFunction:: 3800 emitVectorWrappedScalar16Intrinsic(unsigned Int, SmallVectorImpl<Value*> &Ops, 3801 const char *Name) { 3802 // i16 is not a legal types for AArch64, so we can't just use 3803 // a normal overloaded intrinsic call for these scalar types. Instead 3804 // we'll build 64-bit vectors w/ lane zero being our input values and 3805 // perform the operation on that. The back end can pattern match directly 3806 // to the scalar instruction. 3807 Ops[0] = vectorWrapScalar16(Ops[0]); 3808 Ops[1] = vectorWrapScalar16(Ops[1]); 3809 llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4); 3810 Value *V = EmitNeonCall(CGM.getIntrinsic(Int, VTy), Ops, Name); 3811 Constant *CI = ConstantInt::get(SizeTy, 0); 3812 return Builder.CreateExtractElement(V, CI, "lane0"); 3813 } 3814 3815 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, 3816 const CallExpr *E) { 3817 unsigned HintID = static_cast<unsigned>(-1); 3818 switch (BuiltinID) { 3819 default: break; 3820 case AArch64::BI__builtin_arm_nop: 3821 HintID = 0; 3822 break; 3823 case AArch64::BI__builtin_arm_yield: 3824 HintID = 1; 3825 break; 3826 case AArch64::BI__builtin_arm_wfe: 3827 HintID = 2; 3828 break; 3829 case AArch64::BI__builtin_arm_wfi: 3830 HintID = 3; 3831 break; 3832 case AArch64::BI__builtin_arm_sev: 3833 HintID = 4; 3834 break; 3835 case AArch64::BI__builtin_arm_sevl: 3836 HintID = 5; 3837 break; 3838 } 3839 3840 if (HintID != static_cast<unsigned>(-1)) { 3841 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint); 3842 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID)); 3843 } 3844 3845 if (BuiltinID == 
AArch64::BI__builtin_arm_rbit) { 3846 assert((getContext().getTypeSize(E->getType()) == 32) && 3847 "rbit of unusual size!"); 3848 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 3849 return Builder.CreateCall( 3850 CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit"); 3851 } 3852 if (BuiltinID == AArch64::BI__builtin_arm_rbit64) { 3853 assert((getContext().getTypeSize(E->getType()) == 64) && 3854 "rbit of unusual size!"); 3855 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 3856 return Builder.CreateCall( 3857 CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit"); 3858 } 3859 3860 if (BuiltinID == AArch64::BI__clear_cache) { 3861 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 3862 const FunctionDecl *FD = E->getDirectCallee(); 3863 SmallVector<Value*, 2> Ops; 3864 for (unsigned i = 0; i < 2; i++) 3865 Ops.push_back(EmitScalarExpr(E->getArg(i))); 3866 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 3867 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 3868 StringRef Name = FD->getName(); 3869 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 3870 } 3871 3872 if ((BuiltinID == AArch64::BI__builtin_arm_ldrex || 3873 BuiltinID == AArch64::BI__builtin_arm_ldaex) && 3874 getContext().getTypeSize(E->getType()) == 128) { 3875 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 3876 ? 
Intrinsic::aarch64_ldaxp 3877 : Intrinsic::aarch64_ldxp); 3878 3879 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 3880 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 3881 "ldxp"); 3882 3883 Value *Val0 = Builder.CreateExtractValue(Val, 1); 3884 Value *Val1 = Builder.CreateExtractValue(Val, 0); 3885 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); 3886 Val0 = Builder.CreateZExt(Val0, Int128Ty); 3887 Val1 = Builder.CreateZExt(Val1, Int128Ty); 3888 3889 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64); 3890 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 3891 Val = Builder.CreateOr(Val, Val1); 3892 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 3893 } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex || 3894 BuiltinID == AArch64::BI__builtin_arm_ldaex) { 3895 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 3896 3897 QualType Ty = E->getType(); 3898 llvm::Type *RealResTy = ConvertType(Ty); 3899 llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(), 3900 getContext().getTypeSize(Ty)); 3901 LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo()); 3902 3903 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 3904 ? Intrinsic::aarch64_ldaxr 3905 : Intrinsic::aarch64_ldxr, 3906 LoadAddr->getType()); 3907 Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr"); 3908 3909 if (RealResTy->isPointerTy()) 3910 return Builder.CreateIntToPtr(Val, RealResTy); 3911 3912 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 3913 return Builder.CreateBitCast(Val, RealResTy); 3914 } 3915 3916 if ((BuiltinID == AArch64::BI__builtin_arm_strex || 3917 BuiltinID == AArch64::BI__builtin_arm_stlex) && 3918 getContext().getTypeSize(E->getArg(0)->getType()) == 128) { 3919 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 3920 ? 
Intrinsic::aarch64_stlxp 3921 : Intrinsic::aarch64_stxp); 3922 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, NULL); 3923 3924 Value *One = llvm::ConstantInt::get(Int32Ty, 1); 3925 Value *Tmp = Builder.CreateAlloca(ConvertType(E->getArg(0)->getType()), 3926 One); 3927 Value *Val = EmitScalarExpr(E->getArg(0)); 3928 Builder.CreateStore(Val, Tmp); 3929 3930 Value *LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy)); 3931 Val = Builder.CreateLoad(LdPtr); 3932 3933 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 3934 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 3935 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), 3936 Int8PtrTy); 3937 return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "stxp"); 3938 } else if (BuiltinID == AArch64::BI__builtin_arm_strex || 3939 BuiltinID == AArch64::BI__builtin_arm_stlex) { 3940 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 3941 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 3942 3943 QualType Ty = E->getArg(0)->getType(); 3944 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 3945 getContext().getTypeSize(Ty)); 3946 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 3947 3948 if (StoreVal->getType()->isPointerTy()) 3949 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty); 3950 else { 3951 StoreVal = Builder.CreateBitCast(StoreVal, StoreTy); 3952 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty); 3953 } 3954 3955 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 3956 ? 
Intrinsic::aarch64_stlxr 3957 : Intrinsic::aarch64_stxr, 3958 StoreAddr->getType()); 3959 return Builder.CreateCall2(F, StoreVal, StoreAddr, "stxr"); 3960 } 3961 3962 if (BuiltinID == AArch64::BI__builtin_arm_clrex) { 3963 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex); 3964 return Builder.CreateCall(F); 3965 } 3966 3967 // CRC32 3968 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 3969 switch (BuiltinID) { 3970 case AArch64::BI__builtin_arm_crc32b: 3971 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break; 3972 case AArch64::BI__builtin_arm_crc32cb: 3973 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break; 3974 case AArch64::BI__builtin_arm_crc32h: 3975 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break; 3976 case AArch64::BI__builtin_arm_crc32ch: 3977 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break; 3978 case AArch64::BI__builtin_arm_crc32w: 3979 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break; 3980 case AArch64::BI__builtin_arm_crc32cw: 3981 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break; 3982 case AArch64::BI__builtin_arm_crc32d: 3983 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break; 3984 case AArch64::BI__builtin_arm_crc32cd: 3985 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break; 3986 } 3987 3988 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 3989 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 3990 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 3991 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 3992 3993 llvm::Type *DataTy = F->getFunctionType()->getParamType(1); 3994 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy); 3995 3996 return Builder.CreateCall2(F, Arg0, Arg1); 3997 } 3998 3999 llvm::SmallVector<Value*, 4> Ops; 4000 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) 4001 Ops.push_back(EmitScalarExpr(E->getArg(i))); 4002 4003 ArrayRef<NeonIntrinsicInfo> SISDMap(AArch64SISDIntrinsicMap); 4004 const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( 4005 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted); 4006 4007 
if (Builtin) { 4008 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1))); 4009 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E); 4010 assert(Result && "SISD intrinsic should have been handled"); 4011 return Result; 4012 } 4013 4014 llvm::APSInt Result; 4015 const Expr *Arg = E->getArg(E->getNumArgs()-1); 4016 NeonTypeFlags Type(0); 4017 if (Arg->isIntegerConstantExpr(Result, getContext())) 4018 // Determine the type of this overloaded NEON intrinsic. 4019 Type = NeonTypeFlags(Result.getZExtValue()); 4020 4021 bool usgn = Type.isUnsigned(); 4022 bool quad = Type.isQuad(); 4023 4024 // Handle non-overloaded intrinsics first. 4025 switch (BuiltinID) { 4026 default: break; 4027 case NEON::BI__builtin_neon_vldrq_p128: { 4028 llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128); 4029 Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy); 4030 return Builder.CreateLoad(Ptr); 4031 } 4032 case NEON::BI__builtin_neon_vstrq_p128: { 4033 llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128); 4034 Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy); 4035 return Builder.CreateStore(EmitScalarExpr(E->getArg(1)), Ptr); 4036 } 4037 case NEON::BI__builtin_neon_vcvts_u32_f32: 4038 case NEON::BI__builtin_neon_vcvtd_u64_f64: 4039 usgn = true; 4040 // FALL THROUGH 4041 case NEON::BI__builtin_neon_vcvts_s32_f32: 4042 case NEON::BI__builtin_neon_vcvtd_s64_f64: { 4043 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4044 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 4045 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 4046 llvm::Type *FTy = Is64 ? 
DoubleTy : FloatTy; 4047 Ops[0] = Builder.CreateBitCast(Ops[0], FTy); 4048 if (usgn) 4049 return Builder.CreateFPToUI(Ops[0], InTy); 4050 return Builder.CreateFPToSI(Ops[0], InTy); 4051 } 4052 case NEON::BI__builtin_neon_vcvts_f32_u32: 4053 case NEON::BI__builtin_neon_vcvtd_f64_u64: 4054 usgn = true; 4055 // FALL THROUGH 4056 case NEON::BI__builtin_neon_vcvts_f32_s32: 4057 case NEON::BI__builtin_neon_vcvtd_f64_s64: { 4058 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4059 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 4060 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 4061 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy; 4062 Ops[0] = Builder.CreateBitCast(Ops[0], InTy); 4063 if (usgn) 4064 return Builder.CreateUIToFP(Ops[0], FTy); 4065 return Builder.CreateSIToFP(Ops[0], FTy); 4066 } 4067 case NEON::BI__builtin_neon_vpaddd_s64: { 4068 llvm::Type *Ty = 4069 llvm::VectorType::get(llvm::Type::getInt64Ty(getLLVMContext()), 2); 4070 Value *Vec = EmitScalarExpr(E->getArg(0)); 4071 // The vector is v2i64, so make sure it's bitcast to that. 4072 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64"); 4073 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 4074 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 4075 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 4076 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 4077 // Pairwise addition of a v2i64 into a scalar i64. 4078 return Builder.CreateAdd(Op0, Op1, "vpaddd"); 4079 } 4080 case NEON::BI__builtin_neon_vpaddd_f64: { 4081 llvm::Type *Ty = 4082 llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 2); 4083 Value *Vec = EmitScalarExpr(E->getArg(0)); 4084 // The vector is v2f64, so make sure it's bitcast to that.
4085 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64"); 4086 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 4087 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 4088 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 4089 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 4090 // Pairwise addition of a v2f64 into a scalar f64. 4091 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 4092 } 4093 case NEON::BI__builtin_neon_vpadds_f32: { 4094 llvm::Type *Ty = 4095 llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 2); 4096 Value *Vec = EmitScalarExpr(E->getArg(0)); 4097 // The vector is v2f32, so make sure it's bitcast to that. 4098 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32"); 4099 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 4100 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 4101 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 4102 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 4103 // Pairwise addition of a v2f32 into a scalar f32. 
4104 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 4105 } 4106 case NEON::BI__builtin_neon_vceqzd_s64: 4107 case NEON::BI__builtin_neon_vceqzd_f64: 4108 case NEON::BI__builtin_neon_vceqzs_f32: 4109 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4110 return EmitAArch64CompareBuiltinExpr( 4111 Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OEQ, 4112 ICmpInst::ICMP_EQ, "vceqz"); 4113 case NEON::BI__builtin_neon_vcgezd_s64: 4114 case NEON::BI__builtin_neon_vcgezd_f64: 4115 case NEON::BI__builtin_neon_vcgezs_f32: 4116 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4117 return EmitAArch64CompareBuiltinExpr( 4118 Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OGE, 4119 ICmpInst::ICMP_SGE, "vcgez"); 4120 case NEON::BI__builtin_neon_vclezd_s64: 4121 case NEON::BI__builtin_neon_vclezd_f64: 4122 case NEON::BI__builtin_neon_vclezs_f32: 4123 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4124 return EmitAArch64CompareBuiltinExpr( 4125 Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OLE, 4126 ICmpInst::ICMP_SLE, "vclez"); 4127 case NEON::BI__builtin_neon_vcgtzd_s64: 4128 case NEON::BI__builtin_neon_vcgtzd_f64: 4129 case NEON::BI__builtin_neon_vcgtzs_f32: 4130 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4131 return EmitAArch64CompareBuiltinExpr( 4132 Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OGT, 4133 ICmpInst::ICMP_SGT, "vcgtz"); 4134 case NEON::BI__builtin_neon_vcltzd_s64: 4135 case NEON::BI__builtin_neon_vcltzd_f64: 4136 case NEON::BI__builtin_neon_vcltzs_f32: 4137 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4138 return EmitAArch64CompareBuiltinExpr( 4139 Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OLT, 4140 ICmpInst::ICMP_SLT, "vcltz"); 4141 4142 case NEON::BI__builtin_neon_vceqzd_u64: { 4143 llvm::Type *Ty = llvm::Type::getInt64Ty(getLLVMContext()); 4144 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4145 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4146 Ops[0] = Builder.CreateICmp(llvm::ICmpInst::ICMP_EQ, 
Ops[0], 4147 llvm::Constant::getNullValue(Ty)); 4148 return Builder.CreateSExt(Ops[0], Ty, "vceqzd"); 4149 } 4150 case NEON::BI__builtin_neon_vceqd_f64: 4151 case NEON::BI__builtin_neon_vcled_f64: 4152 case NEON::BI__builtin_neon_vcltd_f64: 4153 case NEON::BI__builtin_neon_vcged_f64: 4154 case NEON::BI__builtin_neon_vcgtd_f64: { 4155 llvm::CmpInst::Predicate P; 4156 switch (BuiltinID) { 4157 default: llvm_unreachable("missing builtin ID in switch!"); 4158 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break; 4159 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break; 4160 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break; 4161 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break; 4162 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break; 4163 } 4164 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4165 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 4166 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 4167 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 4168 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd"); 4169 } 4170 case NEON::BI__builtin_neon_vceqs_f32: 4171 case NEON::BI__builtin_neon_vcles_f32: 4172 case NEON::BI__builtin_neon_vclts_f32: 4173 case NEON::BI__builtin_neon_vcges_f32: 4174 case NEON::BI__builtin_neon_vcgts_f32: { 4175 llvm::CmpInst::Predicate P; 4176 switch (BuiltinID) { 4177 default: llvm_unreachable("missing builtin ID in switch!"); 4178 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break; 4179 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break; 4180 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break; 4181 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break; 4182 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break; 4183 } 4184 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4185 Ops[0] = Builder.CreateBitCast(Ops[0], 
FloatTy); 4186 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy); 4187 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 4188 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd"); 4189 } 4190 case NEON::BI__builtin_neon_vceqd_s64: 4191 case NEON::BI__builtin_neon_vceqd_u64: 4192 case NEON::BI__builtin_neon_vcgtd_s64: 4193 case NEON::BI__builtin_neon_vcgtd_u64: 4194 case NEON::BI__builtin_neon_vcltd_s64: 4195 case NEON::BI__builtin_neon_vcltd_u64: 4196 case NEON::BI__builtin_neon_vcged_u64: 4197 case NEON::BI__builtin_neon_vcged_s64: 4198 case NEON::BI__builtin_neon_vcled_u64: 4199 case NEON::BI__builtin_neon_vcled_s64: { 4200 llvm::CmpInst::Predicate P; 4201 switch (BuiltinID) { 4202 default: llvm_unreachable("missing builtin ID in switch!"); 4203 case NEON::BI__builtin_neon_vceqd_s64: 4204 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break; 4205 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break; 4206 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break; 4207 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break; 4208 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break; 4209 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break; 4210 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break; 4211 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break; 4212 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break; 4213 } 4214 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4215 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 4216 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 4217 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]); 4218 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd"); 4219 } 4220 case NEON::BI__builtin_neon_vtstd_s64: 4221 case NEON::BI__builtin_neon_vtstd_u64: { 4222 llvm::Type *Ty = llvm::Type::getInt64Ty(getLLVMContext()); 4223 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4224 
Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4225 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4226 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); 4227 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], 4228 llvm::Constant::getNullValue(Ty)); 4229 return Builder.CreateSExt(Ops[0], Ty, "vtstd"); 4230 } 4231 case NEON::BI__builtin_neon_vset_lane_i8: 4232 case NEON::BI__builtin_neon_vset_lane_i16: 4233 case NEON::BI__builtin_neon_vset_lane_i32: 4234 case NEON::BI__builtin_neon_vset_lane_i64: 4235 case NEON::BI__builtin_neon_vset_lane_f32: 4236 case NEON::BI__builtin_neon_vsetq_lane_i8: 4237 case NEON::BI__builtin_neon_vsetq_lane_i16: 4238 case NEON::BI__builtin_neon_vsetq_lane_i32: 4239 case NEON::BI__builtin_neon_vsetq_lane_i64: 4240 case NEON::BI__builtin_neon_vsetq_lane_f32: 4241 Ops.push_back(EmitScalarExpr(E->getArg(2))); 4242 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 4243 case NEON::BI__builtin_neon_vset_lane_f64: 4244 // The vector type needs a cast for the v1f64 variant. 4245 Ops[1] = Builder.CreateBitCast(Ops[1], 4246 llvm::VectorType::get(DoubleTy, 1)); 4247 Ops.push_back(EmitScalarExpr(E->getArg(2))); 4248 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 4249 case NEON::BI__builtin_neon_vsetq_lane_f64: 4250 // The vector type needs a cast for the v2f64 variant. 
4251 Ops[1] = Builder.CreateBitCast(Ops[1], 4252 llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 2)); 4253 Ops.push_back(EmitScalarExpr(E->getArg(2))); 4254 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 4255 4256 case NEON::BI__builtin_neon_vget_lane_i8: 4257 case NEON::BI__builtin_neon_vdupb_lane_i8: 4258 Ops[0] = Builder.CreateBitCast(Ops[0], 4259 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8)); 4260 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4261 "vget_lane"); 4262 case NEON::BI__builtin_neon_vgetq_lane_i8: 4263 case NEON::BI__builtin_neon_vdupb_laneq_i8: 4264 Ops[0] = Builder.CreateBitCast(Ops[0], 4265 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16)); 4266 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4267 "vgetq_lane"); 4268 case NEON::BI__builtin_neon_vget_lane_i16: 4269 case NEON::BI__builtin_neon_vduph_lane_i16: 4270 Ops[0] = Builder.CreateBitCast(Ops[0], 4271 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4)); 4272 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4273 "vget_lane"); 4274 case NEON::BI__builtin_neon_vgetq_lane_i16: 4275 case NEON::BI__builtin_neon_vduph_laneq_i16: 4276 Ops[0] = Builder.CreateBitCast(Ops[0], 4277 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8)); 4278 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4279 "vgetq_lane"); 4280 case NEON::BI__builtin_neon_vget_lane_i32: 4281 case NEON::BI__builtin_neon_vdups_lane_i32: 4282 Ops[0] = Builder.CreateBitCast( 4283 Ops[0], 4284 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 32), 2)); 4285 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4286 "vget_lane"); 4287 case NEON::BI__builtin_neon_vdups_lane_f32: 4288 Ops[0] = Builder.CreateBitCast(Ops[0], 4289 
llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 2)); 4290 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4291 "vdups_lane"); 4292 case NEON::BI__builtin_neon_vgetq_lane_i32: 4293 case NEON::BI__builtin_neon_vdups_laneq_i32: 4294 Ops[0] = Builder.CreateBitCast(Ops[0], 4295 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 32), 4)); 4296 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4297 "vgetq_lane"); 4298 case NEON::BI__builtin_neon_vget_lane_i64: 4299 case NEON::BI__builtin_neon_vdupd_lane_i64: 4300 Ops[0] = Builder.CreateBitCast(Ops[0], 4301 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 64), 1)); 4302 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4303 "vget_lane"); 4304 case NEON::BI__builtin_neon_vdupd_lane_f64: 4305 Ops[0] = Builder.CreateBitCast(Ops[0], 4306 llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 1)); 4307 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4308 "vdupd_lane"); 4309 case NEON::BI__builtin_neon_vgetq_lane_i64: 4310 case NEON::BI__builtin_neon_vdupd_laneq_i64: 4311 Ops[0] = Builder.CreateBitCast(Ops[0], 4312 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 64), 2)); 4313 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4314 "vgetq_lane"); 4315 case NEON::BI__builtin_neon_vget_lane_f32: 4316 Ops[0] = Builder.CreateBitCast(Ops[0], 4317 llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 2)); 4318 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4319 "vget_lane"); 4320 case NEON::BI__builtin_neon_vget_lane_f64: 4321 Ops[0] = Builder.CreateBitCast(Ops[0], 4322 llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 1)); 4323 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4324 "vget_lane"); 4325 case NEON::BI__builtin_neon_vgetq_lane_f32: 4326 case 
NEON::BI__builtin_neon_vdups_laneq_f32: 4327 Ops[0] = Builder.CreateBitCast(Ops[0], 4328 llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 4)); 4329 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4330 "vgetq_lane"); 4331 case NEON::BI__builtin_neon_vgetq_lane_f64: 4332 case NEON::BI__builtin_neon_vdupd_laneq_f64: 4333 Ops[0] = Builder.CreateBitCast(Ops[0], 4334 llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 2)); 4335 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4336 "vgetq_lane"); 4337 case NEON::BI__builtin_neon_vaddd_s64: 4338 case NEON::BI__builtin_neon_vaddd_u64: 4339 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd"); 4340 case NEON::BI__builtin_neon_vsubd_s64: 4341 case NEON::BI__builtin_neon_vsubd_u64: 4342 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd"); 4343 case NEON::BI__builtin_neon_vqdmlalh_s16: 4344 case NEON::BI__builtin_neon_vqdmlslh_s16: { 4345 SmallVector<Value *, 2> ProductOps; 4346 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 4347 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2)))); 4348 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 4349 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 4350 ProductOps, "vqdmlXl"); 4351 Constant *CI = ConstantInt::get(SizeTy, 0); 4352 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 4353 4354 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16 4355 ? 
Intrinsic::aarch64_neon_sqadd 4356 : Intrinsic::aarch64_neon_sqsub; 4357 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl"); 4358 } 4359 case NEON::BI__builtin_neon_vqshlud_n_s64: { 4360 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4361 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 4362 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty), 4363 Ops, "vqshlu_n"); 4364 } 4365 case NEON::BI__builtin_neon_vqshld_n_u64: 4366 case NEON::BI__builtin_neon_vqshld_n_s64: { 4367 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64 4368 ? Intrinsic::aarch64_neon_uqshl 4369 : Intrinsic::aarch64_neon_sqshl; 4370 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4371 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 4372 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n"); 4373 } 4374 case NEON::BI__builtin_neon_vrshrd_n_u64: 4375 case NEON::BI__builtin_neon_vrshrd_n_s64: { 4376 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64 4377 ? Intrinsic::aarch64_neon_urshl 4378 : Intrinsic::aarch64_neon_srshl; 4379 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4380 int SV = cast<ConstantInt>(Ops[1])->getSExtValue(); 4381 Ops[1] = ConstantInt::get(Int64Ty, -SV); 4382 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n"); 4383 } 4384 case NEON::BI__builtin_neon_vrsrad_n_u64: 4385 case NEON::BI__builtin_neon_vrsrad_n_s64: { 4386 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64 4387 ? 
Intrinsic::aarch64_neon_urshl 4388 : Intrinsic::aarch64_neon_srshl; 4389 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 4390 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2)))); 4391 Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Int64Ty), Ops[1], 4392 Builder.CreateSExt(Ops[2], Int64Ty)); 4393 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty)); 4394 } 4395 case NEON::BI__builtin_neon_vshld_n_s64: 4396 case NEON::BI__builtin_neon_vshld_n_u64: { 4397 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 4398 return Builder.CreateShl( 4399 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n"); 4400 } 4401 case NEON::BI__builtin_neon_vshrd_n_s64: { 4402 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 4403 return Builder.CreateAShr( 4404 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 4405 Amt->getZExtValue())), 4406 "shrd_n"); 4407 } 4408 case NEON::BI__builtin_neon_vshrd_n_u64: { 4409 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 4410 uint64_t ShiftAmt = Amt->getZExtValue(); 4411 // Right-shifting an unsigned value by its size yields 0. 4412 if (ShiftAmt == 64) 4413 return ConstantInt::get(Int64Ty, 0); 4414 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt), 4415 "shrd_n"); 4416 } 4417 case NEON::BI__builtin_neon_vsrad_n_s64: { 4418 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 4419 Ops[1] = Builder.CreateAShr( 4420 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 4421 Amt->getZExtValue())), 4422 "shrd_n"); 4423 return Builder.CreateAdd(Ops[0], Ops[1]); 4424 } 4425 case NEON::BI__builtin_neon_vsrad_n_u64: { 4426 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 4427 uint64_t ShiftAmt = Amt->getZExtValue(); 4428 // Right-shifting an unsigned value by its size yields 0. 4429 // As Op + 0 = Op, return Ops[0] directly. 
4430 if (ShiftAmt == 64) 4431 return Ops[0]; 4432 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt), 4433 "shrd_n"); 4434 return Builder.CreateAdd(Ops[0], Ops[1]); 4435 } 4436 case NEON::BI__builtin_neon_vqdmlalh_lane_s16: 4437 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16: 4438 case NEON::BI__builtin_neon_vqdmlslh_lane_s16: 4439 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: { 4440 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 4441 "lane"); 4442 SmallVector<Value *, 2> ProductOps; 4443 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 4444 ProductOps.push_back(vectorWrapScalar16(Ops[2])); 4445 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 4446 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 4447 ProductOps, "vqdmlXl"); 4448 Constant *CI = ConstantInt::get(SizeTy, 0); 4449 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 4450 Ops.pop_back(); 4451 4452 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 || 4453 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16) 4454 ? Intrinsic::aarch64_neon_sqadd 4455 : Intrinsic::aarch64_neon_sqsub; 4456 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl"); 4457 } 4458 case NEON::BI__builtin_neon_vqdmlals_s32: 4459 case NEON::BI__builtin_neon_vqdmlsls_s32: { 4460 SmallVector<Value *, 2> ProductOps; 4461 ProductOps.push_back(Ops[1]); 4462 ProductOps.push_back(EmitScalarExpr(E->getArg(2))); 4463 Ops[1] = 4464 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 4465 ProductOps, "vqdmlXl"); 4466 4467 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32 4468 ? 
Intrinsic::aarch64_neon_sqadd 4469 : Intrinsic::aarch64_neon_sqsub; 4470 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl"); 4471 } 4472 case NEON::BI__builtin_neon_vqdmlals_lane_s32: 4473 case NEON::BI__builtin_neon_vqdmlals_laneq_s32: 4474 case NEON::BI__builtin_neon_vqdmlsls_lane_s32: 4475 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: { 4476 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 4477 "lane"); 4478 SmallVector<Value *, 2> ProductOps; 4479 ProductOps.push_back(Ops[1]); 4480 ProductOps.push_back(Ops[2]); 4481 Ops[1] = 4482 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 4483 ProductOps, "vqdmlXl"); 4484 Ops.pop_back(); 4485 4486 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 || 4487 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32) 4488 ? Intrinsic::aarch64_neon_sqadd 4489 : Intrinsic::aarch64_neon_sqsub; 4490 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl"); 4491 } 4492 } 4493 4494 llvm::VectorType *VTy = GetNeonType(this, Type); 4495 llvm::Type *Ty = VTy; 4496 if (!Ty) 4497 return nullptr; 4498 4499 // Not all intrinsics handled by the common case work for AArch64 yet, so only 4500 // defer to common code if it's been added to our special map. 
4501 Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID, 4502 AArch64SIMDIntrinsicsProvenSorted); 4503 4504 if (Builtin) 4505 return EmitCommonNeonBuiltinExpr( 4506 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 4507 Builtin->NameHint, Builtin->TypeModifier, E, Ops, nullptr); 4508 4509 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops)) 4510 return V; 4511 4512 unsigned Int; 4513 switch (BuiltinID) { 4514 default: return nullptr; 4515 case NEON::BI__builtin_neon_vbsl_v: 4516 case NEON::BI__builtin_neon_vbslq_v: { 4517 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy); 4518 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl"); 4519 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl"); 4520 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl"); 4521 4522 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl"); 4523 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl"); 4524 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl"); 4525 return Builder.CreateBitCast(Ops[0], Ty); 4526 } 4527 case NEON::BI__builtin_neon_vfma_lane_v: 4528 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types 4529 // The ARM builtins (and instructions) have the addend as the first 4530 // operand, but the 'fma' intrinsics have it last. Swap it around here. 4531 Value *Addend = Ops[0]; 4532 Value *Multiplicand = Ops[1]; 4533 Value *LaneSource = Ops[2]; 4534 Ops[0] = Multiplicand; 4535 Ops[1] = LaneSource; 4536 Ops[2] = Addend; 4537 4538 // Now adjust things to handle the lane access. 4539 llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ? 
4540 llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) : 4541 VTy; 4542 llvm::Constant *cst = cast<Constant>(Ops[3]); 4543 Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst); 4544 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy); 4545 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane"); 4546 4547 Ops.pop_back(); 4548 Int = Intrinsic::fma; 4549 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla"); 4550 } 4551 case NEON::BI__builtin_neon_vfma_laneq_v: { 4552 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 4553 // v1f64 fma should be mapped to Neon scalar f64 fma 4554 if (VTy && VTy->getElementType() == DoubleTy) { 4555 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 4556 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 4557 llvm::Type *VTy = GetNeonType(this, 4558 NeonTypeFlags(NeonTypeFlags::Float64, false, true)); 4559 Ops[2] = Builder.CreateBitCast(Ops[2], VTy); 4560 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 4561 Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); 4562 Value *Result = Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); 4563 return Builder.CreateBitCast(Result, Ty); 4564 } 4565 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 4566 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4567 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4568 4569 llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(), 4570 VTy->getNumElements() * 2); 4571 Ops[2] = Builder.CreateBitCast(Ops[2], STy); 4572 Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), 4573 cast<ConstantInt>(Ops[3])); 4574 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); 4575 4576 return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); 4577 } 4578 case NEON::BI__builtin_neon_vfmaq_laneq_v: { 4579 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 4580 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4581 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4582 4583 Ops[2] = 
Builder.CreateBitCast(Ops[2], Ty); 4584 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3])); 4585 return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); 4586 } 4587 case NEON::BI__builtin_neon_vfmas_lane_f32: 4588 case NEON::BI__builtin_neon_vfmas_laneq_f32: 4589 case NEON::BI__builtin_neon_vfmad_lane_f64: 4590 case NEON::BI__builtin_neon_vfmad_laneq_f64: { 4591 Ops.push_back(EmitScalarExpr(E->getArg(3))); 4592 llvm::Type *Ty = ConvertType(E->getCallReturnType()); 4593 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 4594 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 4595 return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); 4596 } 4597 case NEON::BI__builtin_neon_vfms_v: 4598 case NEON::BI__builtin_neon_vfmsq_v: { // Only used for FP types 4599 // FIXME: probably remove when we no longer support aarch64_simd.h 4600 // (arm_neon.h delegates to vfma). 4601 4602 // The ARM builtins (and instructions) have the addend as the first 4603 // operand, but the 'fma' intrinsics have it last. Swap it around here. 4604 Value *Subtrahend = Ops[0]; 4605 Value *Multiplicand = Ops[2]; 4606 Ops[0] = Multiplicand; 4607 Ops[2] = Subtrahend; 4608 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 4609 Ops[1] = Builder.CreateFNeg(Ops[1]); 4610 Int = Intrinsic::fma; 4611 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmls"); 4612 } 4613 case NEON::BI__builtin_neon_vmull_v: 4614 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 4615 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull; 4616 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull; 4617 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); 4618 case NEON::BI__builtin_neon_vmax_v: 4619 case NEON::BI__builtin_neon_vmaxq_v: 4620 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 4621 Int = usgn ? 
Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax; 4622 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax; 4623 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax"); 4624 case NEON::BI__builtin_neon_vmin_v: 4625 case NEON::BI__builtin_neon_vminq_v: 4626 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 4627 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin; 4628 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin; 4629 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin"); 4630 case NEON::BI__builtin_neon_vabd_v: 4631 case NEON::BI__builtin_neon_vabdq_v: 4632 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 4633 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd; 4634 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd; 4635 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd"); 4636 case NEON::BI__builtin_neon_vpadal_v: 4637 case NEON::BI__builtin_neon_vpadalq_v: { 4638 unsigned ArgElts = VTy->getNumElements(); 4639 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType()); 4640 unsigned BitWidth = EltTy->getBitWidth(); 4641 llvm::Type *ArgTy = llvm::VectorType::get( 4642 llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts); 4643 llvm::Type* Tys[2] = { VTy, ArgTy }; 4644 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp; 4645 SmallVector<llvm::Value*, 1> TmpOps; 4646 TmpOps.push_back(Ops[1]); 4647 Function *F = CGM.getIntrinsic(Int, Tys); 4648 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal"); 4649 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType()); 4650 return Builder.CreateAdd(tmp, addend); 4651 } 4652 case NEON::BI__builtin_neon_vpmin_v: 4653 case NEON::BI__builtin_neon_vpminq_v: 4654 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 4655 Int = usgn ? 
Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp; 4656 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp; 4657 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin"); 4658 case NEON::BI__builtin_neon_vpmax_v: 4659 case NEON::BI__builtin_neon_vpmaxq_v: 4660 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 4661 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp; 4662 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp; 4663 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); 4664 case NEON::BI__builtin_neon_vminnm_v: 4665 case NEON::BI__builtin_neon_vminnmq_v: 4666 Int = Intrinsic::aarch64_neon_fminnm; 4667 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm"); 4668 case NEON::BI__builtin_neon_vmaxnm_v: 4669 case NEON::BI__builtin_neon_vmaxnmq_v: 4670 Int = Intrinsic::aarch64_neon_fmaxnm; 4671 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm"); 4672 case NEON::BI__builtin_neon_vrecpss_f32: { 4673 llvm::Type *f32Type = llvm::Type::getFloatTy(getLLVMContext()); 4674 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4675 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, f32Type), 4676 Ops, "vrecps"); 4677 } 4678 case NEON::BI__builtin_neon_vrecpsd_f64: { 4679 llvm::Type *f64Type = llvm::Type::getDoubleTy(getLLVMContext()); 4680 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4681 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, f64Type), 4682 Ops, "vrecps"); 4683 } 4684 case NEON::BI__builtin_neon_vqshrun_n_v: 4685 Int = Intrinsic::aarch64_neon_sqshrun; 4686 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); 4687 case NEON::BI__builtin_neon_vqrshrun_n_v: 4688 Int = Intrinsic::aarch64_neon_sqrshrun; 4689 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n"); 4690 case NEON::BI__builtin_neon_vqshrn_n_v: 4691 Int = usgn ? 
Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn; 4692 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n"); 4693 case NEON::BI__builtin_neon_vrshrn_n_v: 4694 Int = Intrinsic::aarch64_neon_rshrn; 4695 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n"); 4696 case NEON::BI__builtin_neon_vqrshrn_n_v: 4697 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn; 4698 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); 4699 case NEON::BI__builtin_neon_vrnda_v: 4700 case NEON::BI__builtin_neon_vrndaq_v: { 4701 Int = Intrinsic::round; 4702 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda"); 4703 } 4704 case NEON::BI__builtin_neon_vrndi_v: 4705 case NEON::BI__builtin_neon_vrndiq_v: { 4706 Int = Intrinsic::nearbyint; 4707 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi"); 4708 } 4709 case NEON::BI__builtin_neon_vrndm_v: 4710 case NEON::BI__builtin_neon_vrndmq_v: { 4711 Int = Intrinsic::floor; 4712 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm"); 4713 } 4714 case NEON::BI__builtin_neon_vrndn_v: 4715 case NEON::BI__builtin_neon_vrndnq_v: { 4716 Int = Intrinsic::aarch64_neon_frintn; 4717 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn"); 4718 } 4719 case NEON::BI__builtin_neon_vrndp_v: 4720 case NEON::BI__builtin_neon_vrndpq_v: { 4721 Int = Intrinsic::ceil; 4722 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp"); 4723 } 4724 case NEON::BI__builtin_neon_vrndx_v: 4725 case NEON::BI__builtin_neon_vrndxq_v: { 4726 Int = Intrinsic::rint; 4727 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx"); 4728 } 4729 case NEON::BI__builtin_neon_vrnd_v: 4730 case NEON::BI__builtin_neon_vrndq_v: { 4731 Int = Intrinsic::trunc; 4732 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz"); 4733 } 4734 case NEON::BI__builtin_neon_vceqz_v: 4735 case NEON::BI__builtin_neon_vceqzq_v: 4736 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ, 
4737 ICmpInst::ICMP_EQ, "vceqz"); 4738 case NEON::BI__builtin_neon_vcgez_v: 4739 case NEON::BI__builtin_neon_vcgezq_v: 4740 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE, 4741 ICmpInst::ICMP_SGE, "vcgez"); 4742 case NEON::BI__builtin_neon_vclez_v: 4743 case NEON::BI__builtin_neon_vclezq_v: 4744 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE, 4745 ICmpInst::ICMP_SLE, "vclez"); 4746 case NEON::BI__builtin_neon_vcgtz_v: 4747 case NEON::BI__builtin_neon_vcgtzq_v: 4748 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT, 4749 ICmpInst::ICMP_SGT, "vcgtz"); 4750 case NEON::BI__builtin_neon_vcltz_v: 4751 case NEON::BI__builtin_neon_vcltzq_v: 4752 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT, 4753 ICmpInst::ICMP_SLT, "vcltz"); 4754 case NEON::BI__builtin_neon_vcvt_f64_v: 4755 case NEON::BI__builtin_neon_vcvtq_f64_v: 4756 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4757 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); 4758 return usgn ? 
Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 4759 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 4760 case NEON::BI__builtin_neon_vcvt_f64_f32: { 4761 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad && 4762 "unexpected vcvt_f64_f32 builtin"); 4763 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false); 4764 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 4765 4766 return Builder.CreateFPExt(Ops[0], Ty, "vcvt"); 4767 } 4768 case NEON::BI__builtin_neon_vcvt_f32_f64: { 4769 assert(Type.getEltType() == NeonTypeFlags::Float32 && 4770 "unexpected vcvt_f32_f64 builtin"); 4771 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true); 4772 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 4773 4774 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt"); 4775 } 4776 case NEON::BI__builtin_neon_vcvt_s32_v: 4777 case NEON::BI__builtin_neon_vcvt_u32_v: 4778 case NEON::BI__builtin_neon_vcvt_s64_v: 4779 case NEON::BI__builtin_neon_vcvt_u64_v: 4780 case NEON::BI__builtin_neon_vcvtq_s32_v: 4781 case NEON::BI__builtin_neon_vcvtq_u32_v: 4782 case NEON::BI__builtin_neon_vcvtq_s64_v: 4783 case NEON::BI__builtin_neon_vcvtq_u64_v: { 4784 bool Double = 4785 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 4786 llvm::Type *InTy = 4787 GetNeonType(this, 4788 NeonTypeFlags(Double ? NeonTypeFlags::Float64 4789 : NeonTypeFlags::Float32, false, quad)); 4790 Ops[0] = Builder.CreateBitCast(Ops[0], InTy); 4791 if (usgn) 4792 return Builder.CreateFPToUI(Ops[0], Ty); 4793 return Builder.CreateFPToSI(Ops[0], Ty); 4794 } 4795 case NEON::BI__builtin_neon_vcvta_s32_v: 4796 case NEON::BI__builtin_neon_vcvtaq_s32_v: 4797 case NEON::BI__builtin_neon_vcvta_u32_v: 4798 case NEON::BI__builtin_neon_vcvtaq_u32_v: 4799 case NEON::BI__builtin_neon_vcvta_s64_v: 4800 case NEON::BI__builtin_neon_vcvtaq_s64_v: 4801 case NEON::BI__builtin_neon_vcvta_u64_v: 4802 case NEON::BI__builtin_neon_vcvtaq_u64_v: { 4803 Int = usgn ? 
Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas; 4804 bool Double = 4805 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 4806 llvm::Type *InTy = 4807 GetNeonType(this, 4808 NeonTypeFlags(Double ? NeonTypeFlags::Float64 4809 : NeonTypeFlags::Float32, false, quad)); 4810 llvm::Type *Tys[2] = { Ty, InTy }; 4811 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta"); 4812 } 4813 case NEON::BI__builtin_neon_vcvtm_s32_v: 4814 case NEON::BI__builtin_neon_vcvtmq_s32_v: 4815 case NEON::BI__builtin_neon_vcvtm_u32_v: 4816 case NEON::BI__builtin_neon_vcvtmq_u32_v: 4817 case NEON::BI__builtin_neon_vcvtm_s64_v: 4818 case NEON::BI__builtin_neon_vcvtmq_s64_v: 4819 case NEON::BI__builtin_neon_vcvtm_u64_v: 4820 case NEON::BI__builtin_neon_vcvtmq_u64_v: { 4821 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms; 4822 bool Double = 4823 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 4824 llvm::Type *InTy = 4825 GetNeonType(this, 4826 NeonTypeFlags(Double ? NeonTypeFlags::Float64 4827 : NeonTypeFlags::Float32, false, quad)); 4828 llvm::Type *Tys[2] = { Ty, InTy }; 4829 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm"); 4830 } 4831 case NEON::BI__builtin_neon_vcvtn_s32_v: 4832 case NEON::BI__builtin_neon_vcvtnq_s32_v: 4833 case NEON::BI__builtin_neon_vcvtn_u32_v: 4834 case NEON::BI__builtin_neon_vcvtnq_u32_v: 4835 case NEON::BI__builtin_neon_vcvtn_s64_v: 4836 case NEON::BI__builtin_neon_vcvtnq_s64_v: 4837 case NEON::BI__builtin_neon_vcvtn_u64_v: 4838 case NEON::BI__builtin_neon_vcvtnq_u64_v: { 4839 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns; 4840 bool Double = 4841 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 4842 llvm::Type *InTy = 4843 GetNeonType(this, 4844 NeonTypeFlags(Double ? 
NeonTypeFlags::Float64 4845 : NeonTypeFlags::Float32, false, quad)); 4846 llvm::Type *Tys[2] = { Ty, InTy }; 4847 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn"); 4848 } 4849 case NEON::BI__builtin_neon_vcvtp_s32_v: 4850 case NEON::BI__builtin_neon_vcvtpq_s32_v: 4851 case NEON::BI__builtin_neon_vcvtp_u32_v: 4852 case NEON::BI__builtin_neon_vcvtpq_u32_v: 4853 case NEON::BI__builtin_neon_vcvtp_s64_v: 4854 case NEON::BI__builtin_neon_vcvtpq_s64_v: 4855 case NEON::BI__builtin_neon_vcvtp_u64_v: 4856 case NEON::BI__builtin_neon_vcvtpq_u64_v: { 4857 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps; 4858 bool Double = 4859 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 4860 llvm::Type *InTy = 4861 GetNeonType(this, 4862 NeonTypeFlags(Double ? NeonTypeFlags::Float64 4863 : NeonTypeFlags::Float32, false, quad)); 4864 llvm::Type *Tys[2] = { Ty, InTy }; 4865 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp"); 4866 } 4867 case NEON::BI__builtin_neon_vmulx_v: 4868 case NEON::BI__builtin_neon_vmulxq_v: { 4869 Int = Intrinsic::aarch64_neon_fmulx; 4870 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); 4871 } 4872 case NEON::BI__builtin_neon_vmul_lane_v: 4873 case NEON::BI__builtin_neon_vmul_laneq_v: { 4874 // v1f64 vmul_lane should be mapped to Neon scalar mul lane 4875 bool Quad = false; 4876 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v) 4877 Quad = true; 4878 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 4879 llvm::Type *VTy = GetNeonType(this, 4880 NeonTypeFlags(NeonTypeFlags::Float64, false, Quad)); 4881 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 4882 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); 4883 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]); 4884 return Builder.CreateBitCast(Result, Ty); 4885 } 4886 case NEON::BI__builtin_neon_vnegd_s64: 4887 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd"); 4888 case 
NEON::BI__builtin_neon_vpmaxnm_v: 4889 case NEON::BI__builtin_neon_vpmaxnmq_v: { 4890 Int = Intrinsic::aarch64_neon_fmaxnmp; 4891 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm"); 4892 } 4893 case NEON::BI__builtin_neon_vpminnm_v: 4894 case NEON::BI__builtin_neon_vpminnmq_v: { 4895 Int = Intrinsic::aarch64_neon_fminnmp; 4896 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm"); 4897 } 4898 case NEON::BI__builtin_neon_vsqrt_v: 4899 case NEON::BI__builtin_neon_vsqrtq_v: { 4900 Int = Intrinsic::sqrt; 4901 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4902 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt"); 4903 } 4904 case NEON::BI__builtin_neon_vrbit_v: 4905 case NEON::BI__builtin_neon_vrbitq_v: { 4906 Int = Intrinsic::aarch64_neon_rbit; 4907 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit"); 4908 } 4909 case NEON::BI__builtin_neon_vaddv_u8: 4910 // FIXME: These are handled by the AArch64 scalar code. 4911 usgn = true; 4912 // FALLTHROUGH 4913 case NEON::BI__builtin_neon_vaddv_s8: { 4914 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 4915 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 4916 VTy = 4917 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 4918 llvm::Type *Tys[2] = { Ty, VTy }; 4919 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4920 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 4921 return Builder.CreateTrunc(Ops[0], 4922 llvm::IntegerType::get(getLLVMContext(), 8)); 4923 } 4924 case NEON::BI__builtin_neon_vaddv_u16: 4925 usgn = true; 4926 // FALLTHROUGH 4927 case NEON::BI__builtin_neon_vaddv_s16: { 4928 Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 4929 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 4930 VTy = 4931 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 4932 llvm::Type *Tys[2] = { Ty, VTy }; 4933 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4934 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 4935 return Builder.CreateTrunc(Ops[0], 4936 llvm::IntegerType::get(getLLVMContext(), 16)); 4937 } 4938 case NEON::BI__builtin_neon_vaddvq_u8: 4939 usgn = true; 4940 // FALLTHROUGH 4941 case NEON::BI__builtin_neon_vaddvq_s8: { 4942 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 4943 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 4944 VTy = 4945 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 4946 llvm::Type *Tys[2] = { Ty, VTy }; 4947 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4948 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 4949 return Builder.CreateTrunc(Ops[0], 4950 llvm::IntegerType::get(getLLVMContext(), 8)); 4951 } 4952 case NEON::BI__builtin_neon_vaddvq_u16: 4953 usgn = true; 4954 // FALLTHROUGH 4955 case NEON::BI__builtin_neon_vaddvq_s16: { 4956 Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 4957 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 4958 VTy = 4959 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 4960 llvm::Type *Tys[2] = { Ty, VTy }; 4961 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4962 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 4963 return Builder.CreateTrunc(Ops[0], 4964 llvm::IntegerType::get(getLLVMContext(), 16)); 4965 } 4966 case NEON::BI__builtin_neon_vmaxv_u8: { 4967 Int = Intrinsic::aarch64_neon_umaxv; 4968 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 4969 VTy = 4970 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 4971 llvm::Type *Tys[2] = { Ty, VTy }; 4972 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4973 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 4974 return Builder.CreateTrunc(Ops[0], 4975 llvm::IntegerType::get(getLLVMContext(), 8)); 4976 } 4977 case NEON::BI__builtin_neon_vmaxv_u16: { 4978 Int = Intrinsic::aarch64_neon_umaxv; 4979 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 4980 VTy = 4981 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 4982 llvm::Type *Tys[2] = { Ty, VTy }; 4983 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4984 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 4985 return Builder.CreateTrunc(Ops[0], 4986 llvm::IntegerType::get(getLLVMContext(), 16)); 4987 } 4988 case NEON::BI__builtin_neon_vmaxvq_u8: { 4989 Int = Intrinsic::aarch64_neon_umaxv; 4990 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 4991 VTy = 4992 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 4993 llvm::Type *Tys[2] = { Ty, VTy }; 4994 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4995 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 4996 return Builder.CreateTrunc(Ops[0], 4997 llvm::IntegerType::get(getLLVMContext(), 8)); 4998 } 4999 case NEON::BI__builtin_neon_vmaxvq_u16: { 5000 Int = 
Intrinsic::aarch64_neon_umaxv; 5001 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5002 VTy = 5003 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 5004 llvm::Type *Tys[2] = { Ty, VTy }; 5005 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5006 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5007 return Builder.CreateTrunc(Ops[0], 5008 llvm::IntegerType::get(getLLVMContext(), 16)); 5009 } 5010 case NEON::BI__builtin_neon_vmaxv_s8: { 5011 Int = Intrinsic::aarch64_neon_smaxv; 5012 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5013 VTy = 5014 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 5015 llvm::Type *Tys[2] = { Ty, VTy }; 5016 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5017 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5018 return Builder.CreateTrunc(Ops[0], 5019 llvm::IntegerType::get(getLLVMContext(), 8)); 5020 } 5021 case NEON::BI__builtin_neon_vmaxv_s16: { 5022 Int = Intrinsic::aarch64_neon_smaxv; 5023 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5024 VTy = 5025 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 5026 llvm::Type *Tys[2] = { Ty, VTy }; 5027 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5028 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5029 return Builder.CreateTrunc(Ops[0], 5030 llvm::IntegerType::get(getLLVMContext(), 16)); 5031 } 5032 case NEON::BI__builtin_neon_vmaxvq_s8: { 5033 Int = Intrinsic::aarch64_neon_smaxv; 5034 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5035 VTy = 5036 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 5037 llvm::Type *Tys[2] = { Ty, VTy }; 5038 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5039 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5040 return Builder.CreateTrunc(Ops[0], 5041 llvm::IntegerType::get(getLLVMContext(), 8)); 5042 } 5043 case NEON::BI__builtin_neon_vmaxvq_s16: { 5044 Int = Intrinsic::aarch64_neon_smaxv; 5045 Ty = 
llvm::IntegerType::get(getLLVMContext(), 32); 5046 VTy = 5047 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 5048 llvm::Type *Tys[2] = { Ty, VTy }; 5049 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5050 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5051 return Builder.CreateTrunc(Ops[0], 5052 llvm::IntegerType::get(getLLVMContext(), 16)); 5053 } 5054 case NEON::BI__builtin_neon_vminv_u8: { 5055 Int = Intrinsic::aarch64_neon_uminv; 5056 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5057 VTy = 5058 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 5059 llvm::Type *Tys[2] = { Ty, VTy }; 5060 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5061 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5062 return Builder.CreateTrunc(Ops[0], 5063 llvm::IntegerType::get(getLLVMContext(), 8)); 5064 } 5065 case NEON::BI__builtin_neon_vminv_u16: { 5066 Int = Intrinsic::aarch64_neon_uminv; 5067 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5068 VTy = 5069 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 5070 llvm::Type *Tys[2] = { Ty, VTy }; 5071 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5072 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5073 return Builder.CreateTrunc(Ops[0], 5074 llvm::IntegerType::get(getLLVMContext(), 16)); 5075 } 5076 case NEON::BI__builtin_neon_vminvq_u8: { 5077 Int = Intrinsic::aarch64_neon_uminv; 5078 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5079 VTy = 5080 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 5081 llvm::Type *Tys[2] = { Ty, VTy }; 5082 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5083 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5084 return Builder.CreateTrunc(Ops[0], 5085 llvm::IntegerType::get(getLLVMContext(), 8)); 5086 } 5087 case NEON::BI__builtin_neon_vminvq_u16: { 5088 Int = Intrinsic::aarch64_neon_uminv; 5089 Ty = llvm::IntegerType::get(getLLVMContext(), 
32); 5090 VTy = 5091 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 5092 llvm::Type *Tys[2] = { Ty, VTy }; 5093 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5094 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5095 return Builder.CreateTrunc(Ops[0], 5096 llvm::IntegerType::get(getLLVMContext(), 16)); 5097 } 5098 case NEON::BI__builtin_neon_vminv_s8: { 5099 Int = Intrinsic::aarch64_neon_sminv; 5100 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5101 VTy = 5102 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 5103 llvm::Type *Tys[2] = { Ty, VTy }; 5104 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5105 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5106 return Builder.CreateTrunc(Ops[0], 5107 llvm::IntegerType::get(getLLVMContext(), 8)); 5108 } 5109 case NEON::BI__builtin_neon_vminv_s16: { 5110 Int = Intrinsic::aarch64_neon_sminv; 5111 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5112 VTy = 5113 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 5114 llvm::Type *Tys[2] = { Ty, VTy }; 5115 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5116 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5117 return Builder.CreateTrunc(Ops[0], 5118 llvm::IntegerType::get(getLLVMContext(), 16)); 5119 } 5120 case NEON::BI__builtin_neon_vminvq_s8: { 5121 Int = Intrinsic::aarch64_neon_sminv; 5122 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5123 VTy = 5124 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 5125 llvm::Type *Tys[2] = { Ty, VTy }; 5126 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5127 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5128 return Builder.CreateTrunc(Ops[0], 5129 llvm::IntegerType::get(getLLVMContext(), 8)); 5130 } 5131 case NEON::BI__builtin_neon_vminvq_s16: { 5132 Int = Intrinsic::aarch64_neon_sminv; 5133 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5134 VTy = 5135 
llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 5136 llvm::Type *Tys[2] = { Ty, VTy }; 5137 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5138 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5139 return Builder.CreateTrunc(Ops[0], 5140 llvm::IntegerType::get(getLLVMContext(), 16)); 5141 } 5142 case NEON::BI__builtin_neon_vmul_n_f64: { 5143 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 5144 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy); 5145 return Builder.CreateFMul(Ops[0], RHS); 5146 } 5147 case NEON::BI__builtin_neon_vaddlv_u8: { 5148 Int = Intrinsic::aarch64_neon_uaddlv; 5149 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5150 VTy = 5151 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 5152 llvm::Type *Tys[2] = { Ty, VTy }; 5153 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5154 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 5155 return Builder.CreateTrunc(Ops[0], 5156 llvm::IntegerType::get(getLLVMContext(), 16)); 5157 } 5158 case NEON::BI__builtin_neon_vaddlv_u16: { 5159 Int = Intrinsic::aarch64_neon_uaddlv; 5160 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5161 VTy = 5162 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 5163 llvm::Type *Tys[2] = { Ty, VTy }; 5164 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5165 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 5166 } 5167 case NEON::BI__builtin_neon_vaddlvq_u8: { 5168 Int = Intrinsic::aarch64_neon_uaddlv; 5169 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5170 VTy = 5171 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 5172 llvm::Type *Tys[2] = { Ty, VTy }; 5173 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5174 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 5175 return Builder.CreateTrunc(Ops[0], 5176 llvm::IntegerType::get(getLLVMContext(), 16)); 5177 } 5178 case NEON::BI__builtin_neon_vaddlvq_u16: { 
5179 Int = Intrinsic::aarch64_neon_uaddlv; 5180 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5181 VTy = 5182 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 5183 llvm::Type *Tys[2] = { Ty, VTy }; 5184 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5185 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 5186 } 5187 case NEON::BI__builtin_neon_vaddlv_s8: { 5188 Int = Intrinsic::aarch64_neon_saddlv; 5189 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5190 VTy = 5191 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 5192 llvm::Type *Tys[2] = { Ty, VTy }; 5193 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5194 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 5195 return Builder.CreateTrunc(Ops[0], 5196 llvm::IntegerType::get(getLLVMContext(), 16)); 5197 } 5198 case NEON::BI__builtin_neon_vaddlv_s16: { 5199 Int = Intrinsic::aarch64_neon_saddlv; 5200 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5201 VTy = 5202 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 5203 llvm::Type *Tys[2] = { Ty, VTy }; 5204 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5205 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 5206 } 5207 case NEON::BI__builtin_neon_vaddlvq_s8: { 5208 Int = Intrinsic::aarch64_neon_saddlv; 5209 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5210 VTy = 5211 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 5212 llvm::Type *Tys[2] = { Ty, VTy }; 5213 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5214 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 5215 return Builder.CreateTrunc(Ops[0], 5216 llvm::IntegerType::get(getLLVMContext(), 16)); 5217 } 5218 case NEON::BI__builtin_neon_vaddlvq_s16: { 5219 Int = Intrinsic::aarch64_neon_saddlv; 5220 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5221 VTy = 5222 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 5223 llvm::Type *Tys[2] = { 
Ty, VTy }; 5224 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5225 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 5226 } 5227 case NEON::BI__builtin_neon_vsri_n_v: 5228 case NEON::BI__builtin_neon_vsriq_n_v: { 5229 Int = Intrinsic::aarch64_neon_vsri; 5230 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 5231 return EmitNeonCall(Intrin, Ops, "vsri_n"); 5232 } 5233 case NEON::BI__builtin_neon_vsli_n_v: 5234 case NEON::BI__builtin_neon_vsliq_n_v: { 5235 Int = Intrinsic::aarch64_neon_vsli; 5236 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 5237 return EmitNeonCall(Intrin, Ops, "vsli_n"); 5238 } 5239 case NEON::BI__builtin_neon_vsra_n_v: 5240 case NEON::BI__builtin_neon_vsraq_n_v: 5241 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5242 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 5243 return Builder.CreateAdd(Ops[0], Ops[1]); 5244 case NEON::BI__builtin_neon_vrsra_n_v: 5245 case NEON::BI__builtin_neon_vrsraq_n_v: { 5246 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl; 5247 SmallVector<llvm::Value*,2> TmpOps; 5248 TmpOps.push_back(Ops[1]); 5249 TmpOps.push_back(Ops[2]); 5250 Function* F = CGM.getIntrinsic(Int, Ty); 5251 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true); 5252 Ops[0] = Builder.CreateBitCast(Ops[0], VTy); 5253 return Builder.CreateAdd(Ops[0], tmp); 5254 } 5255 // FIXME: Sharing loads & stores with 32-bit is complicated by the absence 5256 // of an Align parameter here. 
5257 case NEON::BI__builtin_neon_vld1_x2_v: 5258 case NEON::BI__builtin_neon_vld1q_x2_v: 5259 case NEON::BI__builtin_neon_vld1_x3_v: 5260 case NEON::BI__builtin_neon_vld1q_x3_v: 5261 case NEON::BI__builtin_neon_vld1_x4_v: 5262 case NEON::BI__builtin_neon_vld1q_x4_v: { 5263 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); 5264 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 5265 llvm::Type *Tys[2] = { VTy, PTy }; 5266 unsigned Int; 5267 switch (BuiltinID) { 5268 case NEON::BI__builtin_neon_vld1_x2_v: 5269 case NEON::BI__builtin_neon_vld1q_x2_v: 5270 Int = Intrinsic::aarch64_neon_ld1x2; 5271 break; 5272 case NEON::BI__builtin_neon_vld1_x3_v: 5273 case NEON::BI__builtin_neon_vld1q_x3_v: 5274 Int = Intrinsic::aarch64_neon_ld1x3; 5275 break; 5276 case NEON::BI__builtin_neon_vld1_x4_v: 5277 case NEON::BI__builtin_neon_vld1q_x4_v: 5278 Int = Intrinsic::aarch64_neon_ld1x4; 5279 break; 5280 } 5281 Function *F = CGM.getIntrinsic(Int, Tys); 5282 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN"); 5283 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5284 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5285 return Builder.CreateStore(Ops[1], Ops[0]); 5286 } 5287 case NEON::BI__builtin_neon_vst1_x2_v: 5288 case NEON::BI__builtin_neon_vst1q_x2_v: 5289 case NEON::BI__builtin_neon_vst1_x3_v: 5290 case NEON::BI__builtin_neon_vst1q_x3_v: 5291 case NEON::BI__builtin_neon_vst1_x4_v: 5292 case NEON::BI__builtin_neon_vst1q_x4_v: { 5293 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); 5294 llvm::Type *Tys[2] = { VTy, PTy }; 5295 unsigned Int; 5296 switch (BuiltinID) { 5297 case NEON::BI__builtin_neon_vst1_x2_v: 5298 case NEON::BI__builtin_neon_vst1q_x2_v: 5299 Int = Intrinsic::aarch64_neon_st1x2; 5300 break; 5301 case NEON::BI__builtin_neon_vst1_x3_v: 5302 case NEON::BI__builtin_neon_vst1q_x3_v: 5303 Int = Intrinsic::aarch64_neon_st1x3; 5304 break; 5305 case NEON::BI__builtin_neon_vst1_x4_v: 5306 case NEON::BI__builtin_neon_vst1q_x4_v: 
5307 Int = Intrinsic::aarch64_neon_st1x4; 5308 break; 5309 } 5310 SmallVector<Value *, 4> IntOps(Ops.begin()+1, Ops.end()); 5311 IntOps.push_back(Ops[0]); 5312 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), IntOps, ""); 5313 } 5314 case NEON::BI__builtin_neon_vld1_v: 5315 case NEON::BI__builtin_neon_vld1q_v: 5316 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 5317 return Builder.CreateLoad(Ops[0]); 5318 case NEON::BI__builtin_neon_vst1_v: 5319 case NEON::BI__builtin_neon_vst1q_v: 5320 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 5321 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 5322 return Builder.CreateStore(Ops[1], Ops[0]); 5323 case NEON::BI__builtin_neon_vld1_lane_v: 5324 case NEON::BI__builtin_neon_vld1q_lane_v: 5325 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5326 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 5327 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5328 Ops[0] = Builder.CreateLoad(Ops[0]); 5329 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane"); 5330 case NEON::BI__builtin_neon_vld1_dup_v: 5331 case NEON::BI__builtin_neon_vld1q_dup_v: { 5332 Value *V = UndefValue::get(Ty); 5333 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 5334 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5335 Ops[0] = Builder.CreateLoad(Ops[0]); 5336 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 5337 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI); 5338 return EmitNeonSplat(Ops[0], CI); 5339 } 5340 case NEON::BI__builtin_neon_vst1_lane_v: 5341 case NEON::BI__builtin_neon_vst1q_lane_v: 5342 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5343 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 5344 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5345 return Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty)); 5346 case NEON::BI__builtin_neon_vld2_v: 5347 case NEON::BI__builtin_neon_vld2q_v: { 5348 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 5349 
Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 5350 llvm::Type *Tys[2] = { VTy, PTy }; 5351 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys); 5352 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 5353 Ops[0] = Builder.CreateBitCast(Ops[0], 5354 llvm::PointerType::getUnqual(Ops[1]->getType())); 5355 return Builder.CreateStore(Ops[1], Ops[0]); 5356 } 5357 case NEON::BI__builtin_neon_vld3_v: 5358 case NEON::BI__builtin_neon_vld3q_v: { 5359 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 5360 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 5361 llvm::Type *Tys[2] = { VTy, PTy }; 5362 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys); 5363 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 5364 Ops[0] = Builder.CreateBitCast(Ops[0], 5365 llvm::PointerType::getUnqual(Ops[1]->getType())); 5366 return Builder.CreateStore(Ops[1], Ops[0]); 5367 } 5368 case NEON::BI__builtin_neon_vld4_v: 5369 case NEON::BI__builtin_neon_vld4q_v: { 5370 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 5371 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 5372 llvm::Type *Tys[2] = { VTy, PTy }; 5373 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys); 5374 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); 5375 Ops[0] = Builder.CreateBitCast(Ops[0], 5376 llvm::PointerType::getUnqual(Ops[1]->getType())); 5377 return Builder.CreateStore(Ops[1], Ops[0]); 5378 } 5379 case NEON::BI__builtin_neon_vld2_dup_v: 5380 case NEON::BI__builtin_neon_vld2q_dup_v: { 5381 llvm::Type *PTy = 5382 llvm::PointerType::getUnqual(VTy->getElementType()); 5383 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 5384 llvm::Type *Tys[2] = { VTy, PTy }; 5385 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys); 5386 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 5387 Ops[0] = Builder.CreateBitCast(Ops[0], 5388 llvm::PointerType::getUnqual(Ops[1]->getType())); 5389 return Builder.CreateStore(Ops[1], Ops[0]); 5390 } 5391 case NEON::BI__builtin_neon_vld3_dup_v: 5392 case 
NEON::BI__builtin_neon_vld3q_dup_v: { 5393 llvm::Type *PTy = 5394 llvm::PointerType::getUnqual(VTy->getElementType()); 5395 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 5396 llvm::Type *Tys[2] = { VTy, PTy }; 5397 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys); 5398 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 5399 Ops[0] = Builder.CreateBitCast(Ops[0], 5400 llvm::PointerType::getUnqual(Ops[1]->getType())); 5401 return Builder.CreateStore(Ops[1], Ops[0]); 5402 } 5403 case NEON::BI__builtin_neon_vld4_dup_v: 5404 case NEON::BI__builtin_neon_vld4q_dup_v: { 5405 llvm::Type *PTy = 5406 llvm::PointerType::getUnqual(VTy->getElementType()); 5407 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 5408 llvm::Type *Tys[2] = { VTy, PTy }; 5409 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys); 5410 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); 5411 Ops[0] = Builder.CreateBitCast(Ops[0], 5412 llvm::PointerType::getUnqual(Ops[1]->getType())); 5413 return Builder.CreateStore(Ops[1], Ops[0]); 5414 } 5415 case NEON::BI__builtin_neon_vld2_lane_v: 5416 case NEON::BI__builtin_neon_vld2q_lane_v: { 5417 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 5418 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys); 5419 Ops.push_back(Ops[1]); 5420 Ops.erase(Ops.begin()+1); 5421 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5422 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 5423 Ops[3] = Builder.CreateZExt(Ops[3], 5424 llvm::IntegerType::get(getLLVMContext(), 64)); 5425 Ops[1] = Builder.CreateCall(F, 5426 ArrayRef<Value*>(Ops).slice(1), "vld2_lane"); 5427 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5428 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5429 return Builder.CreateStore(Ops[1], Ops[0]); 5430 } 5431 case NEON::BI__builtin_neon_vld3_lane_v: 5432 case NEON::BI__builtin_neon_vld3q_lane_v: { 5433 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 5434 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys); 5435 
Ops.push_back(Ops[1]); 5436 Ops.erase(Ops.begin()+1); 5437 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5438 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 5439 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 5440 Ops[4] = Builder.CreateZExt(Ops[4], 5441 llvm::IntegerType::get(getLLVMContext(), 64)); 5442 Ops[1] = Builder.CreateCall(F, 5443 ArrayRef<Value*>(Ops).slice(1), "vld3_lane"); 5444 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5445 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5446 return Builder.CreateStore(Ops[1], Ops[0]); 5447 } 5448 case NEON::BI__builtin_neon_vld4_lane_v: 5449 case NEON::BI__builtin_neon_vld4q_lane_v: { 5450 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 5451 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys); 5452 Ops.push_back(Ops[1]); 5453 Ops.erase(Ops.begin()+1); 5454 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5455 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 5456 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 5457 Ops[4] = Builder.CreateBitCast(Ops[4], Ty); 5458 Ops[5] = Builder.CreateZExt(Ops[5], 5459 llvm::IntegerType::get(getLLVMContext(), 64)); 5460 Ops[1] = Builder.CreateCall(F, 5461 ArrayRef<Value*>(Ops).slice(1), "vld4_lane"); 5462 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5463 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5464 return Builder.CreateStore(Ops[1], Ops[0]); 5465 } 5466 case NEON::BI__builtin_neon_vst2_v: 5467 case NEON::BI__builtin_neon_vst2q_v: { 5468 Ops.push_back(Ops[0]); 5469 Ops.erase(Ops.begin()); 5470 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() }; 5471 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys), 5472 Ops, ""); 5473 } 5474 case NEON::BI__builtin_neon_vst2_lane_v: 5475 case NEON::BI__builtin_neon_vst2q_lane_v: { 5476 Ops.push_back(Ops[0]); 5477 Ops.erase(Ops.begin()); 5478 Ops[2] = Builder.CreateZExt(Ops[2], 5479 llvm::IntegerType::get(getLLVMContext(), 64)); 5480 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; 5481 return 
EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys), 5482 Ops, ""); 5483 } 5484 case NEON::BI__builtin_neon_vst3_v: 5485 case NEON::BI__builtin_neon_vst3q_v: { 5486 Ops.push_back(Ops[0]); 5487 Ops.erase(Ops.begin()); 5488 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; 5489 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys), 5490 Ops, ""); 5491 } 5492 case NEON::BI__builtin_neon_vst3_lane_v: 5493 case NEON::BI__builtin_neon_vst3q_lane_v: { 5494 Ops.push_back(Ops[0]); 5495 Ops.erase(Ops.begin()); 5496 Ops[3] = Builder.CreateZExt(Ops[3], 5497 llvm::IntegerType::get(getLLVMContext(), 64)); 5498 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 5499 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys), 5500 Ops, ""); 5501 } 5502 case NEON::BI__builtin_neon_vst4_v: 5503 case NEON::BI__builtin_neon_vst4q_v: { 5504 Ops.push_back(Ops[0]); 5505 Ops.erase(Ops.begin()); 5506 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 5507 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys), 5508 Ops, ""); 5509 } 5510 case NEON::BI__builtin_neon_vst4_lane_v: 5511 case NEON::BI__builtin_neon_vst4q_lane_v: { 5512 Ops.push_back(Ops[0]); 5513 Ops.erase(Ops.begin()); 5514 Ops[4] = Builder.CreateZExt(Ops[4], 5515 llvm::IntegerType::get(getLLVMContext(), 64)); 5516 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() }; 5517 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys), 5518 Ops, ""); 5519 } 5520 case NEON::BI__builtin_neon_vtrn_v: 5521 case NEON::BI__builtin_neon_vtrnq_v: { 5522 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 5523 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5524 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 5525 Value *SV = nullptr; 5526 5527 for (unsigned vi = 0; vi != 2; ++vi) { 5528 SmallVector<Constant*, 16> Indices; 5529 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 5530 Indices.push_back(ConstantInt::get(Int32Ty, i+vi)); 5531 
Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi)); 5532 } 5533 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); 5534 SV = llvm::ConstantVector::get(Indices); 5535 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn"); 5536 SV = Builder.CreateStore(SV, Addr); 5537 } 5538 return SV; 5539 } 5540 case NEON::BI__builtin_neon_vuzp_v: 5541 case NEON::BI__builtin_neon_vuzpq_v: { 5542 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 5543 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5544 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 5545 Value *SV = nullptr; 5546 5547 for (unsigned vi = 0; vi != 2; ++vi) { 5548 SmallVector<Constant*, 16> Indices; 5549 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 5550 Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi)); 5551 5552 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); 5553 SV = llvm::ConstantVector::get(Indices); 5554 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp"); 5555 SV = Builder.CreateStore(SV, Addr); 5556 } 5557 return SV; 5558 } 5559 case NEON::BI__builtin_neon_vzip_v: 5560 case NEON::BI__builtin_neon_vzipq_v: { 5561 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 5562 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5563 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 5564 Value *SV = nullptr; 5565 5566 for (unsigned vi = 0; vi != 2; ++vi) { 5567 SmallVector<Constant*, 16> Indices; 5568 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 5569 Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1)); 5570 Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e)); 5571 } 5572 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); 5573 SV = llvm::ConstantVector::get(Indices); 5574 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip"); 5575 SV = Builder.CreateStore(SV, Addr); 5576 } 5577 return SV; 5578 } 5579 case NEON::BI__builtin_neon_vqtbl1q_v: { 5580 return 
EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty), 5581 Ops, "vtbl1"); 5582 } 5583 case NEON::BI__builtin_neon_vqtbl2q_v: { 5584 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty), 5585 Ops, "vtbl2"); 5586 } 5587 case NEON::BI__builtin_neon_vqtbl3q_v: { 5588 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty), 5589 Ops, "vtbl3"); 5590 } 5591 case NEON::BI__builtin_neon_vqtbl4q_v: { 5592 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty), 5593 Ops, "vtbl4"); 5594 } 5595 case NEON::BI__builtin_neon_vqtbx1q_v: { 5596 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty), 5597 Ops, "vtbx1"); 5598 } 5599 case NEON::BI__builtin_neon_vqtbx2q_v: { 5600 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty), 5601 Ops, "vtbx2"); 5602 } 5603 case NEON::BI__builtin_neon_vqtbx3q_v: { 5604 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty), 5605 Ops, "vtbx3"); 5606 } 5607 case NEON::BI__builtin_neon_vqtbx4q_v: { 5608 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty), 5609 Ops, "vtbx4"); 5610 } 5611 case NEON::BI__builtin_neon_vsqadd_v: 5612 case NEON::BI__builtin_neon_vsqaddq_v: { 5613 Int = Intrinsic::aarch64_neon_usqadd; 5614 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd"); 5615 } 5616 case NEON::BI__builtin_neon_vuqadd_v: 5617 case NEON::BI__builtin_neon_vuqaddq_v: { 5618 Int = Intrinsic::aarch64_neon_suqadd; 5619 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); 5620 } 5621 } 5622 } 5623 5624 llvm::Value *CodeGenFunction:: 5625 BuildVector(ArrayRef<llvm::Value*> Ops) { 5626 assert((Ops.size() & (Ops.size() - 1)) == 0 && 5627 "Not a power-of-two sized vector!"); 5628 bool AllConstants = true; 5629 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i) 5630 AllConstants &= isa<Constant>(Ops[i]); 5631 5632 // If this is a constant vector, create a ConstantVector. 
5633 if (AllConstants) { 5634 SmallVector<llvm::Constant*, 16> CstOps; 5635 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 5636 CstOps.push_back(cast<Constant>(Ops[i])); 5637 return llvm::ConstantVector::get(CstOps); 5638 } 5639 5640 // Otherwise, insertelement the values to build the vector. 5641 Value *Result = 5642 llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size())); 5643 5644 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 5645 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i)); 5646 5647 return Result; 5648 } 5649 5650 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, 5651 const CallExpr *E) { 5652 SmallVector<Value*, 4> Ops; 5653 5654 // Find out if any arguments are required to be integer constant expressions. 5655 unsigned ICEArguments = 0; 5656 ASTContext::GetBuiltinTypeError Error; 5657 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 5658 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 5659 5660 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { 5661 // If this is a normal argument, just emit it as a scalar. 5662 if ((ICEArguments & (1 << i)) == 0) { 5663 Ops.push_back(EmitScalarExpr(E->getArg(i))); 5664 continue; 5665 } 5666 5667 // If this is required to be a constant, constant fold it so that we know 5668 // that the generated intrinsic gets a ConstantInt. 
5669 llvm::APSInt Result; 5670 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 5671 assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; 5672 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 5673 } 5674 5675 switch (BuiltinID) { 5676 default: return nullptr; 5677 case X86::BI_mm_prefetch: { 5678 Value *Address = EmitScalarExpr(E->getArg(0)); 5679 Value *RW = ConstantInt::get(Int32Ty, 0); 5680 Value *Locality = EmitScalarExpr(E->getArg(1)); 5681 Value *Data = ConstantInt::get(Int32Ty, 1); 5682 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 5683 return Builder.CreateCall4(F, Address, RW, Locality, Data); 5684 } 5685 case X86::BI__builtin_ia32_vec_init_v8qi: 5686 case X86::BI__builtin_ia32_vec_init_v4hi: 5687 case X86::BI__builtin_ia32_vec_init_v2si: 5688 return Builder.CreateBitCast(BuildVector(Ops), 5689 llvm::Type::getX86_MMXTy(getLLVMContext())); 5690 case X86::BI__builtin_ia32_vec_ext_v2si: 5691 return Builder.CreateExtractElement(Ops[0], 5692 llvm::ConstantInt::get(Ops[1]->getType(), 0)); 5693 case X86::BI__builtin_ia32_ldmxcsr: { 5694 Value *Tmp = CreateMemTemp(E->getArg(0)->getType()); 5695 Builder.CreateStore(Ops[0], Tmp); 5696 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr), 5697 Builder.CreateBitCast(Tmp, Int8PtrTy)); 5698 } 5699 case X86::BI__builtin_ia32_stmxcsr: { 5700 Value *Tmp = CreateMemTemp(E->getType()); 5701 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr), 5702 Builder.CreateBitCast(Tmp, Int8PtrTy)); 5703 return Builder.CreateLoad(Tmp, "stmxcsr"); 5704 } 5705 case X86::BI__builtin_ia32_storehps: 5706 case X86::BI__builtin_ia32_storelps: { 5707 llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty); 5708 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2); 5709 5710 // cast val v2i64 5711 Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast"); 5712 5713 // extract (0, 1) 5714 unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 
0 : 1; 5715 llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index); 5716 Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract"); 5717 5718 // cast pointer to i64 & store 5719 Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy); 5720 return Builder.CreateStore(Ops[1], Ops[0]); 5721 } 5722 case X86::BI__builtin_ia32_palignr: { 5723 unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 5724 5725 // If palignr is shifting the pair of input vectors less than 9 bytes, 5726 // emit a shuffle instruction. 5727 if (shiftVal <= 8) { 5728 SmallVector<llvm::Constant*, 8> Indices; 5729 for (unsigned i = 0; i != 8; ++i) 5730 Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i)); 5731 5732 Value* SV = llvm::ConstantVector::get(Indices); 5733 return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr"); 5734 } 5735 5736 // If palignr is shifting the pair of input vectors more than 8 but less 5737 // than 16 bytes, emit a logical right shift of the destination. 5738 if (shiftVal < 16) { 5739 // MMX has these as 1 x i64 vectors for some odd optimization reasons. 5740 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 1); 5741 5742 Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); 5743 Ops[1] = llvm::ConstantInt::get(VecTy, (shiftVal-8) * 8); 5744 5745 // create i32 constant 5746 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_mmx_psrl_q); 5747 return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr"); 5748 } 5749 5750 // If palignr is shifting the pair of vectors more than 16 bytes, emit zero. 5751 return llvm::Constant::getNullValue(ConvertType(E->getType())); 5752 } 5753 case X86::BI__builtin_ia32_palignr128: { 5754 unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 5755 5756 // If palignr is shifting the pair of input vectors less than 17 bytes, 5757 // emit a shuffle instruction. 
5758 if (shiftVal <= 16) { 5759 SmallVector<llvm::Constant*, 16> Indices; 5760 for (unsigned i = 0; i != 16; ++i) 5761 Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i)); 5762 5763 Value* SV = llvm::ConstantVector::get(Indices); 5764 return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr"); 5765 } 5766 5767 // If palignr is shifting the pair of input vectors more than 16 but less 5768 // than 32 bytes, emit a logical right shift of the destination. 5769 if (shiftVal < 32) { 5770 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2); 5771 5772 Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); 5773 Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8); 5774 5775 // create i32 constant 5776 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq); 5777 return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr"); 5778 } 5779 5780 // If palignr is shifting the pair of vectors more than 32 bytes, emit zero. 5781 return llvm::Constant::getNullValue(ConvertType(E->getType())); 5782 } 5783 case X86::BI__builtin_ia32_palignr256: { 5784 unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 5785 5786 // If palignr is shifting the pair of input vectors less than 17 bytes, 5787 // emit a shuffle instruction. 
5788 if (shiftVal <= 16) { 5789 SmallVector<llvm::Constant*, 32> Indices; 5790 // 256-bit palignr operates on 128-bit lanes so we need to handle that 5791 for (unsigned l = 0; l != 2; ++l) { 5792 unsigned LaneStart = l * 16; 5793 unsigned LaneEnd = (l+1) * 16; 5794 for (unsigned i = 0; i != 16; ++i) { 5795 unsigned Idx = shiftVal + i + LaneStart; 5796 if (Idx >= LaneEnd) Idx += 16; // end of lane, switch operand 5797 Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx)); 5798 } 5799 } 5800 5801 Value* SV = llvm::ConstantVector::get(Indices); 5802 return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr"); 5803 } 5804 5805 // If palignr is shifting the pair of input vectors more than 16 but less 5806 // than 32 bytes, emit a logical right shift of the destination. 5807 if (shiftVal < 32) { 5808 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 4); 5809 5810 Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); 5811 Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8); 5812 5813 // create i32 constant 5814 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_avx2_psrl_dq); 5815 return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr"); 5816 } 5817 5818 // If palignr is shifting the pair of vectors more than 32 bytes, emit zero. 5819 return llvm::Constant::getNullValue(ConvertType(E->getType())); 5820 } 5821 case X86::BI__builtin_ia32_movntps: 5822 case X86::BI__builtin_ia32_movntps256: 5823 case X86::BI__builtin_ia32_movntpd: 5824 case X86::BI__builtin_ia32_movntpd256: 5825 case X86::BI__builtin_ia32_movntdq: 5826 case X86::BI__builtin_ia32_movntdq256: 5827 case X86::BI__builtin_ia32_movnti: 5828 case X86::BI__builtin_ia32_movnti64: { 5829 llvm::MDNode *Node = llvm::MDNode::get(getLLVMContext(), 5830 Builder.getInt32(1)); 5831 5832 // Convert the type of the pointer to a pointer to the stored type. 
5833 Value *BC = Builder.CreateBitCast(Ops[0], 5834 llvm::PointerType::getUnqual(Ops[1]->getType()), 5835 "cast"); 5836 StoreInst *SI = Builder.CreateStore(Ops[1], BC); 5837 SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); 5838 5839 // If the operand is an integer, we can't assume alignment. Otherwise, 5840 // assume natural alignment. 5841 QualType ArgTy = E->getArg(1)->getType(); 5842 unsigned Align; 5843 if (ArgTy->isIntegerType()) 5844 Align = 1; 5845 else 5846 Align = getContext().getTypeSizeInChars(ArgTy).getQuantity(); 5847 SI->setAlignment(Align); 5848 return SI; 5849 } 5850 // 3DNow! 5851 case X86::BI__builtin_ia32_pswapdsf: 5852 case X86::BI__builtin_ia32_pswapdsi: { 5853 const char *name = nullptr; 5854 Intrinsic::ID ID = Intrinsic::not_intrinsic; 5855 switch(BuiltinID) { 5856 default: llvm_unreachable("Unsupported intrinsic!"); 5857 case X86::BI__builtin_ia32_pswapdsf: 5858 case X86::BI__builtin_ia32_pswapdsi: 5859 name = "pswapd"; 5860 ID = Intrinsic::x86_3dnowa_pswapd; 5861 break; 5862 } 5863 llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext()); 5864 Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast"); 5865 llvm::Function *F = CGM.getIntrinsic(ID); 5866 return Builder.CreateCall(F, Ops, name); 5867 } 5868 case X86::BI__builtin_ia32_rdrand16_step: 5869 case X86::BI__builtin_ia32_rdrand32_step: 5870 case X86::BI__builtin_ia32_rdrand64_step: 5871 case X86::BI__builtin_ia32_rdseed16_step: 5872 case X86::BI__builtin_ia32_rdseed32_step: 5873 case X86::BI__builtin_ia32_rdseed64_step: { 5874 Intrinsic::ID ID; 5875 switch (BuiltinID) { 5876 default: llvm_unreachable("Unsupported intrinsic!"); 5877 case X86::BI__builtin_ia32_rdrand16_step: 5878 ID = Intrinsic::x86_rdrand_16; 5879 break; 5880 case X86::BI__builtin_ia32_rdrand32_step: 5881 ID = Intrinsic::x86_rdrand_32; 5882 break; 5883 case X86::BI__builtin_ia32_rdrand64_step: 5884 ID = Intrinsic::x86_rdrand_64; 5885 break; 5886 case X86::BI__builtin_ia32_rdseed16_step: 5887 ID = 
Intrinsic::x86_rdseed_16; 5888 break; 5889 case X86::BI__builtin_ia32_rdseed32_step: 5890 ID = Intrinsic::x86_rdseed_32; 5891 break; 5892 case X86::BI__builtin_ia32_rdseed64_step: 5893 ID = Intrinsic::x86_rdseed_64; 5894 break; 5895 } 5896 5897 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID)); 5898 Builder.CreateStore(Builder.CreateExtractValue(Call, 0), Ops[0]); 5899 return Builder.CreateExtractValue(Call, 1); 5900 } 5901 // AVX2 broadcast 5902 case X86::BI__builtin_ia32_vbroadcastsi256: { 5903 Value *VecTmp = CreateMemTemp(E->getArg(0)->getType()); 5904 Builder.CreateStore(Ops[0], VecTmp); 5905 Value *F = CGM.getIntrinsic(Intrinsic::x86_avx2_vbroadcasti128); 5906 return Builder.CreateCall(F, Builder.CreateBitCast(VecTmp, Int8PtrTy)); 5907 } 5908 } 5909 } 5910 5911 5912 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, 5913 const CallExpr *E) { 5914 SmallVector<Value*, 4> Ops; 5915 5916 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) 5917 Ops.push_back(EmitScalarExpr(E->getArg(i))); 5918 5919 Intrinsic::ID ID = Intrinsic::not_intrinsic; 5920 5921 switch (BuiltinID) { 5922 default: return nullptr; 5923 5924 // vec_ld, vec_lvsl, vec_lvsr 5925 case PPC::BI__builtin_altivec_lvx: 5926 case PPC::BI__builtin_altivec_lvxl: 5927 case PPC::BI__builtin_altivec_lvebx: 5928 case PPC::BI__builtin_altivec_lvehx: 5929 case PPC::BI__builtin_altivec_lvewx: 5930 case PPC::BI__builtin_altivec_lvsl: 5931 case PPC::BI__builtin_altivec_lvsr: 5932 { 5933 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); 5934 5935 Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]); 5936 Ops.pop_back(); 5937 5938 switch (BuiltinID) { 5939 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!"); 5940 case PPC::BI__builtin_altivec_lvx: 5941 ID = Intrinsic::ppc_altivec_lvx; 5942 break; 5943 case PPC::BI__builtin_altivec_lvxl: 5944 ID = Intrinsic::ppc_altivec_lvxl; 5945 break; 5946 case PPC::BI__builtin_altivec_lvebx: 5947 ID = Intrinsic::ppc_altivec_lvebx; 5948 break; 5949 case 
PPC::BI__builtin_altivec_lvehx: 5950 ID = Intrinsic::ppc_altivec_lvehx; 5951 break; 5952 case PPC::BI__builtin_altivec_lvewx: 5953 ID = Intrinsic::ppc_altivec_lvewx; 5954 break; 5955 case PPC::BI__builtin_altivec_lvsl: 5956 ID = Intrinsic::ppc_altivec_lvsl; 5957 break; 5958 case PPC::BI__builtin_altivec_lvsr: 5959 ID = Intrinsic::ppc_altivec_lvsr; 5960 break; 5961 } 5962 llvm::Function *F = CGM.getIntrinsic(ID); 5963 return Builder.CreateCall(F, Ops, ""); 5964 } 5965 5966 // vec_st 5967 case PPC::BI__builtin_altivec_stvx: 5968 case PPC::BI__builtin_altivec_stvxl: 5969 case PPC::BI__builtin_altivec_stvebx: 5970 case PPC::BI__builtin_altivec_stvehx: 5971 case PPC::BI__builtin_altivec_stvewx: 5972 { 5973 Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy); 5974 Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]); 5975 Ops.pop_back(); 5976 5977 switch (BuiltinID) { 5978 default: llvm_unreachable("Unsupported st intrinsic!"); 5979 case PPC::BI__builtin_altivec_stvx: 5980 ID = Intrinsic::ppc_altivec_stvx; 5981 break; 5982 case PPC::BI__builtin_altivec_stvxl: 5983 ID = Intrinsic::ppc_altivec_stvxl; 5984 break; 5985 case PPC::BI__builtin_altivec_stvebx: 5986 ID = Intrinsic::ppc_altivec_stvebx; 5987 break; 5988 case PPC::BI__builtin_altivec_stvehx: 5989 ID = Intrinsic::ppc_altivec_stvehx; 5990 break; 5991 case PPC::BI__builtin_altivec_stvewx: 5992 ID = Intrinsic::ppc_altivec_stvewx; 5993 break; 5994 } 5995 llvm::Function *F = CGM.getIntrinsic(ID); 5996 return Builder.CreateCall(F, Ops, ""); 5997 } 5998 } 5999 } 6000 6001 // Emit an intrinsic that has 1 float or double. 6002 static Value *emitUnaryFPBuiltin(CodeGenFunction &CGF, 6003 const CallExpr *E, 6004 unsigned IntrinsicID) { 6005 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 6006 6007 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 6008 return CGF.Builder.CreateCall(F, Src0); 6009 } 6010 6011 // Emit an intrinsic that has 3 float or double operands. 
6012 static Value *emitTernaryFPBuiltin(CodeGenFunction &CGF, 6013 const CallExpr *E, 6014 unsigned IntrinsicID) { 6015 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 6016 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 6017 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); 6018 6019 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 6020 return CGF.Builder.CreateCall3(F, Src0, Src1, Src2); 6021 } 6022 6023 Value *CodeGenFunction::EmitR600BuiltinExpr(unsigned BuiltinID, 6024 const CallExpr *E) { 6025 switch (BuiltinID) { 6026 case R600::BI__builtin_amdgpu_div_scale: 6027 case R600::BI__builtin_amdgpu_div_scalef: { 6028 // Translate from the intrinsics's struct return to the builtin's out 6029 // argument. 6030 6031 std::pair<llvm::Value *, unsigned> FlagOutPtr 6032 = EmitPointerWithAlignment(E->getArg(3)); 6033 6034 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 6035 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 6036 llvm::Value *Z = EmitScalarExpr(E->getArg(2)); 6037 6038 llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::AMDGPU_div_scale, 6039 X->getType()); 6040 6041 llvm::Value *Tmp = Builder.CreateCall3(Callee, X, Y, Z); 6042 6043 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0); 6044 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1); 6045 6046 llvm::Type *RealFlagType 6047 = FlagOutPtr.first->getType()->getPointerElementType(); 6048 6049 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType); 6050 llvm::StoreInst *FlagStore = Builder.CreateStore(FlagExt, FlagOutPtr.first); 6051 FlagStore->setAlignment(FlagOutPtr.second); 6052 return Result; 6053 } 6054 case R600::BI__builtin_amdgpu_div_fmas: 6055 case R600::BI__builtin_amdgpu_div_fmasf: 6056 return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fmas); 6057 case R600::BI__builtin_amdgpu_div_fixup: 6058 case R600::BI__builtin_amdgpu_div_fixupf: 6059 return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fixup); 6060 case 
R600::BI__builtin_amdgpu_trig_preop: 6061 case R600::BI__builtin_amdgpu_trig_preopf: { 6062 Value *Src0 = EmitScalarExpr(E->getArg(0)); 6063 Value *Src1 = EmitScalarExpr(E->getArg(1)); 6064 Value *F = CGM.getIntrinsic(Intrinsic::AMDGPU_trig_preop, Src0->getType()); 6065 return Builder.CreateCall2(F, Src0, Src1); 6066 } 6067 case R600::BI__builtin_amdgpu_rcp: 6068 case R600::BI__builtin_amdgpu_rcpf: 6069 return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rcp); 6070 case R600::BI__builtin_amdgpu_rsq: 6071 case R600::BI__builtin_amdgpu_rsqf: 6072 return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq); 6073 case R600::BI__builtin_amdgpu_rsq_clamped: 6074 case R600::BI__builtin_amdgpu_rsq_clampedf: 6075 return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq_clamped); 6076 default: 6077 return nullptr; 6078 } 6079 } 6080