1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This contains code to emit Builtin calls as LLVM code. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CodeGenFunction.h" 15 #include "CGObjCRuntime.h" 16 #include "CodeGenModule.h" 17 #include "TargetInfo.h" 18 #include "clang/AST/ASTContext.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/Basic/TargetBuiltins.h" 21 #include "clang/Basic/TargetInfo.h" 22 #include "clang/CodeGen/CGFunctionInfo.h" 23 #include "llvm/ADT/StringExtras.h" 24 #include "llvm/IR/CallSite.h" 25 #include "llvm/IR/DataLayout.h" 26 #include "llvm/IR/InlineAsm.h" 27 #include "llvm/IR/Intrinsics.h" 28 29 using namespace clang; 30 using namespace CodeGen; 31 using namespace llvm; 32 33 /// getBuiltinLibFunction - Given a builtin id for a function like 34 /// "__builtin_fabsf", return a Function* for "fabsf". 35 llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, 36 unsigned BuiltinID) { 37 assert(Context.BuiltinInfo.isLibFunction(BuiltinID)); 38 39 // Get the name, skip over the __builtin_ prefix (if necessary). 40 StringRef Name; 41 GlobalDecl D(FD); 42 43 // If the builtin has been declared explicitly with an assembler label, 44 // use the mangled name. This differs from the plain label on platforms 45 // that prefix labels. 
46 if (FD->hasAttr<AsmLabelAttr>()) 47 Name = getMangledName(D); 48 else 49 Name = Context.BuiltinInfo.GetName(BuiltinID) + 10; 50 51 llvm::FunctionType *Ty = 52 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType())); 53 54 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false); 55 } 56 57 /// Emit the conversions required to turn the given value into an 58 /// integer of the given size. 59 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V, 60 QualType T, llvm::IntegerType *IntType) { 61 V = CGF.EmitToMemory(V, T); 62 63 if (V->getType()->isPointerTy()) 64 return CGF.Builder.CreatePtrToInt(V, IntType); 65 66 assert(V->getType() == IntType); 67 return V; 68 } 69 70 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, 71 QualType T, llvm::Type *ResultType) { 72 V = CGF.EmitFromMemory(V, T); 73 74 if (ResultType->isPointerTy()) 75 return CGF.Builder.CreateIntToPtr(V, ResultType); 76 77 assert(V->getType() == ResultType); 78 return V; 79 } 80 81 /// Utility to insert an atomic instruction based on Instrinsic::ID 82 /// and the expression node. 
83 static RValue EmitBinaryAtomic(CodeGenFunction &CGF, 84 llvm::AtomicRMWInst::BinOp Kind, 85 const CallExpr *E) { 86 QualType T = E->getType(); 87 assert(E->getArg(0)->getType()->isPointerType()); 88 assert(CGF.getContext().hasSameUnqualifiedType(T, 89 E->getArg(0)->getType()->getPointeeType())); 90 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 91 92 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 93 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 94 95 llvm::IntegerType *IntType = 96 llvm::IntegerType::get(CGF.getLLVMContext(), 97 CGF.getContext().getTypeSize(T)); 98 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 99 100 llvm::Value *Args[2]; 101 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 102 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 103 llvm::Type *ValueType = Args[1]->getType(); 104 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 105 106 llvm::Value *Result = 107 CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1], 108 llvm::SequentiallyConsistent); 109 Result = EmitFromInt(CGF, Result, T, ValueType); 110 return RValue::get(Result); 111 } 112 113 /// Utility to insert an atomic instruction based Instrinsic::ID and 114 /// the expression node, where the return value is the result of the 115 /// operation. 
116 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, 117 llvm::AtomicRMWInst::BinOp Kind, 118 const CallExpr *E, 119 Instruction::BinaryOps Op, 120 bool Invert = false) { 121 QualType T = E->getType(); 122 assert(E->getArg(0)->getType()->isPointerType()); 123 assert(CGF.getContext().hasSameUnqualifiedType(T, 124 E->getArg(0)->getType()->getPointeeType())); 125 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 126 127 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 128 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 129 130 llvm::IntegerType *IntType = 131 llvm::IntegerType::get(CGF.getLLVMContext(), 132 CGF.getContext().getTypeSize(T)); 133 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 134 135 llvm::Value *Args[2]; 136 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 137 llvm::Type *ValueType = Args[1]->getType(); 138 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 139 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 140 141 llvm::Value *Result = 142 CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1], 143 llvm::SequentiallyConsistent); 144 Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]); 145 if (Invert) 146 Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result, 147 llvm::ConstantInt::get(IntType, -1)); 148 Result = EmitFromInt(CGF, Result, T, ValueType); 149 return RValue::get(Result); 150 } 151 152 /// EmitFAbs - Emit a call to @llvm.fabs(). 
153 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) { 154 Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); 155 llvm::CallInst *Call = CGF.Builder.CreateCall(F, V); 156 Call->setDoesNotAccessMemory(); 157 return Call; 158 } 159 160 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn, 161 const CallExpr *E, llvm::Value *calleeValue) { 162 return CGF.EmitCall(E->getCallee()->getType(), calleeValue, E, 163 ReturnValueSlot(), Fn); 164 } 165 166 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.* 167 /// depending on IntrinsicID. 168 /// 169 /// \arg CGF The current codegen function. 170 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate. 171 /// \arg X The first argument to the llvm.*.with.overflow.*. 172 /// \arg Y The second argument to the llvm.*.with.overflow.*. 173 /// \arg Carry The carry returned by the llvm.*.with.overflow.*. 174 /// \returns The result (i.e. sum/product) returned by the intrinsic. 175 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, 176 const llvm::Intrinsic::ID IntrinsicID, 177 llvm::Value *X, llvm::Value *Y, 178 llvm::Value *&Carry) { 179 // Make sure we have integers of the same width. 180 assert(X->getType() == Y->getType() && 181 "Arguments must be the same type. (Did you forget to make sure both " 182 "arguments have the same integer width?)"); 183 184 llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType()); 185 llvm::Value *Tmp = CGF.Builder.CreateCall2(Callee, X, Y); 186 Carry = CGF.Builder.CreateExtractValue(Tmp, 1); 187 return CGF.Builder.CreateExtractValue(Tmp, 0); 188 } 189 190 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, 191 unsigned BuiltinID, const CallExpr *E, 192 ReturnValueSlot ReturnValue) { 193 // See if we can constant fold this builtin. If so, don't emit it at all. 
194 Expr::EvalResult Result; 195 if (E->EvaluateAsRValue(Result, CGM.getContext()) && 196 !Result.hasSideEffects()) { 197 if (Result.Val.isInt()) 198 return RValue::get(llvm::ConstantInt::get(getLLVMContext(), 199 Result.Val.getInt())); 200 if (Result.Val.isFloat()) 201 return RValue::get(llvm::ConstantFP::get(getLLVMContext(), 202 Result.Val.getFloat())); 203 } 204 205 switch (BuiltinID) { 206 default: break; // Handle intrinsics and libm functions below. 207 case Builtin::BI__builtin___CFStringMakeConstantString: 208 case Builtin::BI__builtin___NSStringMakeConstantString: 209 return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr)); 210 case Builtin::BI__builtin_stdarg_start: 211 case Builtin::BI__builtin_va_start: 212 case Builtin::BI__va_start: 213 case Builtin::BI__builtin_va_end: { 214 Value *ArgValue = (BuiltinID == Builtin::BI__va_start) 215 ? EmitScalarExpr(E->getArg(0)) 216 : EmitVAListRef(E->getArg(0)); 217 llvm::Type *DestType = Int8PtrTy; 218 if (ArgValue->getType() != DestType) 219 ArgValue = Builder.CreateBitCast(ArgValue, DestType, 220 ArgValue->getName().data()); 221 222 Intrinsic::ID inst = (BuiltinID == Builtin::BI__builtin_va_end) ? 
223 Intrinsic::vaend : Intrinsic::vastart; 224 return RValue::get(Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue)); 225 } 226 case Builtin::BI__builtin_va_copy: { 227 Value *DstPtr = EmitVAListRef(E->getArg(0)); 228 Value *SrcPtr = EmitVAListRef(E->getArg(1)); 229 230 llvm::Type *Type = Int8PtrTy; 231 232 DstPtr = Builder.CreateBitCast(DstPtr, Type); 233 SrcPtr = Builder.CreateBitCast(SrcPtr, Type); 234 return RValue::get(Builder.CreateCall2(CGM.getIntrinsic(Intrinsic::vacopy), 235 DstPtr, SrcPtr)); 236 } 237 case Builtin::BI__builtin_abs: 238 case Builtin::BI__builtin_labs: 239 case Builtin::BI__builtin_llabs: { 240 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 241 242 Value *NegOp = Builder.CreateNeg(ArgValue, "neg"); 243 Value *CmpResult = 244 Builder.CreateICmpSGE(ArgValue, 245 llvm::Constant::getNullValue(ArgValue->getType()), 246 "abscond"); 247 Value *Result = 248 Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs"); 249 250 return RValue::get(Result); 251 } 252 case Builtin::BI__builtin_fabs: 253 case Builtin::BI__builtin_fabsf: 254 case Builtin::BI__builtin_fabsl: { 255 Value *Arg1 = EmitScalarExpr(E->getArg(0)); 256 Value *Result = EmitFAbs(*this, Arg1); 257 return RValue::get(Result); 258 } 259 case Builtin::BI__builtin_fmod: 260 case Builtin::BI__builtin_fmodf: 261 case Builtin::BI__builtin_fmodl: { 262 Value *Arg1 = EmitScalarExpr(E->getArg(0)); 263 Value *Arg2 = EmitScalarExpr(E->getArg(1)); 264 Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod"); 265 return RValue::get(Result); 266 } 267 268 case Builtin::BI__builtin_conj: 269 case Builtin::BI__builtin_conjf: 270 case Builtin::BI__builtin_conjl: { 271 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 272 Value *Real = ComplexVal.first; 273 Value *Imag = ComplexVal.second; 274 Value *Zero = 275 Imag->getType()->isFPOrFPVectorTy() 276 ? 
llvm::ConstantFP::getZeroValueForNegation(Imag->getType()) 277 : llvm::Constant::getNullValue(Imag->getType()); 278 279 Imag = Builder.CreateFSub(Zero, Imag, "sub"); 280 return RValue::getComplex(std::make_pair(Real, Imag)); 281 } 282 case Builtin::BI__builtin_creal: 283 case Builtin::BI__builtin_crealf: 284 case Builtin::BI__builtin_creall: 285 case Builtin::BIcreal: 286 case Builtin::BIcrealf: 287 case Builtin::BIcreall: { 288 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 289 return RValue::get(ComplexVal.first); 290 } 291 292 case Builtin::BI__builtin_cimag: 293 case Builtin::BI__builtin_cimagf: 294 case Builtin::BI__builtin_cimagl: 295 case Builtin::BIcimag: 296 case Builtin::BIcimagf: 297 case Builtin::BIcimagl: { 298 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 299 return RValue::get(ComplexVal.second); 300 } 301 302 case Builtin::BI__builtin_ctzs: 303 case Builtin::BI__builtin_ctz: 304 case Builtin::BI__builtin_ctzl: 305 case Builtin::BI__builtin_ctzll: { 306 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 307 308 llvm::Type *ArgType = ArgValue->getType(); 309 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 310 311 llvm::Type *ResultType = ConvertType(E->getType()); 312 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); 313 Value *Result = Builder.CreateCall2(F, ArgValue, ZeroUndef); 314 if (Result->getType() != ResultType) 315 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 316 "cast"); 317 return RValue::get(Result); 318 } 319 case Builtin::BI__builtin_clzs: 320 case Builtin::BI__builtin_clz: 321 case Builtin::BI__builtin_clzl: 322 case Builtin::BI__builtin_clzll: { 323 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 324 325 llvm::Type *ArgType = ArgValue->getType(); 326 Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); 327 328 llvm::Type *ResultType = ConvertType(E->getType()); 329 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); 330 Value *Result = 
Builder.CreateCall2(F, ArgValue, ZeroUndef); 331 if (Result->getType() != ResultType) 332 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 333 "cast"); 334 return RValue::get(Result); 335 } 336 case Builtin::BI__builtin_ffs: 337 case Builtin::BI__builtin_ffsl: 338 case Builtin::BI__builtin_ffsll: { 339 // ffs(x) -> x ? cttz(x) + 1 : 0 340 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 341 342 llvm::Type *ArgType = ArgValue->getType(); 343 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 344 345 llvm::Type *ResultType = ConvertType(E->getType()); 346 Value *Tmp = Builder.CreateAdd(Builder.CreateCall2(F, ArgValue, 347 Builder.getTrue()), 348 llvm::ConstantInt::get(ArgType, 1)); 349 Value *Zero = llvm::Constant::getNullValue(ArgType); 350 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero"); 351 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs"); 352 if (Result->getType() != ResultType) 353 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 354 "cast"); 355 return RValue::get(Result); 356 } 357 case Builtin::BI__builtin_parity: 358 case Builtin::BI__builtin_parityl: 359 case Builtin::BI__builtin_parityll: { 360 // parity(x) -> ctpop(x) & 1 361 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 362 363 llvm::Type *ArgType = ArgValue->getType(); 364 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 365 366 llvm::Type *ResultType = ConvertType(E->getType()); 367 Value *Tmp = Builder.CreateCall(F, ArgValue); 368 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1)); 369 if (Result->getType() != ResultType) 370 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 371 "cast"); 372 return RValue::get(Result); 373 } 374 case Builtin::BI__builtin_popcount: 375 case Builtin::BI__builtin_popcountl: 376 case Builtin::BI__builtin_popcountll: { 377 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 378 379 llvm::Type *ArgType = ArgValue->getType(); 380 Value *F = 
CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 381 382 llvm::Type *ResultType = ConvertType(E->getType()); 383 Value *Result = Builder.CreateCall(F, ArgValue); 384 if (Result->getType() != ResultType) 385 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 386 "cast"); 387 return RValue::get(Result); 388 } 389 case Builtin::BI__builtin_expect: { 390 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 391 llvm::Type *ArgType = ArgValue->getType(); 392 393 Value *ExpectedValue = EmitScalarExpr(E->getArg(1)); 394 // Don't generate llvm.expect on -O0 as the backend won't use it for 395 // anything. 396 // Note, we still IRGen ExpectedValue because it could have side-effects. 397 if (CGM.getCodeGenOpts().OptimizationLevel == 0) 398 return RValue::get(ArgValue); 399 400 Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType); 401 Value *Result = Builder.CreateCall2(FnExpect, ArgValue, ExpectedValue, 402 "expval"); 403 return RValue::get(Result); 404 } 405 case Builtin::BI__builtin_assume_aligned: { 406 Value *PtrValue = EmitScalarExpr(E->getArg(0)); 407 Value *OffsetValue = 408 (E->getNumArgs() > 2) ? 
EmitScalarExpr(E->getArg(2)) : nullptr; 409 410 Value *AlignmentValue = EmitScalarExpr(E->getArg(1)); 411 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue); 412 unsigned Alignment = (unsigned) AlignmentCI->getZExtValue(); 413 414 EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue); 415 return RValue::get(PtrValue); 416 } 417 case Builtin::BI__assume: 418 case Builtin::BI__builtin_assume: { 419 if (E->getArg(0)->HasSideEffects(getContext())) 420 return RValue::get(nullptr); 421 422 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 423 Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume); 424 return RValue::get(Builder.CreateCall(FnAssume, ArgValue)); 425 } 426 case Builtin::BI__builtin_bswap16: 427 case Builtin::BI__builtin_bswap32: 428 case Builtin::BI__builtin_bswap64: { 429 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 430 llvm::Type *ArgType = ArgValue->getType(); 431 Value *F = CGM.getIntrinsic(Intrinsic::bswap, ArgType); 432 return RValue::get(Builder.CreateCall(F, ArgValue)); 433 } 434 case Builtin::BI__builtin_object_size: { 435 // We rely on constant folding to deal with expressions with side effects. 436 assert(!E->getArg(0)->HasSideEffects(getContext()) && 437 "should have been constant folded"); 438 439 // We pass this builtin onto the optimizer so that it can 440 // figure out the object size in more complex cases. 441 llvm::Type *ResType = ConvertType(E->getType()); 442 443 // LLVM only supports 0 and 2, make sure that we pass along that 444 // as a boolean. 445 Value *Ty = EmitScalarExpr(E->getArg(1)); 446 ConstantInt *CI = dyn_cast<ConstantInt>(Ty); 447 assert(CI); 448 uint64_t val = CI->getZExtValue(); 449 CI = ConstantInt::get(Builder.getInt1Ty(), (val & 0x2) >> 1); 450 // FIXME: Get right address space. 
451 llvm::Type *Tys[] = { ResType, Builder.getInt8PtrTy(0) }; 452 Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys); 453 return RValue::get(Builder.CreateCall2(F, EmitScalarExpr(E->getArg(0)),CI)); 454 } 455 case Builtin::BI__builtin_prefetch: { 456 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); 457 // FIXME: Technically these constants should of type 'int', yes? 458 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) : 459 llvm::ConstantInt::get(Int32Ty, 0); 460 Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : 461 llvm::ConstantInt::get(Int32Ty, 3); 462 Value *Data = llvm::ConstantInt::get(Int32Ty, 1); 463 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 464 return RValue::get(Builder.CreateCall4(F, Address, RW, Locality, Data)); 465 } 466 case Builtin::BI__builtin_readcyclecounter: { 467 Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); 468 return RValue::get(Builder.CreateCall(F)); 469 } 470 case Builtin::BI__builtin___clear_cache: { 471 Value *Begin = EmitScalarExpr(E->getArg(0)); 472 Value *End = EmitScalarExpr(E->getArg(1)); 473 Value *F = CGM.getIntrinsic(Intrinsic::clear_cache); 474 return RValue::get(Builder.CreateCall2(F, Begin, End)); 475 } 476 case Builtin::BI__builtin_trap: { 477 Value *F = CGM.getIntrinsic(Intrinsic::trap); 478 return RValue::get(Builder.CreateCall(F)); 479 } 480 case Builtin::BI__debugbreak: { 481 Value *F = CGM.getIntrinsic(Intrinsic::debugtrap); 482 return RValue::get(Builder.CreateCall(F)); 483 } 484 case Builtin::BI__builtin_unreachable: { 485 if (SanOpts.has(SanitizerKind::Unreachable)) { 486 SanitizerScope SanScope(this); 487 EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()), 488 SanitizerKind::Unreachable), 489 "builtin_unreachable", EmitCheckSourceLocation(E->getExprLoc()), 490 None); 491 } else 492 Builder.CreateUnreachable(); 493 494 // We do need to preserve an insertion point. 
495 EmitBlock(createBasicBlock("unreachable.cont")); 496 497 return RValue::get(nullptr); 498 } 499 500 case Builtin::BI__builtin_powi: 501 case Builtin::BI__builtin_powif: 502 case Builtin::BI__builtin_powil: { 503 Value *Base = EmitScalarExpr(E->getArg(0)); 504 Value *Exponent = EmitScalarExpr(E->getArg(1)); 505 llvm::Type *ArgType = Base->getType(); 506 Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType); 507 return RValue::get(Builder.CreateCall2(F, Base, Exponent)); 508 } 509 510 case Builtin::BI__builtin_isgreater: 511 case Builtin::BI__builtin_isgreaterequal: 512 case Builtin::BI__builtin_isless: 513 case Builtin::BI__builtin_islessequal: 514 case Builtin::BI__builtin_islessgreater: 515 case Builtin::BI__builtin_isunordered: { 516 // Ordered comparisons: we know the arguments to these are matching scalar 517 // floating point values. 518 Value *LHS = EmitScalarExpr(E->getArg(0)); 519 Value *RHS = EmitScalarExpr(E->getArg(1)); 520 521 switch (BuiltinID) { 522 default: llvm_unreachable("Unknown ordered comparison"); 523 case Builtin::BI__builtin_isgreater: 524 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp"); 525 break; 526 case Builtin::BI__builtin_isgreaterequal: 527 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp"); 528 break; 529 case Builtin::BI__builtin_isless: 530 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp"); 531 break; 532 case Builtin::BI__builtin_islessequal: 533 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp"); 534 break; 535 case Builtin::BI__builtin_islessgreater: 536 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp"); 537 break; 538 case Builtin::BI__builtin_isunordered: 539 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp"); 540 break; 541 } 542 // ZExt bool to int type. 
543 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType()))); 544 } 545 case Builtin::BI__builtin_isnan: { 546 Value *V = EmitScalarExpr(E->getArg(0)); 547 V = Builder.CreateFCmpUNO(V, V, "cmp"); 548 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 549 } 550 551 case Builtin::BI__builtin_isinf: { 552 // isinf(x) --> fabs(x) == infinity 553 Value *V = EmitScalarExpr(E->getArg(0)); 554 V = EmitFAbs(*this, V); 555 556 V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf"); 557 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 558 } 559 560 // TODO: BI__builtin_isinf_sign 561 // isinf_sign(x) -> isinf(x) ? (signbit(x) ? -1 : 1) : 0 562 563 case Builtin::BI__builtin_isnormal: { 564 // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min 565 Value *V = EmitScalarExpr(E->getArg(0)); 566 Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); 567 568 Value *Abs = EmitFAbs(*this, V); 569 Value *IsLessThanInf = 570 Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); 571 APFloat Smallest = APFloat::getSmallestNormalized( 572 getContext().getFloatTypeSemantics(E->getArg(0)->getType())); 573 Value *IsNormal = 574 Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest), 575 "isnormal"); 576 V = Builder.CreateAnd(Eq, IsLessThanInf, "and"); 577 V = Builder.CreateAnd(V, IsNormal, "and"); 578 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 579 } 580 581 case Builtin::BI__builtin_isfinite: { 582 // isfinite(x) --> x == x && fabs(x) != infinity; 583 Value *V = EmitScalarExpr(E->getArg(0)); 584 Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); 585 586 Value *Abs = EmitFAbs(*this, V); 587 Value *IsNotInf = 588 Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); 589 590 V = Builder.CreateAnd(Eq, IsNotInf, "and"); 591 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 592 } 593 594 case 
Builtin::BI__builtin_fpclassify: { 595 Value *V = EmitScalarExpr(E->getArg(5)); 596 llvm::Type *Ty = ConvertType(E->getArg(5)->getType()); 597 598 // Create Result 599 BasicBlock *Begin = Builder.GetInsertBlock(); 600 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn); 601 Builder.SetInsertPoint(End); 602 PHINode *Result = 603 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4, 604 "fpclassify_result"); 605 606 // if (V==0) return FP_ZERO 607 Builder.SetInsertPoint(Begin); 608 Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty), 609 "iszero"); 610 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4)); 611 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn); 612 Builder.CreateCondBr(IsZero, End, NotZero); 613 Result->addIncoming(ZeroLiteral, Begin); 614 615 // if (V != V) return FP_NAN 616 Builder.SetInsertPoint(NotZero); 617 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp"); 618 Value *NanLiteral = EmitScalarExpr(E->getArg(0)); 619 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn); 620 Builder.CreateCondBr(IsNan, End, NotNan); 621 Result->addIncoming(NanLiteral, NotZero); 622 623 // if (fabs(V) == infinity) return FP_INFINITY 624 Builder.SetInsertPoint(NotNan); 625 Value *VAbs = EmitFAbs(*this, V); 626 Value *IsInf = 627 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()), 628 "isinf"); 629 Value *InfLiteral = EmitScalarExpr(E->getArg(1)); 630 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn); 631 Builder.CreateCondBr(IsInf, End, NotInf); 632 Result->addIncoming(InfLiteral, NotNan); 633 634 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL 635 Builder.SetInsertPoint(NotInf); 636 APFloat Smallest = APFloat::getSmallestNormalized( 637 getContext().getFloatTypeSemantics(E->getArg(5)->getType())); 638 Value *IsNormal = 639 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest), 640 "isnormal"); 641 Value *NormalResult = 642 
Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)), 643 EmitScalarExpr(E->getArg(3))); 644 Builder.CreateBr(End); 645 Result->addIncoming(NormalResult, NotInf); 646 647 // return Result 648 Builder.SetInsertPoint(End); 649 return RValue::get(Result); 650 } 651 652 case Builtin::BIalloca: 653 case Builtin::BI_alloca: 654 case Builtin::BI__builtin_alloca: { 655 Value *Size = EmitScalarExpr(E->getArg(0)); 656 return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size)); 657 } 658 case Builtin::BIbzero: 659 case Builtin::BI__builtin_bzero: { 660 std::pair<llvm::Value*, unsigned> Dest = 661 EmitPointerWithAlignment(E->getArg(0)); 662 Value *SizeVal = EmitScalarExpr(E->getArg(1)); 663 Builder.CreateMemSet(Dest.first, Builder.getInt8(0), SizeVal, 664 Dest.second, false); 665 return RValue::get(Dest.first); 666 } 667 case Builtin::BImemcpy: 668 case Builtin::BI__builtin_memcpy: { 669 std::pair<llvm::Value*, unsigned> Dest = 670 EmitPointerWithAlignment(E->getArg(0)); 671 std::pair<llvm::Value*, unsigned> Src = 672 EmitPointerWithAlignment(E->getArg(1)); 673 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 674 unsigned Align = std::min(Dest.second, Src.second); 675 Builder.CreateMemCpy(Dest.first, Src.first, SizeVal, Align, false); 676 return RValue::get(Dest.first); 677 } 678 679 case Builtin::BI__builtin___memcpy_chk: { 680 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2. 
681 llvm::APSInt Size, DstSize; 682 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 683 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 684 break; 685 if (Size.ugt(DstSize)) 686 break; 687 std::pair<llvm::Value*, unsigned> Dest = 688 EmitPointerWithAlignment(E->getArg(0)); 689 std::pair<llvm::Value*, unsigned> Src = 690 EmitPointerWithAlignment(E->getArg(1)); 691 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 692 unsigned Align = std::min(Dest.second, Src.second); 693 Builder.CreateMemCpy(Dest.first, Src.first, SizeVal, Align, false); 694 return RValue::get(Dest.first); 695 } 696 697 case Builtin::BI__builtin_objc_memmove_collectable: { 698 Value *Address = EmitScalarExpr(E->getArg(0)); 699 Value *SrcAddr = EmitScalarExpr(E->getArg(1)); 700 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 701 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, 702 Address, SrcAddr, SizeVal); 703 return RValue::get(Address); 704 } 705 706 case Builtin::BI__builtin___memmove_chk: { 707 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2. 
708 llvm::APSInt Size, DstSize; 709 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 710 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 711 break; 712 if (Size.ugt(DstSize)) 713 break; 714 std::pair<llvm::Value*, unsigned> Dest = 715 EmitPointerWithAlignment(E->getArg(0)); 716 std::pair<llvm::Value*, unsigned> Src = 717 EmitPointerWithAlignment(E->getArg(1)); 718 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 719 unsigned Align = std::min(Dest.second, Src.second); 720 Builder.CreateMemMove(Dest.first, Src.first, SizeVal, Align, false); 721 return RValue::get(Dest.first); 722 } 723 724 case Builtin::BImemmove: 725 case Builtin::BI__builtin_memmove: { 726 std::pair<llvm::Value*, unsigned> Dest = 727 EmitPointerWithAlignment(E->getArg(0)); 728 std::pair<llvm::Value*, unsigned> Src = 729 EmitPointerWithAlignment(E->getArg(1)); 730 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 731 unsigned Align = std::min(Dest.second, Src.second); 732 Builder.CreateMemMove(Dest.first, Src.first, SizeVal, Align, false); 733 return RValue::get(Dest.first); 734 } 735 case Builtin::BImemset: 736 case Builtin::BI__builtin_memset: { 737 std::pair<llvm::Value*, unsigned> Dest = 738 EmitPointerWithAlignment(E->getArg(0)); 739 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 740 Builder.getInt8Ty()); 741 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 742 Builder.CreateMemSet(Dest.first, ByteVal, SizeVal, Dest.second, false); 743 return RValue::get(Dest.first); 744 } 745 case Builtin::BI__builtin___memset_chk: { 746 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2. 
747 llvm::APSInt Size, DstSize; 748 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 749 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 750 break; 751 if (Size.ugt(DstSize)) 752 break; 753 std::pair<llvm::Value*, unsigned> Dest = 754 EmitPointerWithAlignment(E->getArg(0)); 755 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 756 Builder.getInt8Ty()); 757 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 758 Builder.CreateMemSet(Dest.first, ByteVal, SizeVal, Dest.second, false); 759 return RValue::get(Dest.first); 760 } 761 case Builtin::BI__builtin_dwarf_cfa: { 762 // The offset in bytes from the first argument to the CFA. 763 // 764 // Why on earth is this in the frontend? Is there any reason at 765 // all that the backend can't reasonably determine this while 766 // lowering llvm.eh.dwarf.cfa()? 767 // 768 // TODO: If there's a satisfactory reason, add a target hook for 769 // this instead of hard-coding 0, which is correct for most targets. 
770 int32_t Offset = 0; 771 772 Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa); 773 return RValue::get(Builder.CreateCall(F, 774 llvm::ConstantInt::get(Int32Ty, Offset))); 775 } 776 case Builtin::BI__builtin_return_address: { 777 Value *Depth = EmitScalarExpr(E->getArg(0)); 778 Depth = Builder.CreateIntCast(Depth, Int32Ty, false); 779 Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); 780 return RValue::get(Builder.CreateCall(F, Depth)); 781 } 782 case Builtin::BI__builtin_frame_address: { 783 Value *Depth = EmitScalarExpr(E->getArg(0)); 784 Depth = Builder.CreateIntCast(Depth, Int32Ty, false); 785 Value *F = CGM.getIntrinsic(Intrinsic::frameaddress); 786 return RValue::get(Builder.CreateCall(F, Depth)); 787 } 788 case Builtin::BI__builtin_extract_return_addr: { 789 Value *Address = EmitScalarExpr(E->getArg(0)); 790 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address); 791 return RValue::get(Result); 792 } 793 case Builtin::BI__builtin_frob_return_addr: { 794 Value *Address = EmitScalarExpr(E->getArg(0)); 795 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address); 796 return RValue::get(Result); 797 } 798 case Builtin::BI__builtin_dwarf_sp_column: { 799 llvm::IntegerType *Ty 800 = cast<llvm::IntegerType>(ConvertType(E->getType())); 801 int Column = getTargetHooks().getDwarfEHStackPointer(CGM); 802 if (Column == -1) { 803 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column"); 804 return RValue::get(llvm::UndefValue::get(Ty)); 805 } 806 return RValue::get(llvm::ConstantInt::get(Ty, Column, true)); 807 } 808 case Builtin::BI__builtin_init_dwarf_reg_size_table: { 809 Value *Address = EmitScalarExpr(E->getArg(0)); 810 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address)) 811 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table"); 812 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType()))); 813 } 814 case Builtin::BI__builtin_eh_return: { 815 Value *Int = EmitScalarExpr(E->getArg(0)); 816 Value *Ptr = 
EmitScalarExpr(E->getArg(1)); 817 818 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType()); 819 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) && 820 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants"); 821 Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32 822 ? Intrinsic::eh_return_i32 823 : Intrinsic::eh_return_i64); 824 Builder.CreateCall2(F, Int, Ptr); 825 Builder.CreateUnreachable(); 826 827 // We do need to preserve an insertion point. 828 EmitBlock(createBasicBlock("builtin_eh_return.cont")); 829 830 return RValue::get(nullptr); 831 } 832 case Builtin::BI__builtin_unwind_init: { 833 Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init); 834 return RValue::get(Builder.CreateCall(F)); 835 } 836 case Builtin::BI__builtin_extend_pointer: { 837 // Extends a pointer to the size of an _Unwind_Word, which is 838 // uint64_t on all platforms. Generally this gets poked into a 839 // register and eventually used as an address, so if the 840 // addressing registers are wider than pointers and the platform 841 // doesn't implicitly ignore high-order bits when doing 842 // addressing, we need to make sure we zext / sext based on 843 // the platform's expectations. 844 // 845 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html 846 847 // Cast the pointer to intptr_t. 848 Value *Ptr = EmitScalarExpr(E->getArg(0)); 849 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast"); 850 851 // If that's 64 bits, we're done. 852 if (IntPtrTy->getBitWidth() == 64) 853 return RValue::get(Result); 854 855 // Otherwise, ask the codegen data what to do. 856 if (getTargetHooks().extendPointerWithSExt()) 857 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext")); 858 else 859 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext")); 860 } 861 case Builtin::BI__builtin_setjmp: { 862 if (!getTargetHooks().hasSjLjLowering(*this)) 863 break; 864 // Buffer is a void**. 
865 Value *Buf = EmitScalarExpr(E->getArg(0)); 866 867 // Store the frame pointer to the setjmp buffer. 868 Value *FrameAddr = 869 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 870 ConstantInt::get(Int32Ty, 0)); 871 Builder.CreateStore(FrameAddr, Buf); 872 873 // Store the stack pointer to the setjmp buffer. 874 Value *StackAddr = 875 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave)); 876 Value *StackSaveSlot = 877 Builder.CreateGEP(Buf, ConstantInt::get(Int32Ty, 2)); 878 Builder.CreateStore(StackAddr, StackSaveSlot); 879 880 // Call LLVM's EH setjmp, which is lightweight. 881 Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); 882 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 883 return RValue::get(Builder.CreateCall(F, Buf)); 884 } 885 case Builtin::BI__builtin_longjmp: { 886 if (!getTargetHooks().hasSjLjLowering(*this)) 887 break; 888 Value *Buf = EmitScalarExpr(E->getArg(0)); 889 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 890 891 // Call LLVM's EH longjmp, which is lightweight. 892 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf); 893 894 // longjmp doesn't return; mark this as unreachable. 895 Builder.CreateUnreachable(); 896 897 // We do need to preserve an insertion point. 
898 EmitBlock(createBasicBlock("longjmp.cont")); 899 900 return RValue::get(nullptr); 901 } 902 case Builtin::BI__sync_fetch_and_add: 903 case Builtin::BI__sync_fetch_and_sub: 904 case Builtin::BI__sync_fetch_and_or: 905 case Builtin::BI__sync_fetch_and_and: 906 case Builtin::BI__sync_fetch_and_xor: 907 case Builtin::BI__sync_fetch_and_nand: 908 case Builtin::BI__sync_add_and_fetch: 909 case Builtin::BI__sync_sub_and_fetch: 910 case Builtin::BI__sync_and_and_fetch: 911 case Builtin::BI__sync_or_and_fetch: 912 case Builtin::BI__sync_xor_and_fetch: 913 case Builtin::BI__sync_nand_and_fetch: 914 case Builtin::BI__sync_val_compare_and_swap: 915 case Builtin::BI__sync_bool_compare_and_swap: 916 case Builtin::BI__sync_lock_test_and_set: 917 case Builtin::BI__sync_lock_release: 918 case Builtin::BI__sync_swap: 919 llvm_unreachable("Shouldn't make it through sema"); 920 case Builtin::BI__sync_fetch_and_add_1: 921 case Builtin::BI__sync_fetch_and_add_2: 922 case Builtin::BI__sync_fetch_and_add_4: 923 case Builtin::BI__sync_fetch_and_add_8: 924 case Builtin::BI__sync_fetch_and_add_16: 925 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E); 926 case Builtin::BI__sync_fetch_and_sub_1: 927 case Builtin::BI__sync_fetch_and_sub_2: 928 case Builtin::BI__sync_fetch_and_sub_4: 929 case Builtin::BI__sync_fetch_and_sub_8: 930 case Builtin::BI__sync_fetch_and_sub_16: 931 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E); 932 case Builtin::BI__sync_fetch_and_or_1: 933 case Builtin::BI__sync_fetch_and_or_2: 934 case Builtin::BI__sync_fetch_and_or_4: 935 case Builtin::BI__sync_fetch_and_or_8: 936 case Builtin::BI__sync_fetch_and_or_16: 937 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E); 938 case Builtin::BI__sync_fetch_and_and_1: 939 case Builtin::BI__sync_fetch_and_and_2: 940 case Builtin::BI__sync_fetch_and_and_4: 941 case Builtin::BI__sync_fetch_and_and_8: 942 case Builtin::BI__sync_fetch_and_and_16: 943 return EmitBinaryAtomic(*this, 
llvm::AtomicRMWInst::And, E); 944 case Builtin::BI__sync_fetch_and_xor_1: 945 case Builtin::BI__sync_fetch_and_xor_2: 946 case Builtin::BI__sync_fetch_and_xor_4: 947 case Builtin::BI__sync_fetch_and_xor_8: 948 case Builtin::BI__sync_fetch_and_xor_16: 949 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E); 950 case Builtin::BI__sync_fetch_and_nand_1: 951 case Builtin::BI__sync_fetch_and_nand_2: 952 case Builtin::BI__sync_fetch_and_nand_4: 953 case Builtin::BI__sync_fetch_and_nand_8: 954 case Builtin::BI__sync_fetch_and_nand_16: 955 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E); 956 957 // Clang extensions: not overloaded yet. 958 case Builtin::BI__sync_fetch_and_min: 959 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E); 960 case Builtin::BI__sync_fetch_and_max: 961 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E); 962 case Builtin::BI__sync_fetch_and_umin: 963 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E); 964 case Builtin::BI__sync_fetch_and_umax: 965 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E); 966 967 case Builtin::BI__sync_add_and_fetch_1: 968 case Builtin::BI__sync_add_and_fetch_2: 969 case Builtin::BI__sync_add_and_fetch_4: 970 case Builtin::BI__sync_add_and_fetch_8: 971 case Builtin::BI__sync_add_and_fetch_16: 972 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E, 973 llvm::Instruction::Add); 974 case Builtin::BI__sync_sub_and_fetch_1: 975 case Builtin::BI__sync_sub_and_fetch_2: 976 case Builtin::BI__sync_sub_and_fetch_4: 977 case Builtin::BI__sync_sub_and_fetch_8: 978 case Builtin::BI__sync_sub_and_fetch_16: 979 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E, 980 llvm::Instruction::Sub); 981 case Builtin::BI__sync_and_and_fetch_1: 982 case Builtin::BI__sync_and_and_fetch_2: 983 case Builtin::BI__sync_and_and_fetch_4: 984 case Builtin::BI__sync_and_and_fetch_8: 985 case Builtin::BI__sync_and_and_fetch_16: 986 return EmitBinaryAtomicPost(*this, 
llvm::AtomicRMWInst::And, E, 987 llvm::Instruction::And); 988 case Builtin::BI__sync_or_and_fetch_1: 989 case Builtin::BI__sync_or_and_fetch_2: 990 case Builtin::BI__sync_or_and_fetch_4: 991 case Builtin::BI__sync_or_and_fetch_8: 992 case Builtin::BI__sync_or_and_fetch_16: 993 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E, 994 llvm::Instruction::Or); 995 case Builtin::BI__sync_xor_and_fetch_1: 996 case Builtin::BI__sync_xor_and_fetch_2: 997 case Builtin::BI__sync_xor_and_fetch_4: 998 case Builtin::BI__sync_xor_and_fetch_8: 999 case Builtin::BI__sync_xor_and_fetch_16: 1000 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E, 1001 llvm::Instruction::Xor); 1002 case Builtin::BI__sync_nand_and_fetch_1: 1003 case Builtin::BI__sync_nand_and_fetch_2: 1004 case Builtin::BI__sync_nand_and_fetch_4: 1005 case Builtin::BI__sync_nand_and_fetch_8: 1006 case Builtin::BI__sync_nand_and_fetch_16: 1007 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E, 1008 llvm::Instruction::And, true); 1009 1010 case Builtin::BI__sync_val_compare_and_swap_1: 1011 case Builtin::BI__sync_val_compare_and_swap_2: 1012 case Builtin::BI__sync_val_compare_and_swap_4: 1013 case Builtin::BI__sync_val_compare_and_swap_8: 1014 case Builtin::BI__sync_val_compare_and_swap_16: { 1015 QualType T = E->getType(); 1016 llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0)); 1017 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 1018 1019 llvm::IntegerType *IntType = 1020 llvm::IntegerType::get(getLLVMContext(), 1021 getContext().getTypeSize(T)); 1022 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 1023 1024 Value *Args[3]; 1025 Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType); 1026 Args[1] = EmitScalarExpr(E->getArg(1)); 1027 llvm::Type *ValueType = Args[1]->getType(); 1028 Args[1] = EmitToInt(*this, Args[1], T, IntType); 1029 Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType); 1030 1031 Value *Result = 
Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2], 1032 llvm::SequentiallyConsistent, 1033 llvm::SequentiallyConsistent); 1034 Result = Builder.CreateExtractValue(Result, 0); 1035 Result = EmitFromInt(*this, Result, T, ValueType); 1036 return RValue::get(Result); 1037 } 1038 1039 case Builtin::BI__sync_bool_compare_and_swap_1: 1040 case Builtin::BI__sync_bool_compare_and_swap_2: 1041 case Builtin::BI__sync_bool_compare_and_swap_4: 1042 case Builtin::BI__sync_bool_compare_and_swap_8: 1043 case Builtin::BI__sync_bool_compare_and_swap_16: { 1044 QualType T = E->getArg(1)->getType(); 1045 llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0)); 1046 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 1047 1048 llvm::IntegerType *IntType = 1049 llvm::IntegerType::get(getLLVMContext(), 1050 getContext().getTypeSize(T)); 1051 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 1052 1053 Value *Args[3]; 1054 Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType); 1055 Args[1] = EmitToInt(*this, EmitScalarExpr(E->getArg(1)), T, IntType); 1056 Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType); 1057 1058 Value *Pair = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2], 1059 llvm::SequentiallyConsistent, 1060 llvm::SequentiallyConsistent); 1061 Value *Result = Builder.CreateExtractValue(Pair, 1); 1062 // zext bool to int. 
1063 Result = Builder.CreateZExt(Result, ConvertType(E->getType())); 1064 return RValue::get(Result); 1065 } 1066 1067 case Builtin::BI__sync_swap_1: 1068 case Builtin::BI__sync_swap_2: 1069 case Builtin::BI__sync_swap_4: 1070 case Builtin::BI__sync_swap_8: 1071 case Builtin::BI__sync_swap_16: 1072 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1073 1074 case Builtin::BI__sync_lock_test_and_set_1: 1075 case Builtin::BI__sync_lock_test_and_set_2: 1076 case Builtin::BI__sync_lock_test_and_set_4: 1077 case Builtin::BI__sync_lock_test_and_set_8: 1078 case Builtin::BI__sync_lock_test_and_set_16: 1079 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1080 1081 case Builtin::BI__sync_lock_release_1: 1082 case Builtin::BI__sync_lock_release_2: 1083 case Builtin::BI__sync_lock_release_4: 1084 case Builtin::BI__sync_lock_release_8: 1085 case Builtin::BI__sync_lock_release_16: { 1086 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1087 QualType ElTy = E->getArg(0)->getType()->getPointeeType(); 1088 CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); 1089 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 1090 StoreSize.getQuantity() * 8); 1091 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 1092 llvm::StoreInst *Store = 1093 Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr); 1094 Store->setAlignment(StoreSize.getQuantity()); 1095 Store->setAtomic(llvm::Release); 1096 return RValue::get(nullptr); 1097 } 1098 1099 case Builtin::BI__sync_synchronize: { 1100 // We assume this is supposed to correspond to a C++0x-style 1101 // sequentially-consistent fence (i.e. this is only usable for 1102 // synchonization, not device I/O or anything like that). This intrinsic 1103 // is really badly designed in the sense that in theory, there isn't 1104 // any way to safely use it... but in practice, it mostly works 1105 // to use it with non-atomic loads and stores to get acquire/release 1106 // semantics. 
1107 Builder.CreateFence(llvm::SequentiallyConsistent); 1108 return RValue::get(nullptr); 1109 } 1110 1111 case Builtin::BI__c11_atomic_is_lock_free: 1112 case Builtin::BI__atomic_is_lock_free: { 1113 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the 1114 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since 1115 // _Atomic(T) is always properly-aligned. 1116 const char *LibCallName = "__atomic_is_lock_free"; 1117 CallArgList Args; 1118 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))), 1119 getContext().getSizeType()); 1120 if (BuiltinID == Builtin::BI__atomic_is_lock_free) 1121 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))), 1122 getContext().VoidPtrTy); 1123 else 1124 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)), 1125 getContext().VoidPtrTy); 1126 const CGFunctionInfo &FuncInfo = 1127 CGM.getTypes().arrangeFreeFunctionCall(E->getType(), Args, 1128 FunctionType::ExtInfo(), 1129 RequiredArgs::All); 1130 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); 1131 llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName); 1132 return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args); 1133 } 1134 1135 case Builtin::BI__atomic_test_and_set: { 1136 // Look at the argument type to determine whether this is a volatile 1137 // operation. The parameter type is always volatile. 
1138 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 1139 bool Volatile = 1140 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 1141 1142 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1143 unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); 1144 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 1145 Value *NewVal = Builder.getInt8(1); 1146 Value *Order = EmitScalarExpr(E->getArg(1)); 1147 if (isa<llvm::ConstantInt>(Order)) { 1148 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1149 AtomicRMWInst *Result = nullptr; 1150 switch (ord) { 1151 case 0: // memory_order_relaxed 1152 default: // invalid order 1153 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1154 Ptr, NewVal, 1155 llvm::Monotonic); 1156 break; 1157 case 1: // memory_order_consume 1158 case 2: // memory_order_acquire 1159 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1160 Ptr, NewVal, 1161 llvm::Acquire); 1162 break; 1163 case 3: // memory_order_release 1164 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1165 Ptr, NewVal, 1166 llvm::Release); 1167 break; 1168 case 4: // memory_order_acq_rel 1169 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1170 Ptr, NewVal, 1171 llvm::AcquireRelease); 1172 break; 1173 case 5: // memory_order_seq_cst 1174 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1175 Ptr, NewVal, 1176 llvm::SequentiallyConsistent); 1177 break; 1178 } 1179 Result->setVolatile(Volatile); 1180 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 1181 } 1182 1183 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1184 1185 llvm::BasicBlock *BBs[5] = { 1186 createBasicBlock("monotonic", CurFn), 1187 createBasicBlock("acquire", CurFn), 1188 createBasicBlock("release", CurFn), 1189 createBasicBlock("acqrel", CurFn), 1190 createBasicBlock("seqcst", CurFn) 1191 }; 1192 llvm::AtomicOrdering Orders[5] = { 1193 llvm::Monotonic, llvm::Acquire, 
llvm::Release, 1194 llvm::AcquireRelease, llvm::SequentiallyConsistent 1195 }; 1196 1197 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1198 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 1199 1200 Builder.SetInsertPoint(ContBB); 1201 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set"); 1202 1203 for (unsigned i = 0; i < 5; ++i) { 1204 Builder.SetInsertPoint(BBs[i]); 1205 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1206 Ptr, NewVal, Orders[i]); 1207 RMW->setVolatile(Volatile); 1208 Result->addIncoming(RMW, BBs[i]); 1209 Builder.CreateBr(ContBB); 1210 } 1211 1212 SI->addCase(Builder.getInt32(0), BBs[0]); 1213 SI->addCase(Builder.getInt32(1), BBs[1]); 1214 SI->addCase(Builder.getInt32(2), BBs[1]); 1215 SI->addCase(Builder.getInt32(3), BBs[2]); 1216 SI->addCase(Builder.getInt32(4), BBs[3]); 1217 SI->addCase(Builder.getInt32(5), BBs[4]); 1218 1219 Builder.SetInsertPoint(ContBB); 1220 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 1221 } 1222 1223 case Builtin::BI__atomic_clear: { 1224 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 1225 bool Volatile = 1226 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 1227 1228 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1229 unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); 1230 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 1231 Value *NewVal = Builder.getInt8(0); 1232 Value *Order = EmitScalarExpr(E->getArg(1)); 1233 if (isa<llvm::ConstantInt>(Order)) { 1234 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1235 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 1236 Store->setAlignment(1); 1237 switch (ord) { 1238 case 0: // memory_order_relaxed 1239 default: // invalid order 1240 Store->setOrdering(llvm::Monotonic); 1241 break; 1242 case 3: // memory_order_release 1243 Store->setOrdering(llvm::Release); 1244 break; 1245 case 5: // memory_order_seq_cst 1246 
Store->setOrdering(llvm::SequentiallyConsistent); 1247 break; 1248 } 1249 return RValue::get(nullptr); 1250 } 1251 1252 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1253 1254 llvm::BasicBlock *BBs[3] = { 1255 createBasicBlock("monotonic", CurFn), 1256 createBasicBlock("release", CurFn), 1257 createBasicBlock("seqcst", CurFn) 1258 }; 1259 llvm::AtomicOrdering Orders[3] = { 1260 llvm::Monotonic, llvm::Release, llvm::SequentiallyConsistent 1261 }; 1262 1263 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1264 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 1265 1266 for (unsigned i = 0; i < 3; ++i) { 1267 Builder.SetInsertPoint(BBs[i]); 1268 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 1269 Store->setAlignment(1); 1270 Store->setOrdering(Orders[i]); 1271 Builder.CreateBr(ContBB); 1272 } 1273 1274 SI->addCase(Builder.getInt32(0), BBs[0]); 1275 SI->addCase(Builder.getInt32(3), BBs[1]); 1276 SI->addCase(Builder.getInt32(5), BBs[2]); 1277 1278 Builder.SetInsertPoint(ContBB); 1279 return RValue::get(nullptr); 1280 } 1281 1282 case Builtin::BI__atomic_thread_fence: 1283 case Builtin::BI__atomic_signal_fence: 1284 case Builtin::BI__c11_atomic_thread_fence: 1285 case Builtin::BI__c11_atomic_signal_fence: { 1286 llvm::SynchronizationScope Scope; 1287 if (BuiltinID == Builtin::BI__atomic_signal_fence || 1288 BuiltinID == Builtin::BI__c11_atomic_signal_fence) 1289 Scope = llvm::SingleThread; 1290 else 1291 Scope = llvm::CrossThread; 1292 Value *Order = EmitScalarExpr(E->getArg(0)); 1293 if (isa<llvm::ConstantInt>(Order)) { 1294 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1295 switch (ord) { 1296 case 0: // memory_order_relaxed 1297 default: // invalid order 1298 break; 1299 case 1: // memory_order_consume 1300 case 2: // memory_order_acquire 1301 Builder.CreateFence(llvm::Acquire, Scope); 1302 break; 1303 case 3: // memory_order_release 1304 Builder.CreateFence(llvm::Release, Scope); 1305 
break; 1306 case 4: // memory_order_acq_rel 1307 Builder.CreateFence(llvm::AcquireRelease, Scope); 1308 break; 1309 case 5: // memory_order_seq_cst 1310 Builder.CreateFence(llvm::SequentiallyConsistent, Scope); 1311 break; 1312 } 1313 return RValue::get(nullptr); 1314 } 1315 1316 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB; 1317 AcquireBB = createBasicBlock("acquire", CurFn); 1318 ReleaseBB = createBasicBlock("release", CurFn); 1319 AcqRelBB = createBasicBlock("acqrel", CurFn); 1320 SeqCstBB = createBasicBlock("seqcst", CurFn); 1321 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1322 1323 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1324 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB); 1325 1326 Builder.SetInsertPoint(AcquireBB); 1327 Builder.CreateFence(llvm::Acquire, Scope); 1328 Builder.CreateBr(ContBB); 1329 SI->addCase(Builder.getInt32(1), AcquireBB); 1330 SI->addCase(Builder.getInt32(2), AcquireBB); 1331 1332 Builder.SetInsertPoint(ReleaseBB); 1333 Builder.CreateFence(llvm::Release, Scope); 1334 Builder.CreateBr(ContBB); 1335 SI->addCase(Builder.getInt32(3), ReleaseBB); 1336 1337 Builder.SetInsertPoint(AcqRelBB); 1338 Builder.CreateFence(llvm::AcquireRelease, Scope); 1339 Builder.CreateBr(ContBB); 1340 SI->addCase(Builder.getInt32(4), AcqRelBB); 1341 1342 Builder.SetInsertPoint(SeqCstBB); 1343 Builder.CreateFence(llvm::SequentiallyConsistent, Scope); 1344 Builder.CreateBr(ContBB); 1345 SI->addCase(Builder.getInt32(5), SeqCstBB); 1346 1347 Builder.SetInsertPoint(ContBB); 1348 return RValue::get(nullptr); 1349 } 1350 1351 // Library functions with special handling. 
1352 case Builtin::BIsqrt: 1353 case Builtin::BIsqrtf: 1354 case Builtin::BIsqrtl: { 1355 // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only 1356 // in finite- or unsafe-math mode (the intrinsic has different semantics 1357 // for handling negative numbers compared to the library function, so 1358 // -fmath-errno=0 is not enough). 1359 if (!FD->hasAttr<ConstAttr>()) 1360 break; 1361 if (!(CGM.getCodeGenOpts().UnsafeFPMath || 1362 CGM.getCodeGenOpts().NoNaNsFPMath)) 1363 break; 1364 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 1365 llvm::Type *ArgType = Arg0->getType(); 1366 Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType); 1367 return RValue::get(Builder.CreateCall(F, Arg0)); 1368 } 1369 1370 case Builtin::BI__builtin_pow: 1371 case Builtin::BI__builtin_powf: 1372 case Builtin::BI__builtin_powl: 1373 case Builtin::BIpow: 1374 case Builtin::BIpowf: 1375 case Builtin::BIpowl: { 1376 // Transform a call to pow* into a @llvm.pow.* intrinsic call. 1377 if (!FD->hasAttr<ConstAttr>()) 1378 break; 1379 Value *Base = EmitScalarExpr(E->getArg(0)); 1380 Value *Exponent = EmitScalarExpr(E->getArg(1)); 1381 llvm::Type *ArgType = Base->getType(); 1382 Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType); 1383 return RValue::get(Builder.CreateCall2(F, Base, Exponent)); 1384 } 1385 1386 case Builtin::BIfma: 1387 case Builtin::BIfmaf: 1388 case Builtin::BIfmal: 1389 case Builtin::BI__builtin_fma: 1390 case Builtin::BI__builtin_fmaf: 1391 case Builtin::BI__builtin_fmal: { 1392 // Rewrite fma to intrinsic. 
1393 Value *FirstArg = EmitScalarExpr(E->getArg(0)); 1394 llvm::Type *ArgType = FirstArg->getType(); 1395 Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType); 1396 return RValue::get(Builder.CreateCall3(F, FirstArg, 1397 EmitScalarExpr(E->getArg(1)), 1398 EmitScalarExpr(E->getArg(2)))); 1399 } 1400 1401 case Builtin::BI__builtin_signbit: 1402 case Builtin::BI__builtin_signbitf: 1403 case Builtin::BI__builtin_signbitl: { 1404 LLVMContext &C = CGM.getLLVMContext(); 1405 1406 Value *Arg = EmitScalarExpr(E->getArg(0)); 1407 llvm::Type *ArgTy = Arg->getType(); 1408 int ArgWidth = ArgTy->getPrimitiveSizeInBits(); 1409 llvm::Type *ArgIntTy = llvm::IntegerType::get(C, ArgWidth); 1410 Value *BCArg = Builder.CreateBitCast(Arg, ArgIntTy); 1411 if (ArgTy->isPPC_FP128Ty()) { 1412 // The higher-order double comes first, and so we need to truncate the 1413 // pair to extract the overall sign. The order of the pair is the same 1414 // in both little- and big-Endian modes. 1415 ArgWidth >>= 1; 1416 ArgIntTy = llvm::IntegerType::get(C, ArgWidth); 1417 BCArg = Builder.CreateTrunc(BCArg, ArgIntTy); 1418 } 1419 Value *ZeroCmp = llvm::Constant::getNullValue(ArgIntTy); 1420 Value *Result = Builder.CreateICmpSLT(BCArg, ZeroCmp); 1421 return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType()))); 1422 } 1423 case Builtin::BI__builtin_annotation: { 1424 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0)); 1425 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation, 1426 AnnVal->getType()); 1427 1428 // Get the annotation string, go through casts. Sema requires this to be a 1429 // non-wide string literal, potentially casted, so the cast<> is safe. 
1430 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts(); 1431 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString(); 1432 return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc())); 1433 } 1434 case Builtin::BI__builtin_addcb: 1435 case Builtin::BI__builtin_addcs: 1436 case Builtin::BI__builtin_addc: 1437 case Builtin::BI__builtin_addcl: 1438 case Builtin::BI__builtin_addcll: 1439 case Builtin::BI__builtin_subcb: 1440 case Builtin::BI__builtin_subcs: 1441 case Builtin::BI__builtin_subc: 1442 case Builtin::BI__builtin_subcl: 1443 case Builtin::BI__builtin_subcll: { 1444 1445 // We translate all of these builtins from expressions of the form: 1446 // int x = ..., y = ..., carryin = ..., carryout, result; 1447 // result = __builtin_addc(x, y, carryin, &carryout); 1448 // 1449 // to LLVM IR of the form: 1450 // 1451 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) 1452 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0 1453 // %carry1 = extractvalue {i32, i1} %tmp1, 1 1454 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1, 1455 // i32 %carryin) 1456 // %result = extractvalue {i32, i1} %tmp2, 0 1457 // %carry2 = extractvalue {i32, i1} %tmp2, 1 1458 // %tmp3 = or i1 %carry1, %carry2 1459 // %tmp4 = zext i1 %tmp3 to i32 1460 // store i32 %tmp4, i32* %carryout 1461 1462 // Scalarize our inputs. 1463 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 1464 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 1465 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2)); 1466 std::pair<llvm::Value*, unsigned> CarryOutPtr = 1467 EmitPointerWithAlignment(E->getArg(3)); 1468 1469 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow. 
1470 llvm::Intrinsic::ID IntrinsicId; 1471 switch (BuiltinID) { 1472 default: llvm_unreachable("Unknown multiprecision builtin id."); 1473 case Builtin::BI__builtin_addcb: 1474 case Builtin::BI__builtin_addcs: 1475 case Builtin::BI__builtin_addc: 1476 case Builtin::BI__builtin_addcl: 1477 case Builtin::BI__builtin_addcll: 1478 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 1479 break; 1480 case Builtin::BI__builtin_subcb: 1481 case Builtin::BI__builtin_subcs: 1482 case Builtin::BI__builtin_subc: 1483 case Builtin::BI__builtin_subcl: 1484 case Builtin::BI__builtin_subcll: 1485 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 1486 break; 1487 } 1488 1489 // Construct our resulting LLVM IR expression. 1490 llvm::Value *Carry1; 1491 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId, 1492 X, Y, Carry1); 1493 llvm::Value *Carry2; 1494 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId, 1495 Sum1, Carryin, Carry2); 1496 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2), 1497 X->getType()); 1498 llvm::StoreInst *CarryOutStore = Builder.CreateStore(CarryOut, 1499 CarryOutPtr.first); 1500 CarryOutStore->setAlignment(CarryOutPtr.second); 1501 return RValue::get(Sum2); 1502 } 1503 case Builtin::BI__builtin_uadd_overflow: 1504 case Builtin::BI__builtin_uaddl_overflow: 1505 case Builtin::BI__builtin_uaddll_overflow: 1506 case Builtin::BI__builtin_usub_overflow: 1507 case Builtin::BI__builtin_usubl_overflow: 1508 case Builtin::BI__builtin_usubll_overflow: 1509 case Builtin::BI__builtin_umul_overflow: 1510 case Builtin::BI__builtin_umull_overflow: 1511 case Builtin::BI__builtin_umulll_overflow: 1512 case Builtin::BI__builtin_sadd_overflow: 1513 case Builtin::BI__builtin_saddl_overflow: 1514 case Builtin::BI__builtin_saddll_overflow: 1515 case Builtin::BI__builtin_ssub_overflow: 1516 case Builtin::BI__builtin_ssubl_overflow: 1517 case Builtin::BI__builtin_ssubll_overflow: 1518 case Builtin::BI__builtin_smul_overflow: 1519 case 
Builtin::BI__builtin_smull_overflow: 1520 case Builtin::BI__builtin_smulll_overflow: { 1521 1522 // We translate all of these builtins directly to the relevant llvm IR node. 1523 1524 // Scalarize our inputs. 1525 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 1526 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 1527 std::pair<llvm::Value *, unsigned> SumOutPtr = 1528 EmitPointerWithAlignment(E->getArg(2)); 1529 1530 // Decide which of the overflow intrinsics we are lowering to: 1531 llvm::Intrinsic::ID IntrinsicId; 1532 switch (BuiltinID) { 1533 default: llvm_unreachable("Unknown security overflow builtin id."); 1534 case Builtin::BI__builtin_uadd_overflow: 1535 case Builtin::BI__builtin_uaddl_overflow: 1536 case Builtin::BI__builtin_uaddll_overflow: 1537 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 1538 break; 1539 case Builtin::BI__builtin_usub_overflow: 1540 case Builtin::BI__builtin_usubl_overflow: 1541 case Builtin::BI__builtin_usubll_overflow: 1542 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 1543 break; 1544 case Builtin::BI__builtin_umul_overflow: 1545 case Builtin::BI__builtin_umull_overflow: 1546 case Builtin::BI__builtin_umulll_overflow: 1547 IntrinsicId = llvm::Intrinsic::umul_with_overflow; 1548 break; 1549 case Builtin::BI__builtin_sadd_overflow: 1550 case Builtin::BI__builtin_saddl_overflow: 1551 case Builtin::BI__builtin_saddll_overflow: 1552 IntrinsicId = llvm::Intrinsic::sadd_with_overflow; 1553 break; 1554 case Builtin::BI__builtin_ssub_overflow: 1555 case Builtin::BI__builtin_ssubl_overflow: 1556 case Builtin::BI__builtin_ssubll_overflow: 1557 IntrinsicId = llvm::Intrinsic::ssub_with_overflow; 1558 break; 1559 case Builtin::BI__builtin_smul_overflow: 1560 case Builtin::BI__builtin_smull_overflow: 1561 case Builtin::BI__builtin_smulll_overflow: 1562 IntrinsicId = llvm::Intrinsic::smul_with_overflow; 1563 break; 1564 } 1565 1566 1567 llvm::Value *Carry; 1568 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry); 
1569 llvm::StoreInst *SumOutStore = Builder.CreateStore(Sum, SumOutPtr.first); 1570 SumOutStore->setAlignment(SumOutPtr.second); 1571 1572 return RValue::get(Carry); 1573 } 1574 case Builtin::BI__builtin_addressof: 1575 return RValue::get(EmitLValue(E->getArg(0)).getAddress()); 1576 case Builtin::BI__builtin_operator_new: 1577 return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 1578 E->getArg(0), false); 1579 case Builtin::BI__builtin_operator_delete: 1580 return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 1581 E->getArg(0), true); 1582 case Builtin::BI__noop: 1583 // __noop always evaluates to an integer literal zero. 1584 return RValue::get(ConstantInt::get(IntTy, 0)); 1585 case Builtin::BI__builtin_call_with_static_chain: { 1586 const CallExpr *Call = cast<CallExpr>(E->getArg(0)); 1587 const Expr *Chain = E->getArg(1); 1588 return EmitCall(Call->getCallee()->getType(), 1589 EmitScalarExpr(Call->getCallee()), Call, ReturnValue, 1590 Call->getCalleeDecl(), EmitScalarExpr(Chain)); 1591 } 1592 case Builtin::BI_InterlockedExchange: 1593 case Builtin::BI_InterlockedExchangePointer: 1594 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1595 case Builtin::BI_InterlockedCompareExchangePointer: { 1596 llvm::Type *RTy; 1597 llvm::IntegerType *IntType = 1598 IntegerType::get(getLLVMContext(), 1599 getContext().getTypeSize(E->getType())); 1600 llvm::Type *IntPtrType = IntType->getPointerTo(); 1601 1602 llvm::Value *Destination = 1603 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType); 1604 1605 llvm::Value *Exchange = EmitScalarExpr(E->getArg(1)); 1606 RTy = Exchange->getType(); 1607 Exchange = Builder.CreatePtrToInt(Exchange, IntType); 1608 1609 llvm::Value *Comparand = 1610 Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType); 1611 1612 auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, 1613 SequentiallyConsistent, 1614 SequentiallyConsistent); 1615 
Result->setVolatile(true); 1616 1617 return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result, 1618 0), 1619 RTy)); 1620 } 1621 case Builtin::BI_InterlockedCompareExchange: { 1622 AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg( 1623 EmitScalarExpr(E->getArg(0)), 1624 EmitScalarExpr(E->getArg(2)), 1625 EmitScalarExpr(E->getArg(1)), 1626 SequentiallyConsistent, 1627 SequentiallyConsistent); 1628 CXI->setVolatile(true); 1629 return RValue::get(Builder.CreateExtractValue(CXI, 0)); 1630 } 1631 case Builtin::BI_InterlockedIncrement: { 1632 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 1633 AtomicRMWInst::Add, 1634 EmitScalarExpr(E->getArg(0)), 1635 ConstantInt::get(Int32Ty, 1), 1636 llvm::SequentiallyConsistent); 1637 RMWI->setVolatile(true); 1638 return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(Int32Ty, 1))); 1639 } 1640 case Builtin::BI_InterlockedDecrement: { 1641 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 1642 AtomicRMWInst::Sub, 1643 EmitScalarExpr(E->getArg(0)), 1644 ConstantInt::get(Int32Ty, 1), 1645 llvm::SequentiallyConsistent); 1646 RMWI->setVolatile(true); 1647 return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(Int32Ty, 1))); 1648 } 1649 case Builtin::BI_InterlockedExchangeAdd: { 1650 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 1651 AtomicRMWInst::Add, 1652 EmitScalarExpr(E->getArg(0)), 1653 EmitScalarExpr(E->getArg(1)), 1654 llvm::SequentiallyConsistent); 1655 RMWI->setVolatile(true); 1656 return RValue::get(RMWI); 1657 } 1658 case Builtin::BI__readfsdword: { 1659 Value *IntToPtr = 1660 Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), 1661 llvm::PointerType::get(CGM.Int32Ty, 257)); 1662 LoadInst *Load = 1663 Builder.CreateAlignedLoad(IntToPtr, /*Align=*/4, /*isVolatile=*/true); 1664 return RValue::get(Load); 1665 } 1666 1667 case Builtin::BI__exception_code: 1668 case Builtin::BI_exception_code: 1669 return RValue::get(EmitSEHExceptionCode()); 1670 case Builtin::BI__exception_info: 1671 case 
Builtin::BI_exception_info: 1672 return RValue::get(EmitSEHExceptionInfo()); 1673 case Builtin::BI__abnormal_termination: 1674 case Builtin::BI_abnormal_termination: 1675 return RValue::get(EmitSEHAbnormalTermination()); 1676 case Builtin::BI_setjmpex: { 1677 if (getTarget().getTriple().isOSMSVCRT()) { 1678 llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; 1679 llvm::AttributeSet ReturnsTwiceAttr = 1680 AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex, 1681 llvm::Attribute::ReturnsTwice); 1682 llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction( 1683 llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), 1684 "_setjmpex", ReturnsTwiceAttr); 1685 llvm::Value *Buf = 1686 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy); 1687 llvm::Value *FrameAddr = 1688 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 1689 ConstantInt::get(Int32Ty, 0)); 1690 llvm::Value *Args[] = {Buf, FrameAddr}; 1691 llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args); 1692 CS.setAttributes(ReturnsTwiceAttr); 1693 return RValue::get(CS.getInstruction()); 1694 } 1695 } 1696 case Builtin::BI_setjmp: { 1697 if (getTarget().getTriple().isOSMSVCRT()) { 1698 llvm::AttributeSet ReturnsTwiceAttr = 1699 AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex, 1700 llvm::Attribute::ReturnsTwice); 1701 llvm::Value *Buf = 1702 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy); 1703 llvm::CallSite CS; 1704 if (getTarget().getTriple().getArch() == llvm::Triple::x86) { 1705 llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy}; 1706 llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction( 1707 llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true), 1708 "_setjmp3", ReturnsTwiceAttr); 1709 llvm::Value *Count = ConstantInt::get(IntTy, 0); 1710 llvm::Value *Args[] = {Buf, Count}; 1711 CS = EmitRuntimeCallOrInvoke(SetJmp3, Args); 1712 } else { 1713 llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; 1714 llvm::Constant *SetJmp = 
CGM.CreateRuntimeFunction( 1715 llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), 1716 "_setjmp", ReturnsTwiceAttr); 1717 llvm::Value *FrameAddr = 1718 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 1719 ConstantInt::get(Int32Ty, 0)); 1720 llvm::Value *Args[] = {Buf, FrameAddr}; 1721 CS = EmitRuntimeCallOrInvoke(SetJmp, Args); 1722 } 1723 CS.setAttributes(ReturnsTwiceAttr); 1724 return RValue::get(CS.getInstruction()); 1725 } 1726 } 1727 } 1728 1729 // If this is an alias for a lib function (e.g. __builtin_sin), emit 1730 // the call using the normal call path, but using the unmangled 1731 // version of the function name. 1732 if (getContext().BuiltinInfo.isLibFunction(BuiltinID)) 1733 return emitLibraryCall(*this, FD, E, 1734 CGM.getBuiltinLibFunction(FD, BuiltinID)); 1735 1736 // If this is a predefined lib function (e.g. malloc), emit the call 1737 // using exactly the normal call path. 1738 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID)) 1739 return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee())); 1740 1741 // See if we have a target specific intrinsic. 1742 const char *Name = getContext().BuiltinInfo.GetName(BuiltinID); 1743 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic; 1744 if (const char *Prefix = 1745 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch())) { 1746 IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name); 1747 // NOTE we dont need to perform a compatibility flag check here since the 1748 // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the 1749 // MS builtins via ALL_MS_LANGUAGES and are filtered earlier. 1750 if (IntrinsicID == Intrinsic::not_intrinsic) 1751 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix, Name); 1752 } 1753 1754 if (IntrinsicID != Intrinsic::not_intrinsic) { 1755 SmallVector<Value*, 16> Args; 1756 1757 // Find out if any arguments are required to be integer constant 1758 // expressions. 
1759 unsigned ICEArguments = 0; 1760 ASTContext::GetBuiltinTypeError Error; 1761 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 1762 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 1763 1764 Function *F = CGM.getIntrinsic(IntrinsicID); 1765 llvm::FunctionType *FTy = F->getFunctionType(); 1766 1767 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { 1768 Value *ArgValue; 1769 // If this is a normal argument, just emit it as a scalar. 1770 if ((ICEArguments & (1 << i)) == 0) { 1771 ArgValue = EmitScalarExpr(E->getArg(i)); 1772 } else { 1773 // If this is required to be a constant, constant fold it so that we 1774 // know that the generated intrinsic gets a ConstantInt. 1775 llvm::APSInt Result; 1776 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext()); 1777 assert(IsConst && "Constant arg isn't actually constant?"); 1778 (void)IsConst; 1779 ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result); 1780 } 1781 1782 // If the intrinsic arg type is different from the builtin arg type 1783 // we need to do a bit cast. 1784 llvm::Type *PTy = FTy->getParamType(i); 1785 if (PTy != ArgValue->getType()) { 1786 assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) && 1787 "Must be able to losslessly bit cast to param"); 1788 ArgValue = Builder.CreateBitCast(ArgValue, PTy); 1789 } 1790 1791 Args.push_back(ArgValue); 1792 } 1793 1794 Value *V = Builder.CreateCall(F, Args); 1795 QualType BuiltinRetType = E->getType(); 1796 1797 llvm::Type *RetTy = VoidTy; 1798 if (!BuiltinRetType->isVoidType()) 1799 RetTy = ConvertType(BuiltinRetType); 1800 1801 if (RetTy != V->getType()) { 1802 assert(V->getType()->canLosslesslyBitCastTo(RetTy) && 1803 "Must be able to losslessly bit cast result type"); 1804 V = Builder.CreateBitCast(V, RetTy); 1805 } 1806 1807 return RValue::get(V); 1808 } 1809 1810 // See if we have a target specific builtin that needs to be lowered. 
1811 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E)) 1812 return RValue::get(V); 1813 1814 ErrorUnsupported(E, "builtin function"); 1815 1816 // Unknown builtin, for now just dump it out and return undef. 1817 return GetUndefRValue(E->getType()); 1818 } 1819 1820 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, 1821 const CallExpr *E) { 1822 switch (getTarget().getTriple().getArch()) { 1823 case llvm::Triple::arm: 1824 case llvm::Triple::armeb: 1825 case llvm::Triple::thumb: 1826 case llvm::Triple::thumbeb: 1827 return EmitARMBuiltinExpr(BuiltinID, E); 1828 case llvm::Triple::aarch64: 1829 case llvm::Triple::aarch64_be: 1830 return EmitAArch64BuiltinExpr(BuiltinID, E); 1831 case llvm::Triple::x86: 1832 case llvm::Triple::x86_64: 1833 return EmitX86BuiltinExpr(BuiltinID, E); 1834 case llvm::Triple::ppc: 1835 case llvm::Triple::ppc64: 1836 case llvm::Triple::ppc64le: 1837 return EmitPPCBuiltinExpr(BuiltinID, E); 1838 case llvm::Triple::r600: 1839 case llvm::Triple::amdgcn: 1840 return EmitR600BuiltinExpr(BuiltinID, E); 1841 default: 1842 return nullptr; 1843 } 1844 } 1845 1846 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF, 1847 NeonTypeFlags TypeFlags, 1848 bool V1Ty=false) { 1849 int IsQuad = TypeFlags.isQuad(); 1850 switch (TypeFlags.getEltType()) { 1851 case NeonTypeFlags::Int8: 1852 case NeonTypeFlags::Poly8: 1853 return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad)); 1854 case NeonTypeFlags::Int16: 1855 case NeonTypeFlags::Poly16: 1856 case NeonTypeFlags::Float16: 1857 return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad)); 1858 case NeonTypeFlags::Int32: 1859 return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad)); 1860 case NeonTypeFlags::Int64: 1861 case NeonTypeFlags::Poly64: 1862 return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad)); 1863 case NeonTypeFlags::Poly128: 1864 // FIXME: i128 and f128 doesn't get fully support in Clang and llvm. 
1865 // There is a lot of i128 and f128 API missing. 1866 // so we use v16i8 to represent poly128 and get pattern matched. 1867 return llvm::VectorType::get(CGF->Int8Ty, 16); 1868 case NeonTypeFlags::Float32: 1869 return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad)); 1870 case NeonTypeFlags::Float64: 1871 return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad)); 1872 } 1873 llvm_unreachable("Unknown vector element type!"); 1874 } 1875 1876 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) { 1877 unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements(); 1878 Value* SV = llvm::ConstantVector::getSplat(nElts, C); 1879 return Builder.CreateShuffleVector(V, V, SV, "lane"); 1880 } 1881 1882 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops, 1883 const char *name, 1884 unsigned shift, bool rightshift) { 1885 unsigned j = 0; 1886 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); 1887 ai != ae; ++ai, ++j) 1888 if (shift > 0 && shift == j) 1889 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift); 1890 else 1891 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name); 1892 1893 return Builder.CreateCall(F, Ops, name); 1894 } 1895 1896 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty, 1897 bool neg) { 1898 int SV = cast<ConstantInt>(V)->getSExtValue(); 1899 1900 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 1901 llvm::Constant *C = ConstantInt::get(VTy->getElementType(), neg ? -SV : SV); 1902 return llvm::ConstantVector::getSplat(VTy->getNumElements(), C); 1903 } 1904 1905 // \brief Right-shift a vector by a constant. 
1906 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift, 1907 llvm::Type *Ty, bool usgn, 1908 const char *name) { 1909 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 1910 1911 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue(); 1912 int EltSize = VTy->getScalarSizeInBits(); 1913 1914 Vec = Builder.CreateBitCast(Vec, Ty); 1915 1916 // lshr/ashr are undefined when the shift amount is equal to the vector 1917 // element size. 1918 if (ShiftAmt == EltSize) { 1919 if (usgn) { 1920 // Right-shifting an unsigned value by its size yields 0. 1921 llvm::Constant *Zero = ConstantInt::get(VTy->getElementType(), 0); 1922 return llvm::ConstantVector::getSplat(VTy->getNumElements(), Zero); 1923 } else { 1924 // Right-shifting a signed value by its size is equivalent 1925 // to a shift of size-1. 1926 --ShiftAmt; 1927 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt); 1928 } 1929 } 1930 1931 Shift = EmitNeonShiftVector(Shift, Ty, false); 1932 if (usgn) 1933 return Builder.CreateLShr(Vec, Shift, name); 1934 else 1935 return Builder.CreateAShr(Vec, Shift, name); 1936 } 1937 1938 /// GetPointeeAlignment - Given an expression with a pointer type, find the 1939 /// alignment of the type referenced by the pointer. Skip over implicit 1940 /// casts. 
1941 std::pair<llvm::Value*, unsigned> 1942 CodeGenFunction::EmitPointerWithAlignment(const Expr *Addr) { 1943 assert(Addr->getType()->isPointerType()); 1944 Addr = Addr->IgnoreParens(); 1945 if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Addr)) { 1946 if ((ICE->getCastKind() == CK_BitCast || ICE->getCastKind() == CK_NoOp) && 1947 ICE->getSubExpr()->getType()->isPointerType()) { 1948 std::pair<llvm::Value*, unsigned> Ptr = 1949 EmitPointerWithAlignment(ICE->getSubExpr()); 1950 Ptr.first = Builder.CreateBitCast(Ptr.first, 1951 ConvertType(Addr->getType())); 1952 return Ptr; 1953 } else if (ICE->getCastKind() == CK_ArrayToPointerDecay) { 1954 LValue LV = EmitLValue(ICE->getSubExpr()); 1955 unsigned Align = LV.getAlignment().getQuantity(); 1956 if (!Align) { 1957 // FIXME: Once LValues are fixed to always set alignment, 1958 // zap this code. 1959 QualType PtTy = ICE->getSubExpr()->getType(); 1960 if (!PtTy->isIncompleteType()) 1961 Align = getContext().getTypeAlignInChars(PtTy).getQuantity(); 1962 else 1963 Align = 1; 1964 } 1965 return std::make_pair(LV.getAddress(), Align); 1966 } 1967 } 1968 if (const UnaryOperator *UO = dyn_cast<UnaryOperator>(Addr)) { 1969 if (UO->getOpcode() == UO_AddrOf) { 1970 LValue LV = EmitLValue(UO->getSubExpr()); 1971 unsigned Align = LV.getAlignment().getQuantity(); 1972 if (!Align) { 1973 // FIXME: Once LValues are fixed to always set alignment, 1974 // zap this code. 
1975 QualType PtTy = UO->getSubExpr()->getType(); 1976 if (!PtTy->isIncompleteType()) 1977 Align = getContext().getTypeAlignInChars(PtTy).getQuantity(); 1978 else 1979 Align = 1; 1980 } 1981 return std::make_pair(LV.getAddress(), Align); 1982 } 1983 } 1984 1985 unsigned Align = 1; 1986 QualType PtTy = Addr->getType()->getPointeeType(); 1987 if (!PtTy->isIncompleteType()) 1988 Align = getContext().getTypeAlignInChars(PtTy).getQuantity(); 1989 1990 return std::make_pair(EmitScalarExpr(Addr), Align); 1991 } 1992 1993 enum { 1994 AddRetType = (1 << 0), 1995 Add1ArgType = (1 << 1), 1996 Add2ArgTypes = (1 << 2), 1997 1998 VectorizeRetType = (1 << 3), 1999 VectorizeArgTypes = (1 << 4), 2000 2001 InventFloatType = (1 << 5), 2002 UnsignedAlts = (1 << 6), 2003 2004 Use64BitVectors = (1 << 7), 2005 Use128BitVectors = (1 << 8), 2006 2007 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes, 2008 VectorRet = AddRetType | VectorizeRetType, 2009 VectorRetGetArgs01 = 2010 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes, 2011 FpCmpzModifiers = 2012 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType 2013 }; 2014 2015 struct NeonIntrinsicInfo { 2016 unsigned BuiltinID; 2017 unsigned LLVMIntrinsic; 2018 unsigned AltLLVMIntrinsic; 2019 const char *NameHint; 2020 unsigned TypeModifier; 2021 2022 bool operator<(unsigned RHSBuiltinID) const { 2023 return BuiltinID < RHSBuiltinID; 2024 } 2025 }; 2026 2027 #define NEONMAP0(NameBase) \ 2028 { NEON::BI__builtin_neon_ ## NameBase, 0, 0, #NameBase, 0 } 2029 2030 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ 2031 { NEON:: BI__builtin_neon_ ## NameBase, \ 2032 Intrinsic::LLVMIntrinsic, 0, #NameBase, TypeModifier } 2033 2034 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \ 2035 { NEON:: BI__builtin_neon_ ## NameBase, \ 2036 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \ 2037 #NameBase, TypeModifier } 2038 2039 static NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { 2040 
NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), 2041 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), 2042 NEONMAP1(vabs_v, arm_neon_vabs, 0), 2043 NEONMAP1(vabsq_v, arm_neon_vabs, 0), 2044 NEONMAP0(vaddhn_v), 2045 NEONMAP1(vaesdq_v, arm_neon_aesd, 0), 2046 NEONMAP1(vaeseq_v, arm_neon_aese, 0), 2047 NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0), 2048 NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0), 2049 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType), 2050 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType), 2051 NEONMAP1(vcage_v, arm_neon_vacge, 0), 2052 NEONMAP1(vcageq_v, arm_neon_vacge, 0), 2053 NEONMAP1(vcagt_v, arm_neon_vacgt, 0), 2054 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0), 2055 NEONMAP1(vcale_v, arm_neon_vacge, 0), 2056 NEONMAP1(vcaleq_v, arm_neon_vacge, 0), 2057 NEONMAP1(vcalt_v, arm_neon_vacgt, 0), 2058 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0), 2059 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType), 2060 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType), 2061 NEONMAP1(vclz_v, ctlz, Add1ArgType), 2062 NEONMAP1(vclzq_v, ctlz, Add1ArgType), 2063 NEONMAP1(vcnt_v, ctpop, Add1ArgType), 2064 NEONMAP1(vcntq_v, ctpop, Add1ArgType), 2065 NEONMAP1(vcvt_f16_v, arm_neon_vcvtfp2hf, 0), 2066 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0), 2067 NEONMAP0(vcvt_f32_v), 2068 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), 2069 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0), 2070 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0), 2071 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0), 2072 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0), 2073 NEONMAP0(vcvt_s32_v), 2074 NEONMAP0(vcvt_s64_v), 2075 NEONMAP0(vcvt_u32_v), 2076 NEONMAP0(vcvt_u64_v), 2077 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0), 2078 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0), 2079 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0), 2080 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0), 2081 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0), 2082 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0), 2083 
NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0), 2084 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0), 2085 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0), 2086 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0), 2087 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0), 2088 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0), 2089 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0), 2090 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0), 2091 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0), 2092 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0), 2093 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0), 2094 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0), 2095 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0), 2096 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0), 2097 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0), 2098 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0), 2099 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0), 2100 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0), 2101 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0), 2102 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0), 2103 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0), 2104 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0), 2105 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0), 2106 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0), 2107 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0), 2108 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0), 2109 NEONMAP0(vcvtq_f32_v), 2110 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), 2111 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0), 2112 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0), 2113 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0), 2114 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0), 2115 NEONMAP0(vcvtq_s32_v), 2116 NEONMAP0(vcvtq_s64_v), 2117 NEONMAP0(vcvtq_u32_v), 2118 NEONMAP0(vcvtq_u64_v), 2119 NEONMAP0(vext_v), 2120 NEONMAP0(vextq_v), 2121 NEONMAP0(vfma_v), 2122 NEONMAP0(vfmaq_v), 2123 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), 2124 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), 2125 NEONMAP2(vhsub_v, arm_neon_vhsubu, 
arm_neon_vhsubs, Add1ArgType | UnsignedAlts), 2126 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), 2127 NEONMAP0(vld1_dup_v), 2128 NEONMAP1(vld1_v, arm_neon_vld1, 0), 2129 NEONMAP0(vld1q_dup_v), 2130 NEONMAP1(vld1q_v, arm_neon_vld1, 0), 2131 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0), 2132 NEONMAP1(vld2_v, arm_neon_vld2, 0), 2133 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0), 2134 NEONMAP1(vld2q_v, arm_neon_vld2, 0), 2135 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0), 2136 NEONMAP1(vld3_v, arm_neon_vld3, 0), 2137 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0), 2138 NEONMAP1(vld3q_v, arm_neon_vld3, 0), 2139 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0), 2140 NEONMAP1(vld4_v, arm_neon_vld4, 0), 2141 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0), 2142 NEONMAP1(vld4q_v, arm_neon_vld4, 0), 2143 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), 2144 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType), 2145 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType), 2146 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), 2147 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), 2148 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType), 2149 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType), 2150 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), 2151 NEONMAP0(vmovl_v), 2152 NEONMAP0(vmovn_v), 2153 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType), 2154 NEONMAP0(vmull_v), 2155 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType), 2156 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), 2157 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), 2158 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType), 2159 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), 2160 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), 2161 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType), 2162 NEONMAP2(vpmax_v, 
arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts), 2163 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts), 2164 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType), 2165 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType), 2166 NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), 2167 NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), 2168 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0), 2169 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0), 2170 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType), 2171 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType), 2172 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType), 2173 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts), 2174 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType), 2175 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType), 2176 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType), 2177 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType), 2178 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType), 2179 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), 2180 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), 2181 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts), 2182 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), 2183 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts), 2184 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), 2185 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0), 2186 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0), 2187 NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), 2188 NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), 2189 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType), 2190 NEONMAP2(vrecpe_v, arm_neon_vrecpe, 
arm_neon_vrecpe, 0), 2191 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), 2192 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType), 2193 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType), 2194 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), 2195 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), 2196 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType), 2197 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType), 2198 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType), 2199 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType), 2200 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType), 2201 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType), 2202 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType), 2203 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType), 2204 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType), 2205 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType), 2206 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType), 2207 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType), 2208 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), 2209 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), 2210 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts), 2211 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts), 2212 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0), 2213 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0), 2214 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType), 2215 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType), 2216 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType), 2217 NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0), 2218 NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0), 2219 NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0), 2220 NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0), 2221 NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0), 2222 NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0), 2223 NEONMAP0(vshl_n_v), 
2224 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts), 2225 NEONMAP0(vshll_n_v), 2226 NEONMAP0(vshlq_n_v), 2227 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts), 2228 NEONMAP0(vshr_n_v), 2229 NEONMAP0(vshrn_n_v), 2230 NEONMAP0(vshrq_n_v), 2231 NEONMAP1(vst1_v, arm_neon_vst1, 0), 2232 NEONMAP1(vst1q_v, arm_neon_vst1, 0), 2233 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0), 2234 NEONMAP1(vst2_v, arm_neon_vst2, 0), 2235 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0), 2236 NEONMAP1(vst2q_v, arm_neon_vst2, 0), 2237 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0), 2238 NEONMAP1(vst3_v, arm_neon_vst3, 0), 2239 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0), 2240 NEONMAP1(vst3q_v, arm_neon_vst3, 0), 2241 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0), 2242 NEONMAP1(vst4_v, arm_neon_vst4, 0), 2243 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0), 2244 NEONMAP1(vst4q_v, arm_neon_vst4, 0), 2245 NEONMAP0(vsubhn_v), 2246 NEONMAP0(vtrn_v), 2247 NEONMAP0(vtrnq_v), 2248 NEONMAP0(vtst_v), 2249 NEONMAP0(vtstq_v), 2250 NEONMAP0(vuzp_v), 2251 NEONMAP0(vuzpq_v), 2252 NEONMAP0(vzip_v), 2253 NEONMAP0(vzipq_v) 2254 }; 2255 2256 static NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { 2257 NEONMAP1(vabs_v, aarch64_neon_abs, 0), 2258 NEONMAP1(vabsq_v, aarch64_neon_abs, 0), 2259 NEONMAP0(vaddhn_v), 2260 NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0), 2261 NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0), 2262 NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0), 2263 NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0), 2264 NEONMAP1(vcage_v, aarch64_neon_facge, 0), 2265 NEONMAP1(vcageq_v, aarch64_neon_facge, 0), 2266 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0), 2267 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0), 2268 NEONMAP1(vcale_v, aarch64_neon_facge, 0), 2269 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0), 2270 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0), 2271 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0), 2272 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType), 2273 NEONMAP1(vclsq_v, 
aarch64_neon_cls, Add1ArgType), 2274 NEONMAP1(vclz_v, ctlz, Add1ArgType), 2275 NEONMAP1(vclzq_v, ctlz, Add1ArgType), 2276 NEONMAP1(vcnt_v, ctpop, Add1ArgType), 2277 NEONMAP1(vcntq_v, ctpop, Add1ArgType), 2278 NEONMAP1(vcvt_f16_v, aarch64_neon_vcvtfp2hf, 0), 2279 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0), 2280 NEONMAP0(vcvt_f32_v), 2281 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 2282 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 2283 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), 2284 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), 2285 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), 2286 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), 2287 NEONMAP0(vcvtq_f32_v), 2288 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 2289 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 2290 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), 2291 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), 2292 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), 2293 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), 2294 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType), 2295 NEONMAP0(vext_v), 2296 NEONMAP0(vextq_v), 2297 NEONMAP0(vfma_v), 2298 NEONMAP0(vfmaq_v), 2299 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), 2300 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), 2301 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), 2302 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), 2303 NEONMAP0(vmovl_v), 2304 NEONMAP0(vmovn_v), 2305 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType), 2306 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType), 2307 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType), 2308 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), 2309 
NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), 2310 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType), 2311 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType), 2312 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType), 2313 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), 2314 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), 2315 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0), 2316 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0), 2317 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType), 2318 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType), 2319 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType), 2320 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts), 2321 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType), 2322 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType), 2323 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType), 2324 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType), 2325 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType), 2326 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), 2327 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), 2328 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts), 2329 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), 2330 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts), 2331 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), 2332 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0), 2333 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0), 2334 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), 2335 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), 2336 
NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType), 2337 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), 2338 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), 2339 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType), 2340 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType), 2341 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), 2342 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), 2343 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), 2344 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), 2345 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), 2346 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), 2347 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), 2348 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), 2349 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType), 2350 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType), 2351 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType), 2352 NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0), 2353 NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0), 2354 NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0), 2355 NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0), 2356 NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0), 2357 NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0), 2358 NEONMAP0(vshl_n_v), 2359 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts), 2360 NEONMAP0(vshll_n_v), 2361 NEONMAP0(vshlq_n_v), 2362 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts), 2363 NEONMAP0(vshr_n_v), 2364 NEONMAP0(vshrn_n_v), 2365 NEONMAP0(vshrq_n_v), 2366 NEONMAP0(vsubhn_v), 2367 NEONMAP0(vtst_v), 2368 NEONMAP0(vtstq_v), 2369 }; 2370 2371 static NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = { 
2372 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType), 2373 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType), 2374 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType), 2375 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), 2376 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), 2377 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), 2378 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), 2379 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), 2380 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), 2381 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), 2382 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), 2383 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType), 2384 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), 2385 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType), 2386 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), 2387 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 2388 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType), 2389 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType), 2390 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), 2391 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), 2392 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType), 2393 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType), 2394 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), 2395 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), 2396 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType), 2397 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType), 2398 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType), 2399 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, 
AddRetType | Add1ArgType), 2400 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), 2401 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), 2402 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), 2403 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), 2404 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType), 2405 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), 2406 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType), 2407 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), 2408 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType), 2409 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 2410 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType), 2411 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 2412 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 2413 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), 2414 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 2415 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), 2416 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), 2417 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), 2418 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), 2419 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), 2420 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0), 2421 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 2422 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 2423 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 2424 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 2425 
NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), 2426 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), 2427 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 2428 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 2429 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), 2430 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), 2431 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 2432 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 2433 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 2434 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 2435 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), 2436 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), 2437 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 2438 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), 2439 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), 2440 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), 2441 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0), 2442 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType), 2443 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType), 2444 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 2445 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 2446 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 2447 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 2448 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 2449 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 2450 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 2451 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 2452 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, 
AddRetType | Add1ArgType), 2453 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 2454 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), 2455 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType), 2456 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), 2457 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType), 2458 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), 2459 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), 2460 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType), 2461 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType), 2462 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), 2463 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), 2464 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType), 2465 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType), 2466 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors), 2467 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType), 2468 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors), 2469 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0), 2470 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType), 2471 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType), 2472 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), 2473 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), 2474 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), 2475 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), 2476 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType), 2477 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), 2478 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), 2479 NEONMAP1(vqnegb_s8, 
aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), 2480 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType), 2481 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), 2482 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType), 2483 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors), 2484 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType), 2485 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), 2486 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), 2487 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType), 2488 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType), 2489 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), 2490 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), 2491 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType), 2492 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType), 2493 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType), 2494 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType), 2495 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), 2496 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), 2497 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), 2498 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), 2499 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType), 2500 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), 2501 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), 2502 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 2503 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 2504 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 2505 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, 
Vectorize1ArgType | Use64BitVectors), 2506 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType), 2507 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType), 2508 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 2509 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 2510 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 2511 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 2512 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType), 2513 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType), 2514 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType), 2515 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType), 2516 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors), 2517 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors), 2518 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType), 2519 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType), 2520 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType), 2521 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), 2522 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), 2523 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), 2524 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), 2525 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType), 2526 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), 2527 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), 2528 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors), 2529 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), 2530 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType), 2531 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType), 2532 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, 
Vectorize1ArgType | Use64BitVectors), 2533 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), 2534 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType), 2535 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType), 2536 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType), 2537 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType), 2538 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType), 2539 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType), 2540 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType), 2541 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType), 2542 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType), 2543 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType), 2544 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType), 2545 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType), 2546 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0), 2547 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0), 2548 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0), 2549 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0), 2550 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType), 2551 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType), 2552 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType), 2553 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType), 2554 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), 2555 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType), 2556 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), 2557 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType), 2558 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType), 2559 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType), 2560 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), 2561 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType), 2562 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), 2563 
NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType), 2564 }; 2565 2566 #undef NEONMAP0 2567 #undef NEONMAP1 2568 #undef NEONMAP2 2569 2570 static bool NEONSIMDIntrinsicsProvenSorted = false; 2571 2572 static bool AArch64SIMDIntrinsicsProvenSorted = false; 2573 static bool AArch64SISDIntrinsicsProvenSorted = false; 2574 2575 2576 static const NeonIntrinsicInfo * 2577 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap, 2578 unsigned BuiltinID, bool &MapProvenSorted) { 2579 2580 #ifndef NDEBUG 2581 if (!MapProvenSorted) { 2582 // FIXME: use std::is_sorted once C++11 is allowed 2583 for (unsigned i = 0; i < IntrinsicMap.size() - 1; ++i) 2584 assert(IntrinsicMap[i].BuiltinID <= IntrinsicMap[i + 1].BuiltinID); 2585 MapProvenSorted = true; 2586 } 2587 #endif 2588 2589 const NeonIntrinsicInfo *Builtin = 2590 std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID); 2591 2592 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID) 2593 return Builtin; 2594 2595 return nullptr; 2596 } 2597 2598 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID, 2599 unsigned Modifier, 2600 llvm::Type *ArgType, 2601 const CallExpr *E) { 2602 int VectorSize = 0; 2603 if (Modifier & Use64BitVectors) 2604 VectorSize = 64; 2605 else if (Modifier & Use128BitVectors) 2606 VectorSize = 128; 2607 2608 // Return type. 2609 SmallVector<llvm::Type *, 3> Tys; 2610 if (Modifier & AddRetType) { 2611 llvm::Type *Ty = ConvertType(E->getCallReturnType()); 2612 if (Modifier & VectorizeRetType) 2613 Ty = llvm::VectorType::get( 2614 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1); 2615 2616 Tys.push_back(Ty); 2617 } 2618 2619 // Arguments. 2620 if (Modifier & VectorizeArgTypes) { 2621 int Elts = VectorSize ? 
VectorSize / ArgType->getPrimitiveSizeInBits() : 1; 2622 ArgType = llvm::VectorType::get(ArgType, Elts); 2623 } 2624 2625 if (Modifier & (Add1ArgType | Add2ArgTypes)) 2626 Tys.push_back(ArgType); 2627 2628 if (Modifier & Add2ArgTypes) 2629 Tys.push_back(ArgType); 2630 2631 if (Modifier & InventFloatType) 2632 Tys.push_back(FloatTy); 2633 2634 return CGM.getIntrinsic(IntrinsicID, Tys); 2635 } 2636 2637 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, 2638 const NeonIntrinsicInfo &SISDInfo, 2639 SmallVectorImpl<Value *> &Ops, 2640 const CallExpr *E) { 2641 unsigned BuiltinID = SISDInfo.BuiltinID; 2642 unsigned int Int = SISDInfo.LLVMIntrinsic; 2643 unsigned Modifier = SISDInfo.TypeModifier; 2644 const char *s = SISDInfo.NameHint; 2645 2646 switch (BuiltinID) { 2647 case NEON::BI__builtin_neon_vcled_s64: 2648 case NEON::BI__builtin_neon_vcled_u64: 2649 case NEON::BI__builtin_neon_vcles_f32: 2650 case NEON::BI__builtin_neon_vcled_f64: 2651 case NEON::BI__builtin_neon_vcltd_s64: 2652 case NEON::BI__builtin_neon_vcltd_u64: 2653 case NEON::BI__builtin_neon_vclts_f32: 2654 case NEON::BI__builtin_neon_vcltd_f64: 2655 case NEON::BI__builtin_neon_vcales_f32: 2656 case NEON::BI__builtin_neon_vcaled_f64: 2657 case NEON::BI__builtin_neon_vcalts_f32: 2658 case NEON::BI__builtin_neon_vcaltd_f64: 2659 // Only one direction of comparisons actually exist, cmle is actually a cmge 2660 // with swapped operands. The table gives us the right intrinsic but we 2661 // still need to do the swap. 2662 std::swap(Ops[0], Ops[1]); 2663 break; 2664 } 2665 2666 assert(Int && "Generic code assumes a valid intrinsic"); 2667 2668 // Determine the type(s) of this overloaded AArch64 intrinsic. 
2669 const Expr *Arg = E->getArg(0); 2670 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType()); 2671 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E); 2672 2673 int j = 0; 2674 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0); 2675 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); 2676 ai != ae; ++ai, ++j) { 2677 llvm::Type *ArgTy = ai->getType(); 2678 if (Ops[j]->getType()->getPrimitiveSizeInBits() == 2679 ArgTy->getPrimitiveSizeInBits()) 2680 continue; 2681 2682 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy()); 2683 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate 2684 // it before inserting. 2685 Ops[j] = 2686 CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType()); 2687 Ops[j] = 2688 CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0); 2689 } 2690 2691 Value *Result = CGF.EmitNeonCall(F, Ops, s); 2692 llvm::Type *ResultType = CGF.ConvertType(E->getType()); 2693 if (ResultType->getPrimitiveSizeInBits() < 2694 Result->getType()->getPrimitiveSizeInBits()) 2695 return CGF.Builder.CreateExtractElement(Result, C0); 2696 2697 return CGF.Builder.CreateBitCast(Result, ResultType, s); 2698 } 2699 2700 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( 2701 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, 2702 const char *NameHint, unsigned Modifier, const CallExpr *E, 2703 SmallVectorImpl<llvm::Value *> &Ops, llvm::Value *Align) { 2704 // Get the last argument, which specifies the vector type. 2705 llvm::APSInt NeonTypeConst; 2706 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 2707 if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext())) 2708 return nullptr; 2709 2710 // Determine the type of this overloaded NEON intrinsic. 
2711 NeonTypeFlags Type(NeonTypeConst.getZExtValue()); 2712 bool Usgn = Type.isUnsigned(); 2713 bool Quad = Type.isQuad(); 2714 2715 llvm::VectorType *VTy = GetNeonType(this, Type); 2716 llvm::Type *Ty = VTy; 2717 if (!Ty) 2718 return nullptr; 2719 2720 unsigned Int = LLVMIntrinsic; 2721 if ((Modifier & UnsignedAlts) && !Usgn) 2722 Int = AltLLVMIntrinsic; 2723 2724 switch (BuiltinID) { 2725 default: break; 2726 case NEON::BI__builtin_neon_vabs_v: 2727 case NEON::BI__builtin_neon_vabsq_v: 2728 if (VTy->getElementType()->isFloatingPointTy()) 2729 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs"); 2730 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs"); 2731 case NEON::BI__builtin_neon_vaddhn_v: { 2732 llvm::VectorType *SrcTy = 2733 llvm::VectorType::getExtendedElementVectorType(VTy); 2734 2735 // %sum = add <4 x i32> %lhs, %rhs 2736 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 2737 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy); 2738 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn"); 2739 2740 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> 2741 Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(), 2742 SrcTy->getScalarSizeInBits() / 2); 2743 ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt); 2744 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn"); 2745 2746 // %res = trunc <4 x i32> %high to <4 x i16> 2747 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn"); 2748 } 2749 case NEON::BI__builtin_neon_vcale_v: 2750 case NEON::BI__builtin_neon_vcaleq_v: 2751 case NEON::BI__builtin_neon_vcalt_v: 2752 case NEON::BI__builtin_neon_vcaltq_v: 2753 std::swap(Ops[0], Ops[1]); 2754 case NEON::BI__builtin_neon_vcage_v: 2755 case NEON::BI__builtin_neon_vcageq_v: 2756 case NEON::BI__builtin_neon_vcagt_v: 2757 case NEON::BI__builtin_neon_vcagtq_v: { 2758 llvm::Type *VecFlt = llvm::VectorType::get( 2759 VTy->getScalarSizeInBits() == 32 ? 
FloatTy : DoubleTy, 2760 VTy->getNumElements()); 2761 llvm::Type *Tys[] = { VTy, VecFlt }; 2762 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); 2763 return EmitNeonCall(F, Ops, NameHint); 2764 } 2765 case NEON::BI__builtin_neon_vclz_v: 2766 case NEON::BI__builtin_neon_vclzq_v: 2767 // We generate target-independent intrinsic, which needs a second argument 2768 // for whether or not clz of zero is undefined; on ARM it isn't. 2769 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef())); 2770 break; 2771 case NEON::BI__builtin_neon_vcvt_f32_v: 2772 case NEON::BI__builtin_neon_vcvtq_f32_v: 2773 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 2774 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad)); 2775 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 2776 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 2777 case NEON::BI__builtin_neon_vcvt_n_f32_v: 2778 case NEON::BI__builtin_neon_vcvt_n_f64_v: 2779 case NEON::BI__builtin_neon_vcvtq_n_f32_v: 2780 case NEON::BI__builtin_neon_vcvtq_n_f64_v: { 2781 bool Double = 2782 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 2783 llvm::Type *FloatTy = 2784 GetNeonType(this, NeonTypeFlags(Double ? NeonTypeFlags::Float64 2785 : NeonTypeFlags::Float32, 2786 false, Quad)); 2787 llvm::Type *Tys[2] = { FloatTy, Ty }; 2788 Int = Usgn ? 
LLVMIntrinsic : AltLLVMIntrinsic; 2789 Function *F = CGM.getIntrinsic(Int, Tys); 2790 return EmitNeonCall(F, Ops, "vcvt_n"); 2791 } 2792 case NEON::BI__builtin_neon_vcvt_n_s32_v: 2793 case NEON::BI__builtin_neon_vcvt_n_u32_v: 2794 case NEON::BI__builtin_neon_vcvt_n_s64_v: 2795 case NEON::BI__builtin_neon_vcvt_n_u64_v: 2796 case NEON::BI__builtin_neon_vcvtq_n_s32_v: 2797 case NEON::BI__builtin_neon_vcvtq_n_u32_v: 2798 case NEON::BI__builtin_neon_vcvtq_n_s64_v: 2799 case NEON::BI__builtin_neon_vcvtq_n_u64_v: { 2800 bool Double = 2801 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 2802 llvm::Type *FloatTy = 2803 GetNeonType(this, NeonTypeFlags(Double ? NeonTypeFlags::Float64 2804 : NeonTypeFlags::Float32, 2805 false, Quad)); 2806 llvm::Type *Tys[2] = { Ty, FloatTy }; 2807 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); 2808 return EmitNeonCall(F, Ops, "vcvt_n"); 2809 } 2810 case NEON::BI__builtin_neon_vcvt_s32_v: 2811 case NEON::BI__builtin_neon_vcvt_u32_v: 2812 case NEON::BI__builtin_neon_vcvt_s64_v: 2813 case NEON::BI__builtin_neon_vcvt_u64_v: 2814 case NEON::BI__builtin_neon_vcvtq_s32_v: 2815 case NEON::BI__builtin_neon_vcvtq_u32_v: 2816 case NEON::BI__builtin_neon_vcvtq_s64_v: 2817 case NEON::BI__builtin_neon_vcvtq_u64_v: { 2818 bool Double = 2819 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 2820 llvm::Type *FloatTy = 2821 GetNeonType(this, NeonTypeFlags(Double ? NeonTypeFlags::Float64 2822 : NeonTypeFlags::Float32, 2823 false, Quad)); 2824 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy); 2825 return Usgn ? 
Builder.CreateFPToUI(Ops[0], Ty, "vcvt") 2826 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt"); 2827 } 2828 case NEON::BI__builtin_neon_vcvta_s32_v: 2829 case NEON::BI__builtin_neon_vcvta_s64_v: 2830 case NEON::BI__builtin_neon_vcvta_u32_v: 2831 case NEON::BI__builtin_neon_vcvta_u64_v: 2832 case NEON::BI__builtin_neon_vcvtaq_s32_v: 2833 case NEON::BI__builtin_neon_vcvtaq_s64_v: 2834 case NEON::BI__builtin_neon_vcvtaq_u32_v: 2835 case NEON::BI__builtin_neon_vcvtaq_u64_v: 2836 case NEON::BI__builtin_neon_vcvtn_s32_v: 2837 case NEON::BI__builtin_neon_vcvtn_s64_v: 2838 case NEON::BI__builtin_neon_vcvtn_u32_v: 2839 case NEON::BI__builtin_neon_vcvtn_u64_v: 2840 case NEON::BI__builtin_neon_vcvtnq_s32_v: 2841 case NEON::BI__builtin_neon_vcvtnq_s64_v: 2842 case NEON::BI__builtin_neon_vcvtnq_u32_v: 2843 case NEON::BI__builtin_neon_vcvtnq_u64_v: 2844 case NEON::BI__builtin_neon_vcvtp_s32_v: 2845 case NEON::BI__builtin_neon_vcvtp_s64_v: 2846 case NEON::BI__builtin_neon_vcvtp_u32_v: 2847 case NEON::BI__builtin_neon_vcvtp_u64_v: 2848 case NEON::BI__builtin_neon_vcvtpq_s32_v: 2849 case NEON::BI__builtin_neon_vcvtpq_s64_v: 2850 case NEON::BI__builtin_neon_vcvtpq_u32_v: 2851 case NEON::BI__builtin_neon_vcvtpq_u64_v: 2852 case NEON::BI__builtin_neon_vcvtm_s32_v: 2853 case NEON::BI__builtin_neon_vcvtm_s64_v: 2854 case NEON::BI__builtin_neon_vcvtm_u32_v: 2855 case NEON::BI__builtin_neon_vcvtm_u64_v: 2856 case NEON::BI__builtin_neon_vcvtmq_s32_v: 2857 case NEON::BI__builtin_neon_vcvtmq_s64_v: 2858 case NEON::BI__builtin_neon_vcvtmq_u32_v: 2859 case NEON::BI__builtin_neon_vcvtmq_u64_v: { 2860 bool Double = 2861 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 2862 llvm::Type *InTy = 2863 GetNeonType(this, 2864 NeonTypeFlags(Double ? 
NeonTypeFlags::Float64 2865 : NeonTypeFlags::Float32, false, Quad)); 2866 llvm::Type *Tys[2] = { Ty, InTy }; 2867 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint); 2868 } 2869 case NEON::BI__builtin_neon_vext_v: 2870 case NEON::BI__builtin_neon_vextq_v: { 2871 int CV = cast<ConstantInt>(Ops[2])->getSExtValue(); 2872 SmallVector<Constant*, 16> Indices; 2873 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 2874 Indices.push_back(ConstantInt::get(Int32Ty, i+CV)); 2875 2876 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 2877 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 2878 Value *SV = llvm::ConstantVector::get(Indices); 2879 return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext"); 2880 } 2881 case NEON::BI__builtin_neon_vfma_v: 2882 case NEON::BI__builtin_neon_vfmaq_v: { 2883 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 2884 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 2885 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 2886 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 2887 2888 // NEON intrinsic puts accumulator first, unlike the LLVM fma. 
2889 return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); 2890 } 2891 case NEON::BI__builtin_neon_vld1_v: 2892 case NEON::BI__builtin_neon_vld1q_v: 2893 Ops.push_back(Align); 2894 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vld1"); 2895 case NEON::BI__builtin_neon_vld2_v: 2896 case NEON::BI__builtin_neon_vld2q_v: 2897 case NEON::BI__builtin_neon_vld3_v: 2898 case NEON::BI__builtin_neon_vld3q_v: 2899 case NEON::BI__builtin_neon_vld4_v: 2900 case NEON::BI__builtin_neon_vld4q_v: { 2901 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Ty); 2902 Ops[1] = Builder.CreateCall2(F, Ops[1], Align, NameHint); 2903 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 2904 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 2905 return Builder.CreateStore(Ops[1], Ops[0]); 2906 } 2907 case NEON::BI__builtin_neon_vld1_dup_v: 2908 case NEON::BI__builtin_neon_vld1q_dup_v: { 2909 Value *V = UndefValue::get(Ty); 2910 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 2911 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 2912 LoadInst *Ld = Builder.CreateLoad(Ops[0]); 2913 Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); 2914 llvm::Constant *CI = ConstantInt::get(SizeTy, 0); 2915 Ops[0] = Builder.CreateInsertElement(V, Ld, CI); 2916 return EmitNeonSplat(Ops[0], CI); 2917 } 2918 case NEON::BI__builtin_neon_vld2_lane_v: 2919 case NEON::BI__builtin_neon_vld2q_lane_v: 2920 case NEON::BI__builtin_neon_vld3_lane_v: 2921 case NEON::BI__builtin_neon_vld3q_lane_v: 2922 case NEON::BI__builtin_neon_vld4_lane_v: 2923 case NEON::BI__builtin_neon_vld4q_lane_v: { 2924 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Ty); 2925 for (unsigned I = 2; I < Ops.size() - 1; ++I) 2926 Ops[I] = Builder.CreateBitCast(Ops[I], Ty); 2927 Ops.push_back(Align); 2928 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint); 2929 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 2930 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 2931 return Builder.CreateStore(Ops[1], Ops[0]); 2932 } 
2933 case NEON::BI__builtin_neon_vmovl_v: { 2934 llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy); 2935 Ops[0] = Builder.CreateBitCast(Ops[0], DTy); 2936 if (Usgn) 2937 return Builder.CreateZExt(Ops[0], Ty, "vmovl"); 2938 return Builder.CreateSExt(Ops[0], Ty, "vmovl"); 2939 } 2940 case NEON::BI__builtin_neon_vmovn_v: { 2941 llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy); 2942 Ops[0] = Builder.CreateBitCast(Ops[0], QTy); 2943 return Builder.CreateTrunc(Ops[0], Ty, "vmovn"); 2944 } 2945 case NEON::BI__builtin_neon_vmull_v: 2946 // FIXME: the integer vmull operations could be emitted in terms of pure 2947 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of 2948 // hoisting the exts outside loops. Until global ISel comes along that can 2949 // see through such movement this leads to bad CodeGen. So we need an 2950 // intrinsic for now. 2951 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls; 2952 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int; 2953 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); 2954 case NEON::BI__builtin_neon_vpadal_v: 2955 case NEON::BI__builtin_neon_vpadalq_v: { 2956 // The source operand type has twice as many elements of half the size. 2957 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); 2958 llvm::Type *EltTy = 2959 llvm::IntegerType::get(getLLVMContext(), EltBits / 2); 2960 llvm::Type *NarrowTy = 2961 llvm::VectorType::get(EltTy, VTy->getNumElements() * 2); 2962 llvm::Type *Tys[2] = { Ty, NarrowTy }; 2963 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint); 2964 } 2965 case NEON::BI__builtin_neon_vpaddl_v: 2966 case NEON::BI__builtin_neon_vpaddlq_v: { 2967 // The source operand type has twice as many elements of half the size. 
2968 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); 2969 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2); 2970 llvm::Type *NarrowTy = 2971 llvm::VectorType::get(EltTy, VTy->getNumElements() * 2); 2972 llvm::Type *Tys[2] = { Ty, NarrowTy }; 2973 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl"); 2974 } 2975 case NEON::BI__builtin_neon_vqdmlal_v: 2976 case NEON::BI__builtin_neon_vqdmlsl_v: { 2977 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end()); 2978 Value *Mul = EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), 2979 MulOps, "vqdmlal"); 2980 2981 SmallVector<Value *, 2> AccumOps; 2982 AccumOps.push_back(Ops[0]); 2983 AccumOps.push_back(Mul); 2984 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), 2985 AccumOps, NameHint); 2986 } 2987 case NEON::BI__builtin_neon_vqshl_n_v: 2988 case NEON::BI__builtin_neon_vqshlq_n_v: 2989 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n", 2990 1, false); 2991 case NEON::BI__builtin_neon_vqshlu_n_v: 2992 case NEON::BI__builtin_neon_vqshluq_n_v: 2993 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n", 2994 1, false); 2995 case NEON::BI__builtin_neon_vrecpe_v: 2996 case NEON::BI__builtin_neon_vrecpeq_v: 2997 case NEON::BI__builtin_neon_vrsqrte_v: 2998 case NEON::BI__builtin_neon_vrsqrteq_v: 2999 Int = Ty->isFPOrFPVectorTy() ? 
LLVMIntrinsic : AltLLVMIntrinsic; 3000 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint); 3001 3002 case NEON::BI__builtin_neon_vrshr_n_v: 3003 case NEON::BI__builtin_neon_vrshrq_n_v: 3004 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 3005 1, true); 3006 case NEON::BI__builtin_neon_vshl_n_v: 3007 case NEON::BI__builtin_neon_vshlq_n_v: 3008 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false); 3009 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1], 3010 "vshl_n"); 3011 case NEON::BI__builtin_neon_vshll_n_v: { 3012 llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy); 3013 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 3014 if (Usgn) 3015 Ops[0] = Builder.CreateZExt(Ops[0], VTy); 3016 else 3017 Ops[0] = Builder.CreateSExt(Ops[0], VTy); 3018 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false); 3019 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n"); 3020 } 3021 case NEON::BI__builtin_neon_vshrn_n_v: { 3022 llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy); 3023 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 3024 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false); 3025 if (Usgn) 3026 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]); 3027 else 3028 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]); 3029 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n"); 3030 } 3031 case NEON::BI__builtin_neon_vshr_n_v: 3032 case NEON::BI__builtin_neon_vshrq_n_v: 3033 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n"); 3034 case NEON::BI__builtin_neon_vst1_v: 3035 case NEON::BI__builtin_neon_vst1q_v: 3036 case NEON::BI__builtin_neon_vst2_v: 3037 case NEON::BI__builtin_neon_vst2q_v: 3038 case NEON::BI__builtin_neon_vst3_v: 3039 case NEON::BI__builtin_neon_vst3q_v: 3040 case NEON::BI__builtin_neon_vst4_v: 3041 case NEON::BI__builtin_neon_vst4q_v: 3042 case NEON::BI__builtin_neon_vst2_lane_v: 3043 case NEON::BI__builtin_neon_vst2q_lane_v: 3044 case NEON::BI__builtin_neon_vst3_lane_v: 3045 case 
NEON::BI__builtin_neon_vst3q_lane_v: 3046 case NEON::BI__builtin_neon_vst4_lane_v: 3047 case NEON::BI__builtin_neon_vst4q_lane_v: 3048 Ops.push_back(Align); 3049 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, ""); 3050 case NEON::BI__builtin_neon_vsubhn_v: { 3051 llvm::VectorType *SrcTy = 3052 llvm::VectorType::getExtendedElementVectorType(VTy); 3053 3054 // %sum = add <4 x i32> %lhs, %rhs 3055 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 3056 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy); 3057 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn"); 3058 3059 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> 3060 Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(), 3061 SrcTy->getScalarSizeInBits() / 2); 3062 ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt); 3063 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn"); 3064 3065 // %res = trunc <4 x i32> %high to <4 x i16> 3066 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn"); 3067 } 3068 case NEON::BI__builtin_neon_vtrn_v: 3069 case NEON::BI__builtin_neon_vtrnq_v: { 3070 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 3071 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3072 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 3073 Value *SV = nullptr; 3074 3075 for (unsigned vi = 0; vi != 2; ++vi) { 3076 SmallVector<Constant*, 16> Indices; 3077 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 3078 Indices.push_back(Builder.getInt32(i+vi)); 3079 Indices.push_back(Builder.getInt32(i+e+vi)); 3080 } 3081 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); 3082 SV = llvm::ConstantVector::get(Indices); 3083 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn"); 3084 SV = Builder.CreateStore(SV, Addr); 3085 } 3086 return SV; 3087 } 3088 case NEON::BI__builtin_neon_vtst_v: 3089 case NEON::BI__builtin_neon_vtstq_v: { 3090 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3091 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3092 Ops[0] 
= Builder.CreateAnd(Ops[0], Ops[1]); 3093 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], 3094 ConstantAggregateZero::get(Ty)); 3095 return Builder.CreateSExt(Ops[0], Ty, "vtst"); 3096 } 3097 case NEON::BI__builtin_neon_vuzp_v: 3098 case NEON::BI__builtin_neon_vuzpq_v: { 3099 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 3100 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3101 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 3102 Value *SV = nullptr; 3103 3104 for (unsigned vi = 0; vi != 2; ++vi) { 3105 SmallVector<Constant*, 16> Indices; 3106 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 3107 Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi)); 3108 3109 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); 3110 SV = llvm::ConstantVector::get(Indices); 3111 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp"); 3112 SV = Builder.CreateStore(SV, Addr); 3113 } 3114 return SV; 3115 } 3116 case NEON::BI__builtin_neon_vzip_v: 3117 case NEON::BI__builtin_neon_vzipq_v: { 3118 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 3119 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3120 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 3121 Value *SV = nullptr; 3122 3123 for (unsigned vi = 0; vi != 2; ++vi) { 3124 SmallVector<Constant*, 16> Indices; 3125 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 3126 Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1)); 3127 Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e)); 3128 } 3129 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); 3130 SV = llvm::ConstantVector::get(Indices); 3131 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip"); 3132 SV = Builder.CreateStore(SV, Addr); 3133 } 3134 return SV; 3135 } 3136 } 3137 3138 assert(Int && "Expected valid intrinsic number"); 3139 3140 // Determine the type(s) of this overloaded AArch64 intrinsic. 
3141 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E); 3142 3143 Value *Result = EmitNeonCall(F, Ops, NameHint); 3144 llvm::Type *ResultType = ConvertType(E->getType()); 3145 // AArch64 intrinsic one-element vector type cast to 3146 // scalar type expected by the builtin 3147 return Builder.CreateBitCast(Result, ResultType, NameHint); 3148 } 3149 3150 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr( 3151 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp, 3152 const CmpInst::Predicate Ip, const Twine &Name) { 3153 llvm::Type *OTy = Op->getType(); 3154 3155 // FIXME: this is utterly horrific. We should not be looking at previous 3156 // codegen context to find out what needs doing. Unfortunately TableGen 3157 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32 3158 // (etc). 3159 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op)) 3160 OTy = BI->getOperand(0)->getType(); 3161 3162 Op = Builder.CreateBitCast(Op, OTy); 3163 if (OTy->getScalarType()->isFloatingPointTy()) { 3164 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy)); 3165 } else { 3166 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy)); 3167 } 3168 return Builder.CreateSExt(Op, Ty, Name); 3169 } 3170 3171 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, 3172 Value *ExtOp, Value *IndexOp, 3173 llvm::Type *ResTy, unsigned IntID, 3174 const char *Name) { 3175 SmallVector<Value *, 2> TblOps; 3176 if (ExtOp) 3177 TblOps.push_back(ExtOp); 3178 3179 // Build a vector containing sequential number like (0, 1, 2, ..., 15) 3180 SmallVector<Constant*, 16> Indices; 3181 llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType()); 3182 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) { 3183 Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i)); 3184 Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i+1)); 3185 } 3186 Value *SV = llvm::ConstantVector::get(Indices); 3187 3188 int PairPos = 0, End = Ops.size() - 1; 
3189 while (PairPos < End) { 3190 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 3191 Ops[PairPos+1], SV, Name)); 3192 PairPos += 2; 3193 } 3194 3195 // If there's an odd number of 64-bit lookup table, fill the high 64-bit 3196 // of the 128-bit lookup table with zero. 3197 if (PairPos == End) { 3198 Value *ZeroTbl = ConstantAggregateZero::get(TblTy); 3199 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 3200 ZeroTbl, SV, Name)); 3201 } 3202 3203 Function *TblF; 3204 TblOps.push_back(IndexOp); 3205 TblF = CGF.CGM.getIntrinsic(IntID, ResTy); 3206 3207 return CGF.EmitNeonCall(TblF, TblOps, Name); 3208 } 3209 3210 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) { 3211 switch (BuiltinID) { 3212 default: 3213 return nullptr; 3214 case ARM::BI__builtin_arm_nop: 3215 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), 3216 llvm::ConstantInt::get(Int32Ty, 0)); 3217 case ARM::BI__builtin_arm_yield: 3218 case ARM::BI__yield: 3219 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), 3220 llvm::ConstantInt::get(Int32Ty, 1)); 3221 case ARM::BI__builtin_arm_wfe: 3222 case ARM::BI__wfe: 3223 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), 3224 llvm::ConstantInt::get(Int32Ty, 2)); 3225 case ARM::BI__builtin_arm_wfi: 3226 case ARM::BI__wfi: 3227 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), 3228 llvm::ConstantInt::get(Int32Ty, 3)); 3229 case ARM::BI__builtin_arm_sev: 3230 case ARM::BI__sev: 3231 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), 3232 llvm::ConstantInt::get(Int32Ty, 4)); 3233 case ARM::BI__builtin_arm_sevl: 3234 case ARM::BI__sevl: 3235 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), 3236 llvm::ConstantInt::get(Int32Ty, 5)); 3237 } 3238 } 3239 3240 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, 3241 const CallExpr *E) { 3242 if (auto Hint = GetValueForARMHint(BuiltinID)) 3243 return Hint; 3244 3245 if (BuiltinID == 
ARM::BI__emit) {
    // __emit: place a raw instruction encoding in the output through a
    // side-effecting inline-asm ".inst" directive. The constant operand is
    // narrowed to 16 bits (".inst.n") when the triple's arch is thumb and to
    // 32 bits (".inst") otherwise.
    bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
    llvm::FunctionType *FTy =
        llvm::FunctionType::get(VoidTy, /*Variadic=*/false);

    APSInt Value;
    if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
      llvm_unreachable("Sema will ensure that the parameter is constant");

    uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();

    llvm::InlineAsm *Emit =
        IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
                                 /*SideEffects=*/true)
                : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
                                 /*SideEffects=*/true);

    return Builder.CreateCall(Emit);
  }

  // __builtin_arm_dbg: pass the single operand straight to the arm_dbg
  // intrinsic.
  if (BuiltinID == ARM::BI__builtin_arm_dbg) {
    Value *Option = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
  }

  // __builtin_arm_prefetch: lowered to the generic prefetch intrinsic. The
  // builtin supplies (address, rw, is-data); the intrinsic is called with
  // (address, rw, locality, is-data), locality being the constant 3.
  if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *RW      = EmitScalarExpr(E->getArg(1));
    Value *IsData  = EmitScalarExpr(E->getArg(2));

    // Locality is not supported on ARM target
    Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);

    Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
    return Builder.CreateCall4(F, Address, RW, Locality, IsData);
  }

  // __builtin_arm_rbit: single-operand call to the arm_rbit intrinsic.
  if (BuiltinID == ARM::BI__builtin_arm_rbit) {
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_rbit),
                                               EmitScalarExpr(E->getArg(0)),
                              "rbit");
  }

  // __clear_cache: emitted as a nounwind runtime call to a function with the
  // callee declaration's own name and type, forwarding both arguments.
  if (BuiltinID == ARM::BI__clear_cache) {
    assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
    const FunctionDecl *FD = E->getDirectCallee();
    SmallVector<Value*, 2> Ops;
    for (unsigned i = 0; i < 2; i++)
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
    StringRef Name = FD->getName();
    return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
  }

  // 64-bit exclusive loads: ldrexd/__ldrexd always, and ldrex/ldaex when the
  // result type is 64 bits wide. This check must precede the generic
  // ldrex/ldaex handling below, which would otherwise also match.
  if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
      ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
        BuiltinID == ARM::BI__builtin_arm_ldaex) &&
       getContext().getTypeSize(E->getType()) == 64) ||
      BuiltinID == ARM::BI__ldrexd) {
    Function *F;

    switch (BuiltinID) {
    default: llvm_unreachable("unexpected builtin");
    case ARM::BI__builtin_arm_ldaex:
      F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
      break;
    case ARM::BI__builtin_arm_ldrexd:
    case ARM::BI__builtin_arm_ldrex:
    case ARM::BI__ldrexd:
      F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
      break;
    }

    Value *LdPtr = EmitScalarExpr(E->getArg(0));
    Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
                                    "ldrexd");

    // The intrinsic returns a two-element aggregate. Element 1 becomes the
    // upper 32 bits and element 0 the lower 32 bits of the 64-bit result.
    Value *Val0 = Builder.CreateExtractValue(Val, 1);
    Value *Val1 = Builder.CreateExtractValue(Val, 0);
    Val0 = Builder.CreateZExt(Val0, Int64Ty);
    Val1 = Builder.CreateZExt(Val1, Int64Ty);

    Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
    Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
    Val = Builder.CreateOr(Val, Val1);
    return Builder.CreateBitCast(Val, ConvertType(E->getType()));
  }

  // Remaining (non-64-bit) exclusive loads. The intrinsic is overloaded on
  // the pointer type, which is an integer pointer whose width is taken from
  // the builtin's result type.
  if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
      BuiltinID == ARM::BI__builtin_arm_ldaex) {
    Value *LoadAddr = EmitScalarExpr(E->getArg(0));

    QualType Ty = E->getType();
    llvm::Type *RealResTy = ConvertType(Ty);
    llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
                                                  getContext().getTypeSize(Ty));
    LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());

    Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
                                       ? Intrinsic::arm_ldaex
                                       : Intrinsic::arm_ldrex,
                                   LoadAddr->getType());
    Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");

    // Convert the intrinsic's result back to the builtin's result type:
    // int-to-pointer for pointer results, otherwise truncate to the integer
    // of matching width and bitcast.
    if (RealResTy->isPointerTy())
      return Builder.CreateIntToPtr(Val, RealResTy);
    else {
      Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
      return Builder.CreateBitCast(Val, RealResTy);
    }
  }

  // 64-bit exclusive stores: strexd always, and strex/stlex when the stored
  // value's type is 64 bits wide. As with the loads, this must precede the
  // generic strex/stlex handling below.
  if (BuiltinID == ARM::BI__builtin_arm_strexd ||
      ((BuiltinID == ARM::BI__builtin_arm_stlex ||
        BuiltinID == ARM::BI__builtin_arm_strex) &&
       getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
    Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
                                       ? Intrinsic::arm_stlexd
                                       : Intrinsic::arm_strexd);
    llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, nullptr);

    // Split the value into two i32 halves by storing it to a temporary and
    // reloading that memory as a { i32, i32 } struct.
    Value *Tmp = CreateMemTemp(E->getArg(0)->getType());
    Value *Val = EmitScalarExpr(E->getArg(0));
    Builder.CreateStore(Val, Tmp);

    Value *LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
    Val = Builder.CreateLoad(LdPtr);

    Value *Arg0 = Builder.CreateExtractValue(Val, 0);
    Value *Arg1 = Builder.CreateExtractValue(Val, 1);
    Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
    return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "strexd");
  }

  // Remaining (non-64-bit) exclusive stores. The value is passed as an i32
  // and the intrinsic is overloaded on the (integer) pointer type.
  if (BuiltinID == ARM::BI__builtin_arm_strex ||
      BuiltinID == ARM::BI__builtin_arm_stlex) {
    Value *StoreVal = EmitScalarExpr(E->getArg(0));
    Value *StoreAddr = EmitScalarExpr(E->getArg(1));

    QualType Ty = E->getArg(0)->getType();
    llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
                                                 getContext().getTypeSize(Ty));
    StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());

    if (StoreVal->getType()->isPointerTy())
      StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
    else {
      StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
      StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
    }

    Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
                                       ? Intrinsic::arm_stlex
                                       : Intrinsic::arm_strex,
                                   StoreAddr->getType());
    return Builder.CreateCall2(F, StoreVal, StoreAddr, "strex");
  }

  // __builtin_arm_clrex: argument-less call to the arm_clrex intrinsic.
  if (BuiltinID == ARM::BI__builtin_arm_clrex) {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
    return Builder.CreateCall(F);
  }

  // CRC32
  // Map the builtin to its intrinsic; CRCIntrinsicID stays not_intrinsic for
  // anything that is not a CRC builtin. The 64-bit "d" variants reuse the
  // 32-bit "w" intrinsics (see below).
  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
  switch (BuiltinID) {
  case ARM::BI__builtin_arm_crc32b:
    CRCIntrinsicID = Intrinsic::arm_crc32b; break;
  case ARM::BI__builtin_arm_crc32cb:
    CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
  case ARM::BI__builtin_arm_crc32h:
    CRCIntrinsicID = Intrinsic::arm_crc32h; break;
  case ARM::BI__builtin_arm_crc32ch:
    CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
  case ARM::BI__builtin_arm_crc32w:
  case ARM::BI__builtin_arm_crc32d:
    CRCIntrinsicID = Intrinsic::arm_crc32w; break;
  case ARM::BI__builtin_arm_crc32cw:
  case ARM::BI__builtin_arm_crc32cd:
    CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
  }

  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
    Value *Arg1 = EmitScalarExpr(E->getArg(1));

    // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
    // intrinsics, hence we need different codegen for these cases.
    if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
        BuiltinID == ARM::BI__builtin_arm_crc32cd) {
      // Feed the low 32 bits of the data first, then the high 32 bits,
      // chaining the first call's result into the second.
      Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
      Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
      Value *Arg1b = Builder.CreateLShr(Arg1, C1);
      Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);

      Function *F = CGM.getIntrinsic(CRCIntrinsicID);
      Value *Res = Builder.CreateCall2(F, Arg0, Arg1a);
      return Builder.CreateCall2(F, Res, Arg1b);
    } else {
      Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);

      Function *F = CGM.getIntrinsic(CRCIntrinsicID);
      return Builder.CreateCall2(F, Arg0, Arg1);
    }
  }

  // Everything below handles NEON (and vcvtr) builtins. Emit every call
  // argument except the last one into Ops. For the load/store builtins
  // listed, the pointer argument (index 0 or 1 depending on the builtin) is
  // emitted together with its alignment, which is kept in Align.
  SmallVector<Value*, 4> Ops;
  llvm::Value *Align = nullptr;
  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
    if (i == 0) {
      switch (BuiltinID) {
      case NEON::BI__builtin_neon_vld1_v:
      case NEON::BI__builtin_neon_vld1q_v:
      case NEON::BI__builtin_neon_vld1q_lane_v:
      case NEON::BI__builtin_neon_vld1_lane_v:
      case NEON::BI__builtin_neon_vld1_dup_v:
      case NEON::BI__builtin_neon_vld1q_dup_v:
      case NEON::BI__builtin_neon_vst1_v:
      case NEON::BI__builtin_neon_vst1q_v:
      case NEON::BI__builtin_neon_vst1q_lane_v:
      case NEON::BI__builtin_neon_vst1_lane_v:
      case NEON::BI__builtin_neon_vst2_v:
      case NEON::BI__builtin_neon_vst2q_v:
      case NEON::BI__builtin_neon_vst2_lane_v:
      case NEON::BI__builtin_neon_vst2q_lane_v:
      case NEON::BI__builtin_neon_vst3_v:
      case NEON::BI__builtin_neon_vst3q_v:
      case NEON::BI__builtin_neon_vst3_lane_v:
      case NEON::BI__builtin_neon_vst3q_lane_v:
      case NEON::BI__builtin_neon_vst4_v:
      case NEON::BI__builtin_neon_vst4q_v:
      case NEON::BI__builtin_neon_vst4_lane_v:
      case NEON::BI__builtin_neon_vst4q_lane_v:
        // Get the alignment for the argument in addition to the value;
        // we'll use it later.
        std::pair<llvm::Value*, unsigned> Src =
            EmitPointerWithAlignment(E->getArg(0));
        Ops.push_back(Src.first);
        Align = Builder.getInt32(Src.second);
        continue;
      }
    }
    if (i == 1) {
      switch (BuiltinID) {
      case NEON::BI__builtin_neon_vld2_v:
      case NEON::BI__builtin_neon_vld2q_v:
      case NEON::BI__builtin_neon_vld3_v:
      case NEON::BI__builtin_neon_vld3q_v:
      case NEON::BI__builtin_neon_vld4_v:
      case NEON::BI__builtin_neon_vld4q_v:
      case NEON::BI__builtin_neon_vld2_lane_v:
      case NEON::BI__builtin_neon_vld2q_lane_v:
      case NEON::BI__builtin_neon_vld3_lane_v:
      case NEON::BI__builtin_neon_vld3q_lane_v:
      case NEON::BI__builtin_neon_vld4_lane_v:
      case NEON::BI__builtin_neon_vld4q_lane_v:
      case NEON::BI__builtin_neon_vld2_dup_v:
      case NEON::BI__builtin_neon_vld3_dup_v:
      case NEON::BI__builtin_neon_vld4_dup_v:
        // Get the alignment for the argument in addition to the value;
        // we'll use it later.
        std::pair<llvm::Value*, unsigned> Src =
            EmitPointerWithAlignment(E->getArg(1));
        Ops.push_back(Src.first);
        Align = Builder.getInt32(Src.second);
        continue;
      }
    }
    Ops.push_back(EmitScalarExpr(E->getArg(i)));
  }

  // Builtins whose last argument is a real operand rather than a type code.
  // The loop above skipped that last argument, so each case emits it here.
  switch (BuiltinID) {
  default: break;
  // vget_lane and vset_lane are not overloaded and do not have an extra
  // argument that specifies the vector type.
  case NEON::BI__builtin_neon_vget_lane_i8:
  case NEON::BI__builtin_neon_vget_lane_i16:
  case NEON::BI__builtin_neon_vget_lane_i32:
  case NEON::BI__builtin_neon_vget_lane_i64:
  case NEON::BI__builtin_neon_vget_lane_f32:
  case NEON::BI__builtin_neon_vgetq_lane_i8:
  case NEON::BI__builtin_neon_vgetq_lane_i16:
  case NEON::BI__builtin_neon_vgetq_lane_i32:
  case NEON::BI__builtin_neon_vgetq_lane_i64:
  case NEON::BI__builtin_neon_vgetq_lane_f32:
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vset_lane_i8:
  case NEON::BI__builtin_neon_vset_lane_i16:
  case NEON::BI__builtin_neon_vset_lane_i32:
  case NEON::BI__builtin_neon_vset_lane_i64:
  case NEON::BI__builtin_neon_vset_lane_f32:
  case NEON::BI__builtin_neon_vsetq_lane_i8:
  case NEON::BI__builtin_neon_vsetq_lane_i16:
  case NEON::BI__builtin_neon_vsetq_lane_i32:
  case NEON::BI__builtin_neon_vsetq_lane_i64:
  case NEON::BI__builtin_neon_vsetq_lane_f32:
    // Builtin argument order is (element, vector, lane).
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");

  // Non-polymorphic crypto instructions also not overloaded
  case NEON::BI__builtin_neon_vsha1h_u32:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
                        "vsha1h");
  // NOTE(review): the three cases below pass "vsha1h" as the name hint even
  // though they call sha1c/sha1p/sha1m. This looks like copy/paste and only
  // affects the IR value name; confirm no tests match on it before changing.
  case NEON::BI__builtin_neon_vsha1cq_u32:
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
                        "vsha1h");
  case NEON::BI__builtin_neon_vsha1pq_u32:
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
                        "vsha1h");
  case NEON::BI__builtin_neon_vsha1mq_u32:
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
                        "vsha1h");
  }

  // Get the last argument, which specifies the vector type.
  llvm::APSInt Result;
  const Expr *Arg = E->getArg(E->getNumArgs()-1);
  if (!Arg->isIntegerConstantExpr(Result, getContext()))
    return nullptr;

  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
      BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
    // Determine the overloaded type of this builtin.
    llvm::Type *Ty;
    if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
      Ty = FloatTy;
    else
      Ty = DoubleTy;

    // Determine whether this is an unsigned conversion or not.
    bool usgn = Result.getZExtValue() == 1;
    unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;

    // Call the appropriate intrinsic.
    Function *F = CGM.getIntrinsic(Int, Ty);
    return Builder.CreateCall(F, Ops, "vcvtr");
  }

  // Determine the type of this overloaded NEON intrinsic.
  NeonTypeFlags Type(Result.getZExtValue());
  bool usgn = Type.isUnsigned();
  bool rightShift = false;

  llvm::VectorType *VTy = GetNeonType(this, Type);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  // Many NEON builtins have identical semantics and uses in ARM and
  // AArch64. Emit these in a single function.
  auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
  const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
      IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
  if (Builtin)
    return EmitCommonNeonBuiltinExpr(
        Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
        Builtin->NameHint, Builtin->TypeModifier, E, Ops, Align);

  // Builtins not found in the shared map are handled here; anything still
  // unrecognised yields nullptr.
  unsigned Int;
  switch (BuiltinID) {
  default: return nullptr;
  case NEON::BI__builtin_neon_vld1q_lane_v:
    // Handle 64-bit integer elements as a special case.  Use shuffles of
    // one-element vectors to avoid poor code for i64 in the backend.
    if (VTy->getElementType()->isIntegerTy(64)) {
      // Extract the other lane.
      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
      int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
      Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
      // Load the value as a one-element vector.
      Ty = llvm::VectorType::get(VTy->getElementType(), 1);
      Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty);
      Value *Ld = Builder.CreateCall2(F, Ops[0], Align);
      // Combine them.
      SmallVector<Constant*, 2> Indices;
      Indices.push_back(ConstantInt::get(Int32Ty, 1-Lane));
      Indices.push_back(ConstantInt::get(Int32Ty, Lane));
      SV = llvm::ConstantVector::get(Indices);
      return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
    }
    // fall through
  case NEON::BI__builtin_neon_vld1_lane_v: {
    // Load one element with the recorded alignment and insert it into the
    // given vector at the requested lane.
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    LoadInst *Ld = Builder.CreateLoad(Ops[0]);
    Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
    return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
  }
  case NEON::BI__builtin_neon_vld2_dup_v:
  case NEON::BI__builtin_neon_vld3_dup_v:
  case NEON::BI__builtin_neon_vld4_dup_v: {
    // Handle 64-bit elements as a special-case.  There is no "dup" needed.
    if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
      switch (BuiltinID) {
      case NEON::BI__builtin_neon_vld2_dup_v:
        Int = Intrinsic::arm_neon_vld2;
        break;
      case NEON::BI__builtin_neon_vld3_dup_v:
        Int = Intrinsic::arm_neon_vld3;
        break;
      case NEON::BI__builtin_neon_vld4_dup_v:
        Int = Intrinsic::arm_neon_vld4;
        break;
      default: llvm_unreachable("unknown vld_dup intrinsic?");
      }
      Function *F = CGM.getIntrinsic(Int, Ty);
      Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup");
      // Store the loaded aggregate through Ops[0].
      Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
      Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
      return Builder.CreateStore(Ops[1], Ops[0]);
    }
    // Otherwise use the lane-load intrinsic with undef input vectors and
    // lane 0, then splat lane 0 across each resulting vector.
    switch (BuiltinID) {
    case NEON::BI__builtin_neon_vld2_dup_v:
      Int = Intrinsic::arm_neon_vld2lane;
      break;
    case NEON::BI__builtin_neon_vld3_dup_v:
      Int = Intrinsic::arm_neon_vld3lane;
      break;
    case NEON::BI__builtin_neon_vld4_dup_v:
      Int = Intrinsic::arm_neon_vld4lane;
      break;
    default: llvm_unreachable("unknown vld_dup intrinsic?");
    }
    Function *F = CGM.getIntrinsic(Int, Ty);
    llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());

    // Call operands: pointer, one undef vector per result element, lane
    // index 0, alignment.
    SmallVector<Value*, 6> Args;
    Args.push_back(Ops[1]);
    Args.append(STy->getNumElements(), UndefValue::get(Ty));

    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
    Args.push_back(CI);
    Args.push_back(Align);

    Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
    // splat lane 0 to all elts in each vector of the result.
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      Value *Val = Builder.CreateExtractValue(Ops[1], i);
      Value *Elt = Builder.CreateBitCast(Val, Ty);
      Elt = EmitNeonSplat(Elt, CI);
      Elt = Builder.CreateBitCast(Elt, Val->getType());
      Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
    }
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vqrshrn_n_v:
    Int =
      usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
                        1, true);
  case NEON::BI__builtin_neon_vqrshrun_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
                        Ops, "vqrshrun_n", 1, true);
  case NEON::BI__builtin_neon_vqshrn_n_v:
    Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
                        1, true);
  case NEON::BI__builtin_neon_vqshrun_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
                        Ops, "vqshrun_n", 1, true);
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
                        Ops, "vrecpe");
  case NEON::BI__builtin_neon_vrshrn_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
                        Ops, "vrshrn_n", 1, true);
  case NEON::BI__builtin_neon_vrsra_n_v:
  case NEON::BI__builtin_neon_vrsraq_n_v:
    // Accumulate: Ops[0] + vrshift{u,s}(Ops[1], shift vector from Ops[2]).
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
    Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]);
    return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
  case NEON::BI__builtin_neon_vsri_n_v:
  case NEON::BI__builtin_neon_vsriq_n_v:
    rightShift = true;
    // fall through: vsri uses the same vshiftins lowering as vsli, differing
    // only in the rightShift flag passed to EmitNeonShiftVector.
  case NEON::BI__builtin_neon_vsli_n_v:
  case NEON::BI__builtin_neon_vsliq_n_v:
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
                        Ops, "vsli_n");
  case NEON::BI__builtin_neon_vsra_n_v:
  case NEON::BI__builtin_neon_vsraq_n_v:
    // Accumulate: Ops[0] + (Ops[1] shifted right by the immediate Ops[2]).
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  case NEON::BI__builtin_neon_vst1q_lane_v:
    // Handle 64-bit integer elements as a special case.  Use a shuffle to get
    // a one-element vector and avoid poor code for i64 in the backend.
    if (VTy->getElementType()->isIntegerTy(64)) {
      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
      Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
      // The lane operand is consumed by the shuffle; its slot now carries
      // the alignment operand for the vst1 call.
      Ops[2] = Align;
      return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
                                                 Ops[1]->getType()), Ops);
    }
    // fall through
  case NEON::BI__builtin_neon_vst1_lane_v: {
    // Extract the requested lane and store it as a scalar with the recorded
    // alignment.
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    StoreInst *St = Builder.CreateStore(Ops[1],
                                        Builder.CreateBitCast(Ops[0], Ty));
    St->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
    return St;
  }
  // Table lookups map one-to-one onto non-overloaded intrinsics.
  case NEON::BI__builtin_neon_vtbl1_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
                        Ops, "vtbl1");
  case NEON::BI__builtin_neon_vtbl2_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
                        Ops, "vtbl2");
  case NEON::BI__builtin_neon_vtbl3_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
                        Ops, "vtbl3");
  case NEON::BI__builtin_neon_vtbl4_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
                        Ops, "vtbl4");
  case NEON::BI__builtin_neon_vtbx1_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
                        Ops, "vtbx1");
  case NEON::BI__builtin_neon_vtbx2_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
                        Ops, "vtbx2");
  case NEON::BI__builtin_neon_vtbx3_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
                        Ops, "vtbx3");
  case NEON::BI__builtin_neon_vtbx4_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
                        Ops, "vtbx4");
  }
}

static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
                                        const CallExpr *E,
                                        SmallVectorImpl<Value
*> &Ops) { 3794 unsigned int Int = 0; 3795 const char *s = nullptr; 3796 3797 switch (BuiltinID) { 3798 default: 3799 return nullptr; 3800 case NEON::BI__builtin_neon_vtbl1_v: 3801 case NEON::BI__builtin_neon_vqtbl1_v: 3802 case NEON::BI__builtin_neon_vqtbl1q_v: 3803 case NEON::BI__builtin_neon_vtbl2_v: 3804 case NEON::BI__builtin_neon_vqtbl2_v: 3805 case NEON::BI__builtin_neon_vqtbl2q_v: 3806 case NEON::BI__builtin_neon_vtbl3_v: 3807 case NEON::BI__builtin_neon_vqtbl3_v: 3808 case NEON::BI__builtin_neon_vqtbl3q_v: 3809 case NEON::BI__builtin_neon_vtbl4_v: 3810 case NEON::BI__builtin_neon_vqtbl4_v: 3811 case NEON::BI__builtin_neon_vqtbl4q_v: 3812 break; 3813 case NEON::BI__builtin_neon_vtbx1_v: 3814 case NEON::BI__builtin_neon_vqtbx1_v: 3815 case NEON::BI__builtin_neon_vqtbx1q_v: 3816 case NEON::BI__builtin_neon_vtbx2_v: 3817 case NEON::BI__builtin_neon_vqtbx2_v: 3818 case NEON::BI__builtin_neon_vqtbx2q_v: 3819 case NEON::BI__builtin_neon_vtbx3_v: 3820 case NEON::BI__builtin_neon_vqtbx3_v: 3821 case NEON::BI__builtin_neon_vqtbx3q_v: 3822 case NEON::BI__builtin_neon_vtbx4_v: 3823 case NEON::BI__builtin_neon_vqtbx4_v: 3824 case NEON::BI__builtin_neon_vqtbx4q_v: 3825 break; 3826 } 3827 3828 assert(E->getNumArgs() >= 3); 3829 3830 // Get the last argument, which specifies the vector type. 3831 llvm::APSInt Result; 3832 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 3833 if (!Arg->isIntegerConstantExpr(Result, CGF.getContext())) 3834 return nullptr; 3835 3836 // Determine the type of this overloaded NEON intrinsic. 3837 NeonTypeFlags Type(Result.getZExtValue()); 3838 llvm::VectorType *VTy = GetNeonType(&CGF, Type); 3839 llvm::Type *Ty = VTy; 3840 if (!Ty) 3841 return nullptr; 3842 3843 unsigned nElts = VTy->getNumElements(); 3844 3845 CodeGen::CGBuilderTy &Builder = CGF.Builder; 3846 3847 // AArch64 scalar builtins are not overloaded, they do not have an extra 3848 // argument that specifies the vector type, need to handle each case. 
3849 SmallVector<Value *, 2> TblOps; 3850 switch (BuiltinID) { 3851 case NEON::BI__builtin_neon_vtbl1_v: { 3852 TblOps.push_back(Ops[0]); 3853 return packTBLDVectorList(CGF, TblOps, nullptr, Ops[1], Ty, 3854 Intrinsic::aarch64_neon_tbl1, "vtbl1"); 3855 } 3856 case NEON::BI__builtin_neon_vtbl2_v: { 3857 TblOps.push_back(Ops[0]); 3858 TblOps.push_back(Ops[1]); 3859 return packTBLDVectorList(CGF, TblOps, nullptr, Ops[2], Ty, 3860 Intrinsic::aarch64_neon_tbl1, "vtbl1"); 3861 } 3862 case NEON::BI__builtin_neon_vtbl3_v: { 3863 TblOps.push_back(Ops[0]); 3864 TblOps.push_back(Ops[1]); 3865 TblOps.push_back(Ops[2]); 3866 return packTBLDVectorList(CGF, TblOps, nullptr, Ops[3], Ty, 3867 Intrinsic::aarch64_neon_tbl2, "vtbl2"); 3868 } 3869 case NEON::BI__builtin_neon_vtbl4_v: { 3870 TblOps.push_back(Ops[0]); 3871 TblOps.push_back(Ops[1]); 3872 TblOps.push_back(Ops[2]); 3873 TblOps.push_back(Ops[3]); 3874 return packTBLDVectorList(CGF, TblOps, nullptr, Ops[4], Ty, 3875 Intrinsic::aarch64_neon_tbl2, "vtbl2"); 3876 } 3877 case NEON::BI__builtin_neon_vtbx1_v: { 3878 TblOps.push_back(Ops[1]); 3879 Value *TblRes = packTBLDVectorList(CGF, TblOps, nullptr, Ops[2], Ty, 3880 Intrinsic::aarch64_neon_tbl1, "vtbl1"); 3881 3882 llvm::Constant *Eight = ConstantInt::get(VTy->getElementType(), 8); 3883 Value* EightV = llvm::ConstantVector::getSplat(nElts, Eight); 3884 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV); 3885 CmpRes = Builder.CreateSExt(CmpRes, Ty); 3886 3887 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]); 3888 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 3889 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 3890 } 3891 case NEON::BI__builtin_neon_vtbx2_v: { 3892 TblOps.push_back(Ops[1]); 3893 TblOps.push_back(Ops[2]); 3894 return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[3], Ty, 3895 Intrinsic::aarch64_neon_tbx1, "vtbx1"); 3896 } 3897 case NEON::BI__builtin_neon_vtbx3_v: { 3898 TblOps.push_back(Ops[1]); 
3899 TblOps.push_back(Ops[2]); 3900 TblOps.push_back(Ops[3]); 3901 Value *TblRes = packTBLDVectorList(CGF, TblOps, nullptr, Ops[4], Ty, 3902 Intrinsic::aarch64_neon_tbl2, "vtbl2"); 3903 3904 llvm::Constant *TwentyFour = ConstantInt::get(VTy->getElementType(), 24); 3905 Value* TwentyFourV = llvm::ConstantVector::getSplat(nElts, TwentyFour); 3906 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4], 3907 TwentyFourV); 3908 CmpRes = Builder.CreateSExt(CmpRes, Ty); 3909 3910 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]); 3911 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 3912 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 3913 } 3914 case NEON::BI__builtin_neon_vtbx4_v: { 3915 TblOps.push_back(Ops[1]); 3916 TblOps.push_back(Ops[2]); 3917 TblOps.push_back(Ops[3]); 3918 TblOps.push_back(Ops[4]); 3919 return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[5], Ty, 3920 Intrinsic::aarch64_neon_tbx2, "vtbx2"); 3921 } 3922 case NEON::BI__builtin_neon_vqtbl1_v: 3923 case NEON::BI__builtin_neon_vqtbl1q_v: 3924 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break; 3925 case NEON::BI__builtin_neon_vqtbl2_v: 3926 case NEON::BI__builtin_neon_vqtbl2q_v: { 3927 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break; 3928 case NEON::BI__builtin_neon_vqtbl3_v: 3929 case NEON::BI__builtin_neon_vqtbl3q_v: 3930 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break; 3931 case NEON::BI__builtin_neon_vqtbl4_v: 3932 case NEON::BI__builtin_neon_vqtbl4q_v: 3933 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break; 3934 case NEON::BI__builtin_neon_vqtbx1_v: 3935 case NEON::BI__builtin_neon_vqtbx1q_v: 3936 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break; 3937 case NEON::BI__builtin_neon_vqtbx2_v: 3938 case NEON::BI__builtin_neon_vqtbx2q_v: 3939 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break; 3940 case NEON::BI__builtin_neon_vqtbx3_v: 3941 case NEON::BI__builtin_neon_vqtbx3q_v: 3942 Int = Intrinsic::aarch64_neon_tbx3; s 
= "vtbx3"; break; 3943 case NEON::BI__builtin_neon_vqtbx4_v: 3944 case NEON::BI__builtin_neon_vqtbx4q_v: 3945 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break; 3946 } 3947 } 3948 3949 if (!Int) 3950 return nullptr; 3951 3952 Function *F = CGF.CGM.getIntrinsic(Int, Ty); 3953 return CGF.EmitNeonCall(F, Ops, s); 3954 } 3955 3956 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) { 3957 llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4); 3958 Op = Builder.CreateBitCast(Op, Int16Ty); 3959 Value *V = UndefValue::get(VTy); 3960 llvm::Constant *CI = ConstantInt::get(SizeTy, 0); 3961 Op = Builder.CreateInsertElement(V, Op, CI); 3962 return Op; 3963 } 3964 3965 Value *CodeGenFunction::vectorWrapScalar8(Value *Op) { 3966 llvm::Type *VTy = llvm::VectorType::get(Int8Ty, 8); 3967 Op = Builder.CreateBitCast(Op, Int8Ty); 3968 Value *V = UndefValue::get(VTy); 3969 llvm::Constant *CI = ConstantInt::get(SizeTy, 0); 3970 Op = Builder.CreateInsertElement(V, Op, CI); 3971 return Op; 3972 } 3973 3974 Value *CodeGenFunction:: 3975 emitVectorWrappedScalar8Intrinsic(unsigned Int, SmallVectorImpl<Value*> &Ops, 3976 const char *Name) { 3977 // i8 is not a legal types for AArch64, so we can't just use 3978 // a normal overloaded intrinsic call for these scalar types. Instead 3979 // we'll build 64-bit vectors w/ lane zero being our input values and 3980 // perform the operation on that. The back end can pattern match directly 3981 // to the scalar instruction. 
3982 Ops[0] = vectorWrapScalar8(Ops[0]); 3983 Ops[1] = vectorWrapScalar8(Ops[1]); 3984 llvm::Type *VTy = llvm::VectorType::get(Int8Ty, 8); 3985 Value *V = EmitNeonCall(CGM.getIntrinsic(Int, VTy), Ops, Name); 3986 Constant *CI = ConstantInt::get(SizeTy, 0); 3987 return Builder.CreateExtractElement(V, CI, "lane0"); 3988 } 3989 3990 Value *CodeGenFunction:: 3991 emitVectorWrappedScalar16Intrinsic(unsigned Int, SmallVectorImpl<Value*> &Ops, 3992 const char *Name) { 3993 // i16 is not a legal types for AArch64, so we can't just use 3994 // a normal overloaded intrinsic call for these scalar types. Instead 3995 // we'll build 64-bit vectors w/ lane zero being our input values and 3996 // perform the operation on that. The back end can pattern match directly 3997 // to the scalar instruction. 3998 Ops[0] = vectorWrapScalar16(Ops[0]); 3999 Ops[1] = vectorWrapScalar16(Ops[1]); 4000 llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4); 4001 Value *V = EmitNeonCall(CGM.getIntrinsic(Int, VTy), Ops, Name); 4002 Constant *CI = ConstantInt::get(SizeTy, 0); 4003 return Builder.CreateExtractElement(V, CI, "lane0"); 4004 } 4005 4006 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, 4007 const CallExpr *E) { 4008 unsigned HintID = static_cast<unsigned>(-1); 4009 switch (BuiltinID) { 4010 default: break; 4011 case AArch64::BI__builtin_arm_nop: 4012 HintID = 0; 4013 break; 4014 case AArch64::BI__builtin_arm_yield: 4015 HintID = 1; 4016 break; 4017 case AArch64::BI__builtin_arm_wfe: 4018 HintID = 2; 4019 break; 4020 case AArch64::BI__builtin_arm_wfi: 4021 HintID = 3; 4022 break; 4023 case AArch64::BI__builtin_arm_sev: 4024 HintID = 4; 4025 break; 4026 case AArch64::BI__builtin_arm_sevl: 4027 HintID = 5; 4028 break; 4029 } 4030 4031 if (HintID != static_cast<unsigned>(-1)) { 4032 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint); 4033 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID)); 4034 } 4035 4036 if (BuiltinID == 
AArch64::BI__builtin_arm_prefetch) { 4037 Value *Address = EmitScalarExpr(E->getArg(0)); 4038 Value *RW = EmitScalarExpr(E->getArg(1)); 4039 Value *CacheLevel = EmitScalarExpr(E->getArg(2)); 4040 Value *RetentionPolicy = EmitScalarExpr(E->getArg(3)); 4041 Value *IsData = EmitScalarExpr(E->getArg(4)); 4042 4043 Value *Locality = nullptr; 4044 if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) { 4045 // Temporal fetch, needs to convert cache level to locality. 4046 Locality = llvm::ConstantInt::get(Int32Ty, 4047 -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3); 4048 } else { 4049 // Streaming fetch. 4050 Locality = llvm::ConstantInt::get(Int32Ty, 0); 4051 } 4052 4053 // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify 4054 // PLDL3STRM or PLDL2STRM. 4055 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 4056 return Builder.CreateCall4(F, Address, RW, Locality, IsData); 4057 } 4058 4059 if (BuiltinID == AArch64::BI__builtin_arm_rbit) { 4060 assert((getContext().getTypeSize(E->getType()) == 32) && 4061 "rbit of unusual size!"); 4062 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 4063 return Builder.CreateCall( 4064 CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit"); 4065 } 4066 if (BuiltinID == AArch64::BI__builtin_arm_rbit64) { 4067 assert((getContext().getTypeSize(E->getType()) == 64) && 4068 "rbit of unusual size!"); 4069 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 4070 return Builder.CreateCall( 4071 CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit"); 4072 } 4073 4074 if (BuiltinID == AArch64::BI__clear_cache) { 4075 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 4076 const FunctionDecl *FD = E->getDirectCallee(); 4077 SmallVector<Value*, 2> Ops; 4078 for (unsigned i = 0; i < 2; i++) 4079 Ops.push_back(EmitScalarExpr(E->getArg(i))); 4080 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 4081 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 4082 
StringRef Name = FD->getName(); 4083 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 4084 } 4085 4086 if ((BuiltinID == AArch64::BI__builtin_arm_ldrex || 4087 BuiltinID == AArch64::BI__builtin_arm_ldaex) && 4088 getContext().getTypeSize(E->getType()) == 128) { 4089 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 4090 ? Intrinsic::aarch64_ldaxp 4091 : Intrinsic::aarch64_ldxp); 4092 4093 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 4094 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 4095 "ldxp"); 4096 4097 Value *Val0 = Builder.CreateExtractValue(Val, 1); 4098 Value *Val1 = Builder.CreateExtractValue(Val, 0); 4099 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); 4100 Val0 = Builder.CreateZExt(Val0, Int128Ty); 4101 Val1 = Builder.CreateZExt(Val1, Int128Ty); 4102 4103 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64); 4104 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 4105 Val = Builder.CreateOr(Val, Val1); 4106 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 4107 } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex || 4108 BuiltinID == AArch64::BI__builtin_arm_ldaex) { 4109 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 4110 4111 QualType Ty = E->getType(); 4112 llvm::Type *RealResTy = ConvertType(Ty); 4113 llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(), 4114 getContext().getTypeSize(Ty)); 4115 LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo()); 4116 4117 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 4118 ? 
Intrinsic::aarch64_ldaxr 4119 : Intrinsic::aarch64_ldxr, 4120 LoadAddr->getType()); 4121 Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr"); 4122 4123 if (RealResTy->isPointerTy()) 4124 return Builder.CreateIntToPtr(Val, RealResTy); 4125 4126 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 4127 return Builder.CreateBitCast(Val, RealResTy); 4128 } 4129 4130 if ((BuiltinID == AArch64::BI__builtin_arm_strex || 4131 BuiltinID == AArch64::BI__builtin_arm_stlex) && 4132 getContext().getTypeSize(E->getArg(0)->getType()) == 128) { 4133 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 4134 ? Intrinsic::aarch64_stlxp 4135 : Intrinsic::aarch64_stxp); 4136 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, nullptr); 4137 4138 Value *One = llvm::ConstantInt::get(Int32Ty, 1); 4139 Value *Tmp = Builder.CreateAlloca(ConvertType(E->getArg(0)->getType()), 4140 One); 4141 Value *Val = EmitScalarExpr(E->getArg(0)); 4142 Builder.CreateStore(Val, Tmp); 4143 4144 Value *LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy)); 4145 Val = Builder.CreateLoad(LdPtr); 4146 4147 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 4148 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 4149 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), 4150 Int8PtrTy); 4151 return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "stxp"); 4152 } else if (BuiltinID == AArch64::BI__builtin_arm_strex || 4153 BuiltinID == AArch64::BI__builtin_arm_stlex) { 4154 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 4155 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 4156 4157 QualType Ty = E->getArg(0)->getType(); 4158 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 4159 getContext().getTypeSize(Ty)); 4160 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 4161 4162 if (StoreVal->getType()->isPointerTy()) 4163 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty); 4164 else { 4165 StoreVal = Builder.CreateBitCast(StoreVal, 
StoreTy); 4166 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty); 4167 } 4168 4169 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 4170 ? Intrinsic::aarch64_stlxr 4171 : Intrinsic::aarch64_stxr, 4172 StoreAddr->getType()); 4173 return Builder.CreateCall2(F, StoreVal, StoreAddr, "stxr"); 4174 } 4175 4176 if (BuiltinID == AArch64::BI__builtin_arm_clrex) { 4177 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex); 4178 return Builder.CreateCall(F); 4179 } 4180 4181 // CRC32 4182 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 4183 switch (BuiltinID) { 4184 case AArch64::BI__builtin_arm_crc32b: 4185 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break; 4186 case AArch64::BI__builtin_arm_crc32cb: 4187 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break; 4188 case AArch64::BI__builtin_arm_crc32h: 4189 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break; 4190 case AArch64::BI__builtin_arm_crc32ch: 4191 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break; 4192 case AArch64::BI__builtin_arm_crc32w: 4193 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break; 4194 case AArch64::BI__builtin_arm_crc32cw: 4195 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break; 4196 case AArch64::BI__builtin_arm_crc32d: 4197 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break; 4198 case AArch64::BI__builtin_arm_crc32cd: 4199 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break; 4200 } 4201 4202 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 4203 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 4204 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 4205 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 4206 4207 llvm::Type *DataTy = F->getFunctionType()->getParamType(1); 4208 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy); 4209 4210 return Builder.CreateCall2(F, Arg0, Arg1); 4211 } 4212 4213 llvm::SmallVector<Value*, 4> Ops; 4214 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) 4215 Ops.push_back(EmitScalarExpr(E->getArg(i))); 4216 4217 auto SISDMap = 
makeArrayRef(AArch64SISDIntrinsicMap); 4218 const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( 4219 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted); 4220 4221 if (Builtin) { 4222 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1))); 4223 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E); 4224 assert(Result && "SISD intrinsic should have been handled"); 4225 return Result; 4226 } 4227 4228 llvm::APSInt Result; 4229 const Expr *Arg = E->getArg(E->getNumArgs()-1); 4230 NeonTypeFlags Type(0); 4231 if (Arg->isIntegerConstantExpr(Result, getContext())) 4232 // Determine the type of this overloaded NEON intrinsic. 4233 Type = NeonTypeFlags(Result.getZExtValue()); 4234 4235 bool usgn = Type.isUnsigned(); 4236 bool quad = Type.isQuad(); 4237 4238 // Handle non-overloaded intrinsics first. 4239 switch (BuiltinID) { 4240 default: break; 4241 case NEON::BI__builtin_neon_vldrq_p128: { 4242 llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128); 4243 Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy); 4244 return Builder.CreateLoad(Ptr); 4245 } 4246 case NEON::BI__builtin_neon_vstrq_p128: { 4247 llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128); 4248 Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy); 4249 return Builder.CreateStore(EmitScalarExpr(E->getArg(1)), Ptr); 4250 } 4251 case NEON::BI__builtin_neon_vcvts_u32_f32: 4252 case NEON::BI__builtin_neon_vcvtd_u64_f64: 4253 usgn = true; 4254 // FALL THROUGH 4255 case NEON::BI__builtin_neon_vcvts_s32_f32: 4256 case NEON::BI__builtin_neon_vcvtd_s64_f64: { 4257 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4258 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 4259 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 4260 llvm::Type *FTy = Is64 ? 
DoubleTy : FloatTy; 4261 Ops[0] = Builder.CreateBitCast(Ops[0], FTy); 4262 if (usgn) 4263 return Builder.CreateFPToUI(Ops[0], InTy); 4264 return Builder.CreateFPToSI(Ops[0], InTy); 4265 } 4266 case NEON::BI__builtin_neon_vcvts_f32_u32: 4267 case NEON::BI__builtin_neon_vcvtd_f64_u64: 4268 usgn = true; 4269 // FALL THROUGH 4270 case NEON::BI__builtin_neon_vcvts_f32_s32: 4271 case NEON::BI__builtin_neon_vcvtd_f64_s64: { 4272 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4273 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 4274 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 4275 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy; 4276 Ops[0] = Builder.CreateBitCast(Ops[0], InTy); 4277 if (usgn) 4278 return Builder.CreateUIToFP(Ops[0], FTy); 4279 return Builder.CreateSIToFP(Ops[0], FTy); 4280 } 4281 case NEON::BI__builtin_neon_vpaddd_s64: { 4282 llvm::Type *Ty = 4283 llvm::VectorType::get(llvm::Type::getInt64Ty(getLLVMContext()), 2); 4284 Value *Vec = EmitScalarExpr(E->getArg(0)); 4285 // The vector is v2f64, so make sure it's bitcast to that. 4286 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64"); 4287 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 4288 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 4289 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 4290 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 4291 // Pairwise addition of a v2f64 into a scalar f64. 4292 return Builder.CreateAdd(Op0, Op1, "vpaddd"); 4293 } 4294 case NEON::BI__builtin_neon_vpaddd_f64: { 4295 llvm::Type *Ty = 4296 llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 2); 4297 Value *Vec = EmitScalarExpr(E->getArg(0)); 4298 // The vector is v2f64, so make sure it's bitcast to that. 
4299 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64"); 4300 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 4301 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 4302 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 4303 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 4304 // Pairwise addition of a v2f64 into a scalar f64. 4305 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 4306 } 4307 case NEON::BI__builtin_neon_vpadds_f32: { 4308 llvm::Type *Ty = 4309 llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 2); 4310 Value *Vec = EmitScalarExpr(E->getArg(0)); 4311 // The vector is v2f32, so make sure it's bitcast to that. 4312 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32"); 4313 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 4314 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 4315 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 4316 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 4317 // Pairwise addition of a v2f32 into a scalar f32. 
4318 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 4319 } 4320 case NEON::BI__builtin_neon_vceqzd_s64: 4321 case NEON::BI__builtin_neon_vceqzd_f64: 4322 case NEON::BI__builtin_neon_vceqzs_f32: 4323 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4324 return EmitAArch64CompareBuiltinExpr( 4325 Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OEQ, 4326 ICmpInst::ICMP_EQ, "vceqz"); 4327 case NEON::BI__builtin_neon_vcgezd_s64: 4328 case NEON::BI__builtin_neon_vcgezd_f64: 4329 case NEON::BI__builtin_neon_vcgezs_f32: 4330 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4331 return EmitAArch64CompareBuiltinExpr( 4332 Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OGE, 4333 ICmpInst::ICMP_SGE, "vcgez"); 4334 case NEON::BI__builtin_neon_vclezd_s64: 4335 case NEON::BI__builtin_neon_vclezd_f64: 4336 case NEON::BI__builtin_neon_vclezs_f32: 4337 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4338 return EmitAArch64CompareBuiltinExpr( 4339 Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OLE, 4340 ICmpInst::ICMP_SLE, "vclez"); 4341 case NEON::BI__builtin_neon_vcgtzd_s64: 4342 case NEON::BI__builtin_neon_vcgtzd_f64: 4343 case NEON::BI__builtin_neon_vcgtzs_f32: 4344 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4345 return EmitAArch64CompareBuiltinExpr( 4346 Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OGT, 4347 ICmpInst::ICMP_SGT, "vcgtz"); 4348 case NEON::BI__builtin_neon_vcltzd_s64: 4349 case NEON::BI__builtin_neon_vcltzd_f64: 4350 case NEON::BI__builtin_neon_vcltzs_f32: 4351 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4352 return EmitAArch64CompareBuiltinExpr( 4353 Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OLT, 4354 ICmpInst::ICMP_SLT, "vcltz"); 4355 4356 case NEON::BI__builtin_neon_vceqzd_u64: { 4357 llvm::Type *Ty = llvm::Type::getInt64Ty(getLLVMContext()); 4358 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4359 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4360 Ops[0] = Builder.CreateICmp(llvm::ICmpInst::ICMP_EQ, 
Ops[0], 4361 llvm::Constant::getNullValue(Ty)); 4362 return Builder.CreateSExt(Ops[0], Ty, "vceqzd"); 4363 } 4364 case NEON::BI__builtin_neon_vceqd_f64: 4365 case NEON::BI__builtin_neon_vcled_f64: 4366 case NEON::BI__builtin_neon_vcltd_f64: 4367 case NEON::BI__builtin_neon_vcged_f64: 4368 case NEON::BI__builtin_neon_vcgtd_f64: { 4369 llvm::CmpInst::Predicate P; 4370 switch (BuiltinID) { 4371 default: llvm_unreachable("missing builtin ID in switch!"); 4372 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break; 4373 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break; 4374 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break; 4375 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break; 4376 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break; 4377 } 4378 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4379 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 4380 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 4381 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 4382 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd"); 4383 } 4384 case NEON::BI__builtin_neon_vceqs_f32: 4385 case NEON::BI__builtin_neon_vcles_f32: 4386 case NEON::BI__builtin_neon_vclts_f32: 4387 case NEON::BI__builtin_neon_vcges_f32: 4388 case NEON::BI__builtin_neon_vcgts_f32: { 4389 llvm::CmpInst::Predicate P; 4390 switch (BuiltinID) { 4391 default: llvm_unreachable("missing builtin ID in switch!"); 4392 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break; 4393 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break; 4394 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break; 4395 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break; 4396 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break; 4397 } 4398 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4399 Ops[0] = Builder.CreateBitCast(Ops[0], 
FloatTy); 4400 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy); 4401 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 4402 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd"); 4403 } 4404 case NEON::BI__builtin_neon_vceqd_s64: 4405 case NEON::BI__builtin_neon_vceqd_u64: 4406 case NEON::BI__builtin_neon_vcgtd_s64: 4407 case NEON::BI__builtin_neon_vcgtd_u64: 4408 case NEON::BI__builtin_neon_vcltd_s64: 4409 case NEON::BI__builtin_neon_vcltd_u64: 4410 case NEON::BI__builtin_neon_vcged_u64: 4411 case NEON::BI__builtin_neon_vcged_s64: 4412 case NEON::BI__builtin_neon_vcled_u64: 4413 case NEON::BI__builtin_neon_vcled_s64: { 4414 llvm::CmpInst::Predicate P; 4415 switch (BuiltinID) { 4416 default: llvm_unreachable("missing builtin ID in switch!"); 4417 case NEON::BI__builtin_neon_vceqd_s64: 4418 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break; 4419 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break; 4420 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break; 4421 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break; 4422 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break; 4423 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break; 4424 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break; 4425 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break; 4426 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break; 4427 } 4428 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4429 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 4430 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 4431 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]); 4432 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd"); 4433 } 4434 case NEON::BI__builtin_neon_vtstd_s64: 4435 case NEON::BI__builtin_neon_vtstd_u64: { 4436 llvm::Type *Ty = llvm::Type::getInt64Ty(getLLVMContext()); 4437 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4438 
Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4439 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4440 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); 4441 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], 4442 llvm::Constant::getNullValue(Ty)); 4443 return Builder.CreateSExt(Ops[0], Ty, "vtstd"); 4444 } 4445 case NEON::BI__builtin_neon_vset_lane_i8: 4446 case NEON::BI__builtin_neon_vset_lane_i16: 4447 case NEON::BI__builtin_neon_vset_lane_i32: 4448 case NEON::BI__builtin_neon_vset_lane_i64: 4449 case NEON::BI__builtin_neon_vset_lane_f32: 4450 case NEON::BI__builtin_neon_vsetq_lane_i8: 4451 case NEON::BI__builtin_neon_vsetq_lane_i16: 4452 case NEON::BI__builtin_neon_vsetq_lane_i32: 4453 case NEON::BI__builtin_neon_vsetq_lane_i64: 4454 case NEON::BI__builtin_neon_vsetq_lane_f32: 4455 Ops.push_back(EmitScalarExpr(E->getArg(2))); 4456 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 4457 case NEON::BI__builtin_neon_vset_lane_f64: 4458 // The vector type needs a cast for the v1f64 variant. 4459 Ops[1] = Builder.CreateBitCast(Ops[1], 4460 llvm::VectorType::get(DoubleTy, 1)); 4461 Ops.push_back(EmitScalarExpr(E->getArg(2))); 4462 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 4463 case NEON::BI__builtin_neon_vsetq_lane_f64: 4464 // The vector type needs a cast for the v2f64 variant. 
4465 Ops[1] = Builder.CreateBitCast(Ops[1], 4466 llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 2)); 4467 Ops.push_back(EmitScalarExpr(E->getArg(2))); 4468 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 4469 4470 case NEON::BI__builtin_neon_vget_lane_i8: 4471 case NEON::BI__builtin_neon_vdupb_lane_i8: 4472 Ops[0] = Builder.CreateBitCast(Ops[0], 4473 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8)); 4474 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4475 "vget_lane"); 4476 case NEON::BI__builtin_neon_vgetq_lane_i8: 4477 case NEON::BI__builtin_neon_vdupb_laneq_i8: 4478 Ops[0] = Builder.CreateBitCast(Ops[0], 4479 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16)); 4480 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4481 "vgetq_lane"); 4482 case NEON::BI__builtin_neon_vget_lane_i16: 4483 case NEON::BI__builtin_neon_vduph_lane_i16: 4484 Ops[0] = Builder.CreateBitCast(Ops[0], 4485 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4)); 4486 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4487 "vget_lane"); 4488 case NEON::BI__builtin_neon_vgetq_lane_i16: 4489 case NEON::BI__builtin_neon_vduph_laneq_i16: 4490 Ops[0] = Builder.CreateBitCast(Ops[0], 4491 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8)); 4492 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4493 "vgetq_lane"); 4494 case NEON::BI__builtin_neon_vget_lane_i32: 4495 case NEON::BI__builtin_neon_vdups_lane_i32: 4496 Ops[0] = Builder.CreateBitCast( 4497 Ops[0], 4498 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 32), 2)); 4499 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4500 "vget_lane"); 4501 case NEON::BI__builtin_neon_vdups_lane_f32: 4502 Ops[0] = Builder.CreateBitCast(Ops[0], 4503 
llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 2)); 4504 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4505 "vdups_lane"); 4506 case NEON::BI__builtin_neon_vgetq_lane_i32: 4507 case NEON::BI__builtin_neon_vdups_laneq_i32: 4508 Ops[0] = Builder.CreateBitCast(Ops[0], 4509 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 32), 4)); 4510 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4511 "vgetq_lane"); 4512 case NEON::BI__builtin_neon_vget_lane_i64: 4513 case NEON::BI__builtin_neon_vdupd_lane_i64: 4514 Ops[0] = Builder.CreateBitCast(Ops[0], 4515 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 64), 1)); 4516 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4517 "vget_lane"); 4518 case NEON::BI__builtin_neon_vdupd_lane_f64: 4519 Ops[0] = Builder.CreateBitCast(Ops[0], 4520 llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 1)); 4521 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4522 "vdupd_lane"); 4523 case NEON::BI__builtin_neon_vgetq_lane_i64: 4524 case NEON::BI__builtin_neon_vdupd_laneq_i64: 4525 Ops[0] = Builder.CreateBitCast(Ops[0], 4526 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 64), 2)); 4527 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4528 "vgetq_lane"); 4529 case NEON::BI__builtin_neon_vget_lane_f32: 4530 Ops[0] = Builder.CreateBitCast(Ops[0], 4531 llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 2)); 4532 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4533 "vget_lane"); 4534 case NEON::BI__builtin_neon_vget_lane_f64: 4535 Ops[0] = Builder.CreateBitCast(Ops[0], 4536 llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 1)); 4537 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4538 "vget_lane"); 4539 case NEON::BI__builtin_neon_vgetq_lane_f32: 4540 case 
NEON::BI__builtin_neon_vdups_laneq_f32: 4541 Ops[0] = Builder.CreateBitCast(Ops[0], 4542 llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 4)); 4543 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4544 "vgetq_lane"); 4545 case NEON::BI__builtin_neon_vgetq_lane_f64: 4546 case NEON::BI__builtin_neon_vdupd_laneq_f64: 4547 Ops[0] = Builder.CreateBitCast(Ops[0], 4548 llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 2)); 4549 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4550 "vgetq_lane"); 4551 case NEON::BI__builtin_neon_vaddd_s64: 4552 case NEON::BI__builtin_neon_vaddd_u64: 4553 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd"); 4554 case NEON::BI__builtin_neon_vsubd_s64: 4555 case NEON::BI__builtin_neon_vsubd_u64: 4556 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd"); 4557 case NEON::BI__builtin_neon_vqdmlalh_s16: 4558 case NEON::BI__builtin_neon_vqdmlslh_s16: { 4559 SmallVector<Value *, 2> ProductOps; 4560 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 4561 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2)))); 4562 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 4563 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 4564 ProductOps, "vqdmlXl"); 4565 Constant *CI = ConstantInt::get(SizeTy, 0); 4566 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 4567 4568 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16 4569 ? 
Intrinsic::aarch64_neon_sqadd 4570 : Intrinsic::aarch64_neon_sqsub; 4571 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl"); 4572 } 4573 case NEON::BI__builtin_neon_vqshlud_n_s64: { 4574 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4575 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 4576 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty), 4577 Ops, "vqshlu_n"); 4578 } 4579 case NEON::BI__builtin_neon_vqshld_n_u64: 4580 case NEON::BI__builtin_neon_vqshld_n_s64: { 4581 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64 4582 ? Intrinsic::aarch64_neon_uqshl 4583 : Intrinsic::aarch64_neon_sqshl; 4584 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4585 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 4586 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n"); 4587 } 4588 case NEON::BI__builtin_neon_vrshrd_n_u64: 4589 case NEON::BI__builtin_neon_vrshrd_n_s64: { 4590 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64 4591 ? Intrinsic::aarch64_neon_urshl 4592 : Intrinsic::aarch64_neon_srshl; 4593 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4594 int SV = cast<ConstantInt>(Ops[1])->getSExtValue(); 4595 Ops[1] = ConstantInt::get(Int64Ty, -SV); 4596 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n"); 4597 } 4598 case NEON::BI__builtin_neon_vrsrad_n_u64: 4599 case NEON::BI__builtin_neon_vrsrad_n_s64: { 4600 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64 4601 ? 
Intrinsic::aarch64_neon_urshl 4602 : Intrinsic::aarch64_neon_srshl; 4603 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 4604 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2)))); 4605 Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Int64Ty), Ops[1], 4606 Builder.CreateSExt(Ops[2], Int64Ty)); 4607 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty)); 4608 } 4609 case NEON::BI__builtin_neon_vshld_n_s64: 4610 case NEON::BI__builtin_neon_vshld_n_u64: { 4611 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 4612 return Builder.CreateShl( 4613 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n"); 4614 } 4615 case NEON::BI__builtin_neon_vshrd_n_s64: { 4616 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 4617 return Builder.CreateAShr( 4618 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 4619 Amt->getZExtValue())), 4620 "shrd_n"); 4621 } 4622 case NEON::BI__builtin_neon_vshrd_n_u64: { 4623 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 4624 uint64_t ShiftAmt = Amt->getZExtValue(); 4625 // Right-shifting an unsigned value by its size yields 0. 4626 if (ShiftAmt == 64) 4627 return ConstantInt::get(Int64Ty, 0); 4628 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt), 4629 "shrd_n"); 4630 } 4631 case NEON::BI__builtin_neon_vsrad_n_s64: { 4632 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 4633 Ops[1] = Builder.CreateAShr( 4634 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 4635 Amt->getZExtValue())), 4636 "shrd_n"); 4637 return Builder.CreateAdd(Ops[0], Ops[1]); 4638 } 4639 case NEON::BI__builtin_neon_vsrad_n_u64: { 4640 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 4641 uint64_t ShiftAmt = Amt->getZExtValue(); 4642 // Right-shifting an unsigned value by its size yields 0. 4643 // As Op + 0 = Op, return Ops[0] directly. 
4644 if (ShiftAmt == 64) 4645 return Ops[0]; 4646 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt), 4647 "shrd_n"); 4648 return Builder.CreateAdd(Ops[0], Ops[1]); 4649 } 4650 case NEON::BI__builtin_neon_vqdmlalh_lane_s16: 4651 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16: 4652 case NEON::BI__builtin_neon_vqdmlslh_lane_s16: 4653 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: { 4654 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 4655 "lane"); 4656 SmallVector<Value *, 2> ProductOps; 4657 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 4658 ProductOps.push_back(vectorWrapScalar16(Ops[2])); 4659 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 4660 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 4661 ProductOps, "vqdmlXl"); 4662 Constant *CI = ConstantInt::get(SizeTy, 0); 4663 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 4664 Ops.pop_back(); 4665 4666 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 || 4667 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16) 4668 ? Intrinsic::aarch64_neon_sqadd 4669 : Intrinsic::aarch64_neon_sqsub; 4670 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl"); 4671 } 4672 case NEON::BI__builtin_neon_vqdmlals_s32: 4673 case NEON::BI__builtin_neon_vqdmlsls_s32: { 4674 SmallVector<Value *, 2> ProductOps; 4675 ProductOps.push_back(Ops[1]); 4676 ProductOps.push_back(EmitScalarExpr(E->getArg(2))); 4677 Ops[1] = 4678 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 4679 ProductOps, "vqdmlXl"); 4680 4681 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32 4682 ? 
Intrinsic::aarch64_neon_sqadd 4683 : Intrinsic::aarch64_neon_sqsub; 4684 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl"); 4685 } 4686 case NEON::BI__builtin_neon_vqdmlals_lane_s32: 4687 case NEON::BI__builtin_neon_vqdmlals_laneq_s32: 4688 case NEON::BI__builtin_neon_vqdmlsls_lane_s32: 4689 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: { 4690 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 4691 "lane"); 4692 SmallVector<Value *, 2> ProductOps; 4693 ProductOps.push_back(Ops[1]); 4694 ProductOps.push_back(Ops[2]); 4695 Ops[1] = 4696 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 4697 ProductOps, "vqdmlXl"); 4698 Ops.pop_back(); 4699 4700 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 || 4701 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32) 4702 ? Intrinsic::aarch64_neon_sqadd 4703 : Intrinsic::aarch64_neon_sqsub; 4704 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl"); 4705 } 4706 } 4707 4708 llvm::VectorType *VTy = GetNeonType(this, Type); 4709 llvm::Type *Ty = VTy; 4710 if (!Ty) 4711 return nullptr; 4712 4713 // Not all intrinsics handled by the common case work for AArch64 yet, so only 4714 // defer to common code if it's been added to our special map. 
4715 Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID, 4716 AArch64SIMDIntrinsicsProvenSorted); 4717 4718 if (Builtin) 4719 return EmitCommonNeonBuiltinExpr( 4720 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 4721 Builtin->NameHint, Builtin->TypeModifier, E, Ops, nullptr); 4722 4723 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops)) 4724 return V; 4725 4726 unsigned Int; 4727 switch (BuiltinID) { 4728 default: return nullptr; 4729 case NEON::BI__builtin_neon_vbsl_v: 4730 case NEON::BI__builtin_neon_vbslq_v: { 4731 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy); 4732 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl"); 4733 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl"); 4734 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl"); 4735 4736 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl"); 4737 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl"); 4738 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl"); 4739 return Builder.CreateBitCast(Ops[0], Ty); 4740 } 4741 case NEON::BI__builtin_neon_vfma_lane_v: 4742 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types 4743 // The ARM builtins (and instructions) have the addend as the first 4744 // operand, but the 'fma' intrinsics have it last. Swap it around here. 4745 Value *Addend = Ops[0]; 4746 Value *Multiplicand = Ops[1]; 4747 Value *LaneSource = Ops[2]; 4748 Ops[0] = Multiplicand; 4749 Ops[1] = LaneSource; 4750 Ops[2] = Addend; 4751 4752 // Now adjust things to handle the lane access. 4753 llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ? 
4754 llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) : 4755 VTy; 4756 llvm::Constant *cst = cast<Constant>(Ops[3]); 4757 Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst); 4758 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy); 4759 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane"); 4760 4761 Ops.pop_back(); 4762 Int = Intrinsic::fma; 4763 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla"); 4764 } 4765 case NEON::BI__builtin_neon_vfma_laneq_v: { 4766 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 4767 // v1f64 fma should be mapped to Neon scalar f64 fma 4768 if (VTy && VTy->getElementType() == DoubleTy) { 4769 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 4770 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 4771 llvm::Type *VTy = GetNeonType(this, 4772 NeonTypeFlags(NeonTypeFlags::Float64, false, true)); 4773 Ops[2] = Builder.CreateBitCast(Ops[2], VTy); 4774 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 4775 Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); 4776 Value *Result = Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); 4777 return Builder.CreateBitCast(Result, Ty); 4778 } 4779 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 4780 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4781 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4782 4783 llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(), 4784 VTy->getNumElements() * 2); 4785 Ops[2] = Builder.CreateBitCast(Ops[2], STy); 4786 Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), 4787 cast<ConstantInt>(Ops[3])); 4788 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); 4789 4790 return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); 4791 } 4792 case NEON::BI__builtin_neon_vfmaq_laneq_v: { 4793 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 4794 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4795 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4796 4797 Ops[2] = 
Builder.CreateBitCast(Ops[2], Ty); 4798 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3])); 4799 return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); 4800 } 4801 case NEON::BI__builtin_neon_vfmas_lane_f32: 4802 case NEON::BI__builtin_neon_vfmas_laneq_f32: 4803 case NEON::BI__builtin_neon_vfmad_lane_f64: 4804 case NEON::BI__builtin_neon_vfmad_laneq_f64: { 4805 Ops.push_back(EmitScalarExpr(E->getArg(3))); 4806 llvm::Type *Ty = ConvertType(E->getCallReturnType()); 4807 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 4808 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 4809 return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); 4810 } 4811 case NEON::BI__builtin_neon_vfms_v: 4812 case NEON::BI__builtin_neon_vfmsq_v: { // Only used for FP types 4813 // FIXME: probably remove when we no longer support aarch64_simd.h 4814 // (arm_neon.h delegates to vfma). 4815 4816 // The ARM builtins (and instructions) have the addend as the first 4817 // operand, but the 'fma' intrinsics have it last. Swap it around here. 4818 Value *Subtrahend = Ops[0]; 4819 Value *Multiplicand = Ops[2]; 4820 Ops[0] = Multiplicand; 4821 Ops[2] = Subtrahend; 4822 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 4823 Ops[1] = Builder.CreateFNeg(Ops[1]); 4824 Int = Intrinsic::fma; 4825 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmls"); 4826 } 4827 case NEON::BI__builtin_neon_vmull_v: 4828 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 4829 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull; 4830 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull; 4831 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); 4832 case NEON::BI__builtin_neon_vmax_v: 4833 case NEON::BI__builtin_neon_vmaxq_v: 4834 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 4835 Int = usgn ? 
Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax; 4836 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax; 4837 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax"); 4838 case NEON::BI__builtin_neon_vmin_v: 4839 case NEON::BI__builtin_neon_vminq_v: 4840 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 4841 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin; 4842 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin; 4843 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin"); 4844 case NEON::BI__builtin_neon_vabd_v: 4845 case NEON::BI__builtin_neon_vabdq_v: 4846 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 4847 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd; 4848 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd; 4849 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd"); 4850 case NEON::BI__builtin_neon_vpadal_v: 4851 case NEON::BI__builtin_neon_vpadalq_v: { 4852 unsigned ArgElts = VTy->getNumElements(); 4853 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType()); 4854 unsigned BitWidth = EltTy->getBitWidth(); 4855 llvm::Type *ArgTy = llvm::VectorType::get( 4856 llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts); 4857 llvm::Type* Tys[2] = { VTy, ArgTy }; 4858 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp; 4859 SmallVector<llvm::Value*, 1> TmpOps; 4860 TmpOps.push_back(Ops[1]); 4861 Function *F = CGM.getIntrinsic(Int, Tys); 4862 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal"); 4863 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType()); 4864 return Builder.CreateAdd(tmp, addend); 4865 } 4866 case NEON::BI__builtin_neon_vpmin_v: 4867 case NEON::BI__builtin_neon_vpminq_v: 4868 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 4869 Int = usgn ? 
Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp; 4870 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp; 4871 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin"); 4872 case NEON::BI__builtin_neon_vpmax_v: 4873 case NEON::BI__builtin_neon_vpmaxq_v: 4874 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 4875 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp; 4876 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp; 4877 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); 4878 case NEON::BI__builtin_neon_vminnm_v: 4879 case NEON::BI__builtin_neon_vminnmq_v: 4880 Int = Intrinsic::aarch64_neon_fminnm; 4881 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm"); 4882 case NEON::BI__builtin_neon_vmaxnm_v: 4883 case NEON::BI__builtin_neon_vmaxnmq_v: 4884 Int = Intrinsic::aarch64_neon_fmaxnm; 4885 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm"); 4886 case NEON::BI__builtin_neon_vrecpss_f32: { 4887 llvm::Type *f32Type = llvm::Type::getFloatTy(getLLVMContext()); 4888 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4889 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, f32Type), 4890 Ops, "vrecps"); 4891 } 4892 case NEON::BI__builtin_neon_vrecpsd_f64: { 4893 llvm::Type *f64Type = llvm::Type::getDoubleTy(getLLVMContext()); 4894 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4895 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, f64Type), 4896 Ops, "vrecps"); 4897 } 4898 case NEON::BI__builtin_neon_vqshrun_n_v: 4899 Int = Intrinsic::aarch64_neon_sqshrun; 4900 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); 4901 case NEON::BI__builtin_neon_vqrshrun_n_v: 4902 Int = Intrinsic::aarch64_neon_sqrshrun; 4903 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n"); 4904 case NEON::BI__builtin_neon_vqshrn_n_v: 4905 Int = usgn ? 
Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn; 4906 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n"); 4907 case NEON::BI__builtin_neon_vrshrn_n_v: 4908 Int = Intrinsic::aarch64_neon_rshrn; 4909 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n"); 4910 case NEON::BI__builtin_neon_vqrshrn_n_v: 4911 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn; 4912 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); 4913 case NEON::BI__builtin_neon_vrnda_v: 4914 case NEON::BI__builtin_neon_vrndaq_v: { 4915 Int = Intrinsic::round; 4916 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda"); 4917 } 4918 case NEON::BI__builtin_neon_vrndi_v: 4919 case NEON::BI__builtin_neon_vrndiq_v: { 4920 Int = Intrinsic::nearbyint; 4921 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi"); 4922 } 4923 case NEON::BI__builtin_neon_vrndm_v: 4924 case NEON::BI__builtin_neon_vrndmq_v: { 4925 Int = Intrinsic::floor; 4926 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm"); 4927 } 4928 case NEON::BI__builtin_neon_vrndn_v: 4929 case NEON::BI__builtin_neon_vrndnq_v: { 4930 Int = Intrinsic::aarch64_neon_frintn; 4931 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn"); 4932 } 4933 case NEON::BI__builtin_neon_vrndp_v: 4934 case NEON::BI__builtin_neon_vrndpq_v: { 4935 Int = Intrinsic::ceil; 4936 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp"); 4937 } 4938 case NEON::BI__builtin_neon_vrndx_v: 4939 case NEON::BI__builtin_neon_vrndxq_v: { 4940 Int = Intrinsic::rint; 4941 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx"); 4942 } 4943 case NEON::BI__builtin_neon_vrnd_v: 4944 case NEON::BI__builtin_neon_vrndq_v: { 4945 Int = Intrinsic::trunc; 4946 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz"); 4947 } 4948 case NEON::BI__builtin_neon_vceqz_v: 4949 case NEON::BI__builtin_neon_vceqzq_v: 4950 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ, 
4951 ICmpInst::ICMP_EQ, "vceqz"); 4952 case NEON::BI__builtin_neon_vcgez_v: 4953 case NEON::BI__builtin_neon_vcgezq_v: 4954 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE, 4955 ICmpInst::ICMP_SGE, "vcgez"); 4956 case NEON::BI__builtin_neon_vclez_v: 4957 case NEON::BI__builtin_neon_vclezq_v: 4958 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE, 4959 ICmpInst::ICMP_SLE, "vclez"); 4960 case NEON::BI__builtin_neon_vcgtz_v: 4961 case NEON::BI__builtin_neon_vcgtzq_v: 4962 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT, 4963 ICmpInst::ICMP_SGT, "vcgtz"); 4964 case NEON::BI__builtin_neon_vcltz_v: 4965 case NEON::BI__builtin_neon_vcltzq_v: 4966 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT, 4967 ICmpInst::ICMP_SLT, "vcltz"); 4968 case NEON::BI__builtin_neon_vcvt_f64_v: 4969 case NEON::BI__builtin_neon_vcvtq_f64_v: 4970 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4971 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); 4972 return usgn ? 
Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 4973 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 4974 case NEON::BI__builtin_neon_vcvt_f64_f32: { 4975 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad && 4976 "unexpected vcvt_f64_f32 builtin"); 4977 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false); 4978 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 4979 4980 return Builder.CreateFPExt(Ops[0], Ty, "vcvt"); 4981 } 4982 case NEON::BI__builtin_neon_vcvt_f32_f64: { 4983 assert(Type.getEltType() == NeonTypeFlags::Float32 && 4984 "unexpected vcvt_f32_f64 builtin"); 4985 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true); 4986 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 4987 4988 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt"); 4989 } 4990 case NEON::BI__builtin_neon_vcvt_s32_v: 4991 case NEON::BI__builtin_neon_vcvt_u32_v: 4992 case NEON::BI__builtin_neon_vcvt_s64_v: 4993 case NEON::BI__builtin_neon_vcvt_u64_v: 4994 case NEON::BI__builtin_neon_vcvtq_s32_v: 4995 case NEON::BI__builtin_neon_vcvtq_u32_v: 4996 case NEON::BI__builtin_neon_vcvtq_s64_v: 4997 case NEON::BI__builtin_neon_vcvtq_u64_v: { 4998 bool Double = 4999 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 5000 llvm::Type *InTy = 5001 GetNeonType(this, 5002 NeonTypeFlags(Double ? NeonTypeFlags::Float64 5003 : NeonTypeFlags::Float32, false, quad)); 5004 Ops[0] = Builder.CreateBitCast(Ops[0], InTy); 5005 if (usgn) 5006 return Builder.CreateFPToUI(Ops[0], Ty); 5007 return Builder.CreateFPToSI(Ops[0], Ty); 5008 } 5009 case NEON::BI__builtin_neon_vcvta_s32_v: 5010 case NEON::BI__builtin_neon_vcvtaq_s32_v: 5011 case NEON::BI__builtin_neon_vcvta_u32_v: 5012 case NEON::BI__builtin_neon_vcvtaq_u32_v: 5013 case NEON::BI__builtin_neon_vcvta_s64_v: 5014 case NEON::BI__builtin_neon_vcvtaq_s64_v: 5015 case NEON::BI__builtin_neon_vcvta_u64_v: 5016 case NEON::BI__builtin_neon_vcvtaq_u64_v: { 5017 Int = usgn ? 
Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas; 5018 bool Double = 5019 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 5020 llvm::Type *InTy = 5021 GetNeonType(this, 5022 NeonTypeFlags(Double ? NeonTypeFlags::Float64 5023 : NeonTypeFlags::Float32, false, quad)); 5024 llvm::Type *Tys[2] = { Ty, InTy }; 5025 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta"); 5026 } 5027 case NEON::BI__builtin_neon_vcvtm_s32_v: 5028 case NEON::BI__builtin_neon_vcvtmq_s32_v: 5029 case NEON::BI__builtin_neon_vcvtm_u32_v: 5030 case NEON::BI__builtin_neon_vcvtmq_u32_v: 5031 case NEON::BI__builtin_neon_vcvtm_s64_v: 5032 case NEON::BI__builtin_neon_vcvtmq_s64_v: 5033 case NEON::BI__builtin_neon_vcvtm_u64_v: 5034 case NEON::BI__builtin_neon_vcvtmq_u64_v: { 5035 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms; 5036 bool Double = 5037 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 5038 llvm::Type *InTy = 5039 GetNeonType(this, 5040 NeonTypeFlags(Double ? NeonTypeFlags::Float64 5041 : NeonTypeFlags::Float32, false, quad)); 5042 llvm::Type *Tys[2] = { Ty, InTy }; 5043 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm"); 5044 } 5045 case NEON::BI__builtin_neon_vcvtn_s32_v: 5046 case NEON::BI__builtin_neon_vcvtnq_s32_v: 5047 case NEON::BI__builtin_neon_vcvtn_u32_v: 5048 case NEON::BI__builtin_neon_vcvtnq_u32_v: 5049 case NEON::BI__builtin_neon_vcvtn_s64_v: 5050 case NEON::BI__builtin_neon_vcvtnq_s64_v: 5051 case NEON::BI__builtin_neon_vcvtn_u64_v: 5052 case NEON::BI__builtin_neon_vcvtnq_u64_v: { 5053 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns; 5054 bool Double = 5055 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 5056 llvm::Type *InTy = 5057 GetNeonType(this, 5058 NeonTypeFlags(Double ? 
NeonTypeFlags::Float64 5059 : NeonTypeFlags::Float32, false, quad)); 5060 llvm::Type *Tys[2] = { Ty, InTy }; 5061 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn"); 5062 } 5063 case NEON::BI__builtin_neon_vcvtp_s32_v: 5064 case NEON::BI__builtin_neon_vcvtpq_s32_v: 5065 case NEON::BI__builtin_neon_vcvtp_u32_v: 5066 case NEON::BI__builtin_neon_vcvtpq_u32_v: 5067 case NEON::BI__builtin_neon_vcvtp_s64_v: 5068 case NEON::BI__builtin_neon_vcvtpq_s64_v: 5069 case NEON::BI__builtin_neon_vcvtp_u64_v: 5070 case NEON::BI__builtin_neon_vcvtpq_u64_v: { 5071 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps; 5072 bool Double = 5073 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 5074 llvm::Type *InTy = 5075 GetNeonType(this, 5076 NeonTypeFlags(Double ? NeonTypeFlags::Float64 5077 : NeonTypeFlags::Float32, false, quad)); 5078 llvm::Type *Tys[2] = { Ty, InTy }; 5079 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp"); 5080 } 5081 case NEON::BI__builtin_neon_vmulx_v: 5082 case NEON::BI__builtin_neon_vmulxq_v: { 5083 Int = Intrinsic::aarch64_neon_fmulx; 5084 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); 5085 } 5086 case NEON::BI__builtin_neon_vmul_lane_v: 5087 case NEON::BI__builtin_neon_vmul_laneq_v: { 5088 // v1f64 vmul_lane should be mapped to Neon scalar mul lane 5089 bool Quad = false; 5090 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v) 5091 Quad = true; 5092 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 5093 llvm::Type *VTy = GetNeonType(this, 5094 NeonTypeFlags(NeonTypeFlags::Float64, false, Quad)); 5095 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 5096 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); 5097 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]); 5098 return Builder.CreateBitCast(Result, Ty); 5099 } 5100 case NEON::BI__builtin_neon_vnegd_s64: 5101 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd"); 5102 case 
NEON::BI__builtin_neon_vpmaxnm_v: 5103 case NEON::BI__builtin_neon_vpmaxnmq_v: { 5104 Int = Intrinsic::aarch64_neon_fmaxnmp; 5105 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm"); 5106 } 5107 case NEON::BI__builtin_neon_vpminnm_v: 5108 case NEON::BI__builtin_neon_vpminnmq_v: { 5109 Int = Intrinsic::aarch64_neon_fminnmp; 5110 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm"); 5111 } 5112 case NEON::BI__builtin_neon_vsqrt_v: 5113 case NEON::BI__builtin_neon_vsqrtq_v: { 5114 Int = Intrinsic::sqrt; 5115 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5116 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt"); 5117 } 5118 case NEON::BI__builtin_neon_vrbit_v: 5119 case NEON::BI__builtin_neon_vrbitq_v: { 5120 Int = Intrinsic::aarch64_neon_rbit; 5121 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit"); 5122 } 5123 case NEON::BI__builtin_neon_vaddv_u8: 5124 // FIXME: These are handled by the AArch64 scalar code. 5125 usgn = true; 5126 // FALLTHROUGH 5127 case NEON::BI__builtin_neon_vaddv_s8: { 5128 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 5129 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5130 VTy = 5131 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 5132 llvm::Type *Tys[2] = { Ty, VTy }; 5133 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5134 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 5135 return Builder.CreateTrunc(Ops[0], 5136 llvm::IntegerType::get(getLLVMContext(), 8)); 5137 } 5138 case NEON::BI__builtin_neon_vaddv_u16: 5139 usgn = true; 5140 // FALLTHROUGH 5141 case NEON::BI__builtin_neon_vaddv_s16: { 5142 Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 5143 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5144 VTy = 5145 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 5146 llvm::Type *Tys[2] = { Ty, VTy }; 5147 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5148 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 5149 return Builder.CreateTrunc(Ops[0], 5150 llvm::IntegerType::get(getLLVMContext(), 16)); 5151 } 5152 case NEON::BI__builtin_neon_vaddvq_u8: 5153 usgn = true; 5154 // FALLTHROUGH 5155 case NEON::BI__builtin_neon_vaddvq_s8: { 5156 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 5157 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5158 VTy = 5159 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 5160 llvm::Type *Tys[2] = { Ty, VTy }; 5161 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5162 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 5163 return Builder.CreateTrunc(Ops[0], 5164 llvm::IntegerType::get(getLLVMContext(), 8)); 5165 } 5166 case NEON::BI__builtin_neon_vaddvq_u16: 5167 usgn = true; 5168 // FALLTHROUGH 5169 case NEON::BI__builtin_neon_vaddvq_s16: { 5170 Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 5171 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5172 VTy = 5173 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 5174 llvm::Type *Tys[2] = { Ty, VTy }; 5175 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5176 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 5177 return Builder.CreateTrunc(Ops[0], 5178 llvm::IntegerType::get(getLLVMContext(), 16)); 5179 } 5180 case NEON::BI__builtin_neon_vmaxv_u8: { 5181 Int = Intrinsic::aarch64_neon_umaxv; 5182 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5183 VTy = 5184 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 5185 llvm::Type *Tys[2] = { Ty, VTy }; 5186 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5187 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5188 return Builder.CreateTrunc(Ops[0], 5189 llvm::IntegerType::get(getLLVMContext(), 8)); 5190 } 5191 case NEON::BI__builtin_neon_vmaxv_u16: { 5192 Int = Intrinsic::aarch64_neon_umaxv; 5193 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5194 VTy = 5195 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 5196 llvm::Type *Tys[2] = { Ty, VTy }; 5197 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5198 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5199 return Builder.CreateTrunc(Ops[0], 5200 llvm::IntegerType::get(getLLVMContext(), 16)); 5201 } 5202 case NEON::BI__builtin_neon_vmaxvq_u8: { 5203 Int = Intrinsic::aarch64_neon_umaxv; 5204 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5205 VTy = 5206 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 5207 llvm::Type *Tys[2] = { Ty, VTy }; 5208 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5209 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5210 return Builder.CreateTrunc(Ops[0], 5211 llvm::IntegerType::get(getLLVMContext(), 8)); 5212 } 5213 case NEON::BI__builtin_neon_vmaxvq_u16: { 5214 Int = 
Intrinsic::aarch64_neon_umaxv; 5215 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5216 VTy = 5217 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 5218 llvm::Type *Tys[2] = { Ty, VTy }; 5219 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5220 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5221 return Builder.CreateTrunc(Ops[0], 5222 llvm::IntegerType::get(getLLVMContext(), 16)); 5223 } 5224 case NEON::BI__builtin_neon_vmaxv_s8: { 5225 Int = Intrinsic::aarch64_neon_smaxv; 5226 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5227 VTy = 5228 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 5229 llvm::Type *Tys[2] = { Ty, VTy }; 5230 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5231 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5232 return Builder.CreateTrunc(Ops[0], 5233 llvm::IntegerType::get(getLLVMContext(), 8)); 5234 } 5235 case NEON::BI__builtin_neon_vmaxv_s16: { 5236 Int = Intrinsic::aarch64_neon_smaxv; 5237 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5238 VTy = 5239 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 5240 llvm::Type *Tys[2] = { Ty, VTy }; 5241 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5242 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5243 return Builder.CreateTrunc(Ops[0], 5244 llvm::IntegerType::get(getLLVMContext(), 16)); 5245 } 5246 case NEON::BI__builtin_neon_vmaxvq_s8: { 5247 Int = Intrinsic::aarch64_neon_smaxv; 5248 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5249 VTy = 5250 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 5251 llvm::Type *Tys[2] = { Ty, VTy }; 5252 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5253 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5254 return Builder.CreateTrunc(Ops[0], 5255 llvm::IntegerType::get(getLLVMContext(), 8)); 5256 } 5257 case NEON::BI__builtin_neon_vmaxvq_s16: { 5258 Int = Intrinsic::aarch64_neon_smaxv; 5259 Ty = 
llvm::IntegerType::get(getLLVMContext(), 32); 5260 VTy = 5261 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 5262 llvm::Type *Tys[2] = { Ty, VTy }; 5263 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5264 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5265 return Builder.CreateTrunc(Ops[0], 5266 llvm::IntegerType::get(getLLVMContext(), 16)); 5267 } 5268 case NEON::BI__builtin_neon_vminv_u8: { 5269 Int = Intrinsic::aarch64_neon_uminv; 5270 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5271 VTy = 5272 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 5273 llvm::Type *Tys[2] = { Ty, VTy }; 5274 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5275 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5276 return Builder.CreateTrunc(Ops[0], 5277 llvm::IntegerType::get(getLLVMContext(), 8)); 5278 } 5279 case NEON::BI__builtin_neon_vminv_u16: { 5280 Int = Intrinsic::aarch64_neon_uminv; 5281 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5282 VTy = 5283 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 5284 llvm::Type *Tys[2] = { Ty, VTy }; 5285 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5286 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5287 return Builder.CreateTrunc(Ops[0], 5288 llvm::IntegerType::get(getLLVMContext(), 16)); 5289 } 5290 case NEON::BI__builtin_neon_vminvq_u8: { 5291 Int = Intrinsic::aarch64_neon_uminv; 5292 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5293 VTy = 5294 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 5295 llvm::Type *Tys[2] = { Ty, VTy }; 5296 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5297 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5298 return Builder.CreateTrunc(Ops[0], 5299 llvm::IntegerType::get(getLLVMContext(), 8)); 5300 } 5301 case NEON::BI__builtin_neon_vminvq_u16: { 5302 Int = Intrinsic::aarch64_neon_uminv; 5303 Ty = llvm::IntegerType::get(getLLVMContext(), 
32); 5304 VTy = 5305 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 5306 llvm::Type *Tys[2] = { Ty, VTy }; 5307 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5308 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5309 return Builder.CreateTrunc(Ops[0], 5310 llvm::IntegerType::get(getLLVMContext(), 16)); 5311 } 5312 case NEON::BI__builtin_neon_vminv_s8: { 5313 Int = Intrinsic::aarch64_neon_sminv; 5314 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5315 VTy = 5316 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 5317 llvm::Type *Tys[2] = { Ty, VTy }; 5318 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5319 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5320 return Builder.CreateTrunc(Ops[0], 5321 llvm::IntegerType::get(getLLVMContext(), 8)); 5322 } 5323 case NEON::BI__builtin_neon_vminv_s16: { 5324 Int = Intrinsic::aarch64_neon_sminv; 5325 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5326 VTy = 5327 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 5328 llvm::Type *Tys[2] = { Ty, VTy }; 5329 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5330 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5331 return Builder.CreateTrunc(Ops[0], 5332 llvm::IntegerType::get(getLLVMContext(), 16)); 5333 } 5334 case NEON::BI__builtin_neon_vminvq_s8: { 5335 Int = Intrinsic::aarch64_neon_sminv; 5336 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5337 VTy = 5338 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 5339 llvm::Type *Tys[2] = { Ty, VTy }; 5340 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5341 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5342 return Builder.CreateTrunc(Ops[0], 5343 llvm::IntegerType::get(getLLVMContext(), 8)); 5344 } 5345 case NEON::BI__builtin_neon_vminvq_s16: { 5346 Int = Intrinsic::aarch64_neon_sminv; 5347 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5348 VTy = 5349 
llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 5350 llvm::Type *Tys[2] = { Ty, VTy }; 5351 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5352 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5353 return Builder.CreateTrunc(Ops[0], 5354 llvm::IntegerType::get(getLLVMContext(), 16)); 5355 } 5356 case NEON::BI__builtin_neon_vmul_n_f64: { 5357 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 5358 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy); 5359 return Builder.CreateFMul(Ops[0], RHS); 5360 } 5361 case NEON::BI__builtin_neon_vaddlv_u8: { 5362 Int = Intrinsic::aarch64_neon_uaddlv; 5363 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5364 VTy = 5365 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 5366 llvm::Type *Tys[2] = { Ty, VTy }; 5367 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5368 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 5369 return Builder.CreateTrunc(Ops[0], 5370 llvm::IntegerType::get(getLLVMContext(), 16)); 5371 } 5372 case NEON::BI__builtin_neon_vaddlv_u16: { 5373 Int = Intrinsic::aarch64_neon_uaddlv; 5374 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5375 VTy = 5376 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 5377 llvm::Type *Tys[2] = { Ty, VTy }; 5378 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5379 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 5380 } 5381 case NEON::BI__builtin_neon_vaddlvq_u8: { 5382 Int = Intrinsic::aarch64_neon_uaddlv; 5383 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5384 VTy = 5385 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 5386 llvm::Type *Tys[2] = { Ty, VTy }; 5387 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5388 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 5389 return Builder.CreateTrunc(Ops[0], 5390 llvm::IntegerType::get(getLLVMContext(), 16)); 5391 } 5392 case NEON::BI__builtin_neon_vaddlvq_u16: { 
5393 Int = Intrinsic::aarch64_neon_uaddlv; 5394 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5395 VTy = 5396 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 5397 llvm::Type *Tys[2] = { Ty, VTy }; 5398 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5399 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 5400 } 5401 case NEON::BI__builtin_neon_vaddlv_s8: { 5402 Int = Intrinsic::aarch64_neon_saddlv; 5403 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5404 VTy = 5405 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 5406 llvm::Type *Tys[2] = { Ty, VTy }; 5407 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5408 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 5409 return Builder.CreateTrunc(Ops[0], 5410 llvm::IntegerType::get(getLLVMContext(), 16)); 5411 } 5412 case NEON::BI__builtin_neon_vaddlv_s16: { 5413 Int = Intrinsic::aarch64_neon_saddlv; 5414 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5415 VTy = 5416 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 5417 llvm::Type *Tys[2] = { Ty, VTy }; 5418 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5419 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 5420 } 5421 case NEON::BI__builtin_neon_vaddlvq_s8: { 5422 Int = Intrinsic::aarch64_neon_saddlv; 5423 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5424 VTy = 5425 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 5426 llvm::Type *Tys[2] = { Ty, VTy }; 5427 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5428 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 5429 return Builder.CreateTrunc(Ops[0], 5430 llvm::IntegerType::get(getLLVMContext(), 16)); 5431 } 5432 case NEON::BI__builtin_neon_vaddlvq_s16: { 5433 Int = Intrinsic::aarch64_neon_saddlv; 5434 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5435 VTy = 5436 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 5437 llvm::Type *Tys[2] = { 
Ty, VTy }; 5438 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5439 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 5440 } 5441 case NEON::BI__builtin_neon_vsri_n_v: 5442 case NEON::BI__builtin_neon_vsriq_n_v: { 5443 Int = Intrinsic::aarch64_neon_vsri; 5444 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 5445 return EmitNeonCall(Intrin, Ops, "vsri_n"); 5446 } 5447 case NEON::BI__builtin_neon_vsli_n_v: 5448 case NEON::BI__builtin_neon_vsliq_n_v: { 5449 Int = Intrinsic::aarch64_neon_vsli; 5450 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 5451 return EmitNeonCall(Intrin, Ops, "vsli_n"); 5452 } 5453 case NEON::BI__builtin_neon_vsra_n_v: 5454 case NEON::BI__builtin_neon_vsraq_n_v: 5455 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5456 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 5457 return Builder.CreateAdd(Ops[0], Ops[1]); 5458 case NEON::BI__builtin_neon_vrsra_n_v: 5459 case NEON::BI__builtin_neon_vrsraq_n_v: { 5460 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl; 5461 SmallVector<llvm::Value*,2> TmpOps; 5462 TmpOps.push_back(Ops[1]); 5463 TmpOps.push_back(Ops[2]); 5464 Function* F = CGM.getIntrinsic(Int, Ty); 5465 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true); 5466 Ops[0] = Builder.CreateBitCast(Ops[0], VTy); 5467 return Builder.CreateAdd(Ops[0], tmp); 5468 } 5469 // FIXME: Sharing loads & stores with 32-bit is complicated by the absence 5470 // of an Align parameter here. 
5471 case NEON::BI__builtin_neon_vld1_x2_v: 5472 case NEON::BI__builtin_neon_vld1q_x2_v: 5473 case NEON::BI__builtin_neon_vld1_x3_v: 5474 case NEON::BI__builtin_neon_vld1q_x3_v: 5475 case NEON::BI__builtin_neon_vld1_x4_v: 5476 case NEON::BI__builtin_neon_vld1q_x4_v: { 5477 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); 5478 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 5479 llvm::Type *Tys[2] = { VTy, PTy }; 5480 unsigned Int; 5481 switch (BuiltinID) { 5482 case NEON::BI__builtin_neon_vld1_x2_v: 5483 case NEON::BI__builtin_neon_vld1q_x2_v: 5484 Int = Intrinsic::aarch64_neon_ld1x2; 5485 break; 5486 case NEON::BI__builtin_neon_vld1_x3_v: 5487 case NEON::BI__builtin_neon_vld1q_x3_v: 5488 Int = Intrinsic::aarch64_neon_ld1x3; 5489 break; 5490 case NEON::BI__builtin_neon_vld1_x4_v: 5491 case NEON::BI__builtin_neon_vld1q_x4_v: 5492 Int = Intrinsic::aarch64_neon_ld1x4; 5493 break; 5494 } 5495 Function *F = CGM.getIntrinsic(Int, Tys); 5496 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN"); 5497 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5498 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5499 return Builder.CreateStore(Ops[1], Ops[0]); 5500 } 5501 case NEON::BI__builtin_neon_vst1_x2_v: 5502 case NEON::BI__builtin_neon_vst1q_x2_v: 5503 case NEON::BI__builtin_neon_vst1_x3_v: 5504 case NEON::BI__builtin_neon_vst1q_x3_v: 5505 case NEON::BI__builtin_neon_vst1_x4_v: 5506 case NEON::BI__builtin_neon_vst1q_x4_v: { 5507 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); 5508 llvm::Type *Tys[2] = { VTy, PTy }; 5509 unsigned Int; 5510 switch (BuiltinID) { 5511 case NEON::BI__builtin_neon_vst1_x2_v: 5512 case NEON::BI__builtin_neon_vst1q_x2_v: 5513 Int = Intrinsic::aarch64_neon_st1x2; 5514 break; 5515 case NEON::BI__builtin_neon_vst1_x3_v: 5516 case NEON::BI__builtin_neon_vst1q_x3_v: 5517 Int = Intrinsic::aarch64_neon_st1x3; 5518 break; 5519 case NEON::BI__builtin_neon_vst1_x4_v: 5520 case NEON::BI__builtin_neon_vst1q_x4_v: 
5521 Int = Intrinsic::aarch64_neon_st1x4; 5522 break; 5523 } 5524 SmallVector<Value *, 4> IntOps(Ops.begin()+1, Ops.end()); 5525 IntOps.push_back(Ops[0]); 5526 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), IntOps, ""); 5527 } 5528 case NEON::BI__builtin_neon_vld1_v: 5529 case NEON::BI__builtin_neon_vld1q_v: 5530 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 5531 return Builder.CreateLoad(Ops[0]); 5532 case NEON::BI__builtin_neon_vst1_v: 5533 case NEON::BI__builtin_neon_vst1q_v: 5534 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 5535 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 5536 return Builder.CreateStore(Ops[1], Ops[0]); 5537 case NEON::BI__builtin_neon_vld1_lane_v: 5538 case NEON::BI__builtin_neon_vld1q_lane_v: 5539 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5540 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 5541 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5542 Ops[0] = Builder.CreateLoad(Ops[0]); 5543 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane"); 5544 case NEON::BI__builtin_neon_vld1_dup_v: 5545 case NEON::BI__builtin_neon_vld1q_dup_v: { 5546 Value *V = UndefValue::get(Ty); 5547 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 5548 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5549 Ops[0] = Builder.CreateLoad(Ops[0]); 5550 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 5551 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI); 5552 return EmitNeonSplat(Ops[0], CI); 5553 } 5554 case NEON::BI__builtin_neon_vst1_lane_v: 5555 case NEON::BI__builtin_neon_vst1q_lane_v: 5556 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5557 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 5558 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5559 return Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty)); 5560 case NEON::BI__builtin_neon_vld2_v: 5561 case NEON::BI__builtin_neon_vld2q_v: { 5562 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 5563 
Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 5564 llvm::Type *Tys[2] = { VTy, PTy }; 5565 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys); 5566 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 5567 Ops[0] = Builder.CreateBitCast(Ops[0], 5568 llvm::PointerType::getUnqual(Ops[1]->getType())); 5569 return Builder.CreateStore(Ops[1], Ops[0]); 5570 } 5571 case NEON::BI__builtin_neon_vld3_v: 5572 case NEON::BI__builtin_neon_vld3q_v: { 5573 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 5574 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 5575 llvm::Type *Tys[2] = { VTy, PTy }; 5576 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys); 5577 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 5578 Ops[0] = Builder.CreateBitCast(Ops[0], 5579 llvm::PointerType::getUnqual(Ops[1]->getType())); 5580 return Builder.CreateStore(Ops[1], Ops[0]); 5581 } 5582 case NEON::BI__builtin_neon_vld4_v: 5583 case NEON::BI__builtin_neon_vld4q_v: { 5584 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 5585 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 5586 llvm::Type *Tys[2] = { VTy, PTy }; 5587 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys); 5588 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); 5589 Ops[0] = Builder.CreateBitCast(Ops[0], 5590 llvm::PointerType::getUnqual(Ops[1]->getType())); 5591 return Builder.CreateStore(Ops[1], Ops[0]); 5592 } 5593 case NEON::BI__builtin_neon_vld2_dup_v: 5594 case NEON::BI__builtin_neon_vld2q_dup_v: { 5595 llvm::Type *PTy = 5596 llvm::PointerType::getUnqual(VTy->getElementType()); 5597 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 5598 llvm::Type *Tys[2] = { VTy, PTy }; 5599 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys); 5600 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 5601 Ops[0] = Builder.CreateBitCast(Ops[0], 5602 llvm::PointerType::getUnqual(Ops[1]->getType())); 5603 return Builder.CreateStore(Ops[1], Ops[0]); 5604 } 5605 case NEON::BI__builtin_neon_vld3_dup_v: 5606 case 
NEON::BI__builtin_neon_vld3q_dup_v: { 5607 llvm::Type *PTy = 5608 llvm::PointerType::getUnqual(VTy->getElementType()); 5609 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 5610 llvm::Type *Tys[2] = { VTy, PTy }; 5611 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys); 5612 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 5613 Ops[0] = Builder.CreateBitCast(Ops[0], 5614 llvm::PointerType::getUnqual(Ops[1]->getType())); 5615 return Builder.CreateStore(Ops[1], Ops[0]); 5616 } 5617 case NEON::BI__builtin_neon_vld4_dup_v: 5618 case NEON::BI__builtin_neon_vld4q_dup_v: { 5619 llvm::Type *PTy = 5620 llvm::PointerType::getUnqual(VTy->getElementType()); 5621 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 5622 llvm::Type *Tys[2] = { VTy, PTy }; 5623 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys); 5624 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); 5625 Ops[0] = Builder.CreateBitCast(Ops[0], 5626 llvm::PointerType::getUnqual(Ops[1]->getType())); 5627 return Builder.CreateStore(Ops[1], Ops[0]); 5628 } 5629 case NEON::BI__builtin_neon_vld2_lane_v: 5630 case NEON::BI__builtin_neon_vld2q_lane_v: { 5631 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 5632 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys); 5633 Ops.push_back(Ops[1]); 5634 Ops.erase(Ops.begin()+1); 5635 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5636 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 5637 Ops[3] = Builder.CreateZExt(Ops[3], 5638 llvm::IntegerType::get(getLLVMContext(), 64)); 5639 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane"); 5640 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5641 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5642 return Builder.CreateStore(Ops[1], Ops[0]); 5643 } 5644 case NEON::BI__builtin_neon_vld3_lane_v: 5645 case NEON::BI__builtin_neon_vld3q_lane_v: { 5646 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 5647 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys); 5648 
Ops.push_back(Ops[1]); 5649 Ops.erase(Ops.begin()+1); 5650 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5651 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 5652 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 5653 Ops[4] = Builder.CreateZExt(Ops[4], 5654 llvm::IntegerType::get(getLLVMContext(), 64)); 5655 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane"); 5656 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5657 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5658 return Builder.CreateStore(Ops[1], Ops[0]); 5659 } 5660 case NEON::BI__builtin_neon_vld4_lane_v: 5661 case NEON::BI__builtin_neon_vld4q_lane_v: { 5662 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 5663 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys); 5664 Ops.push_back(Ops[1]); 5665 Ops.erase(Ops.begin()+1); 5666 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5667 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 5668 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 5669 Ops[4] = Builder.CreateBitCast(Ops[4], Ty); 5670 Ops[5] = Builder.CreateZExt(Ops[5], 5671 llvm::IntegerType::get(getLLVMContext(), 64)); 5672 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane"); 5673 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5674 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5675 return Builder.CreateStore(Ops[1], Ops[0]); 5676 } 5677 case NEON::BI__builtin_neon_vst2_v: 5678 case NEON::BI__builtin_neon_vst2q_v: { 5679 Ops.push_back(Ops[0]); 5680 Ops.erase(Ops.begin()); 5681 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() }; 5682 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys), 5683 Ops, ""); 5684 } 5685 case NEON::BI__builtin_neon_vst2_lane_v: 5686 case NEON::BI__builtin_neon_vst2q_lane_v: { 5687 Ops.push_back(Ops[0]); 5688 Ops.erase(Ops.begin()); 5689 Ops[2] = Builder.CreateZExt(Ops[2], 5690 llvm::IntegerType::get(getLLVMContext(), 64)); 5691 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; 5692 return 
EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys), 5693 Ops, ""); 5694 } 5695 case NEON::BI__builtin_neon_vst3_v: 5696 case NEON::BI__builtin_neon_vst3q_v: { 5697 Ops.push_back(Ops[0]); 5698 Ops.erase(Ops.begin()); 5699 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; 5700 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys), 5701 Ops, ""); 5702 } 5703 case NEON::BI__builtin_neon_vst3_lane_v: 5704 case NEON::BI__builtin_neon_vst3q_lane_v: { 5705 Ops.push_back(Ops[0]); 5706 Ops.erase(Ops.begin()); 5707 Ops[3] = Builder.CreateZExt(Ops[3], 5708 llvm::IntegerType::get(getLLVMContext(), 64)); 5709 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 5710 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys), 5711 Ops, ""); 5712 } 5713 case NEON::BI__builtin_neon_vst4_v: 5714 case NEON::BI__builtin_neon_vst4q_v: { 5715 Ops.push_back(Ops[0]); 5716 Ops.erase(Ops.begin()); 5717 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 5718 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys), 5719 Ops, ""); 5720 } 5721 case NEON::BI__builtin_neon_vst4_lane_v: 5722 case NEON::BI__builtin_neon_vst4q_lane_v: { 5723 Ops.push_back(Ops[0]); 5724 Ops.erase(Ops.begin()); 5725 Ops[4] = Builder.CreateZExt(Ops[4], 5726 llvm::IntegerType::get(getLLVMContext(), 64)); 5727 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() }; 5728 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys), 5729 Ops, ""); 5730 } 5731 case NEON::BI__builtin_neon_vtrn_v: 5732 case NEON::BI__builtin_neon_vtrnq_v: { 5733 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 5734 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5735 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 5736 Value *SV = nullptr; 5737 5738 for (unsigned vi = 0; vi != 2; ++vi) { 5739 SmallVector<Constant*, 16> Indices; 5740 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 5741 Indices.push_back(ConstantInt::get(Int32Ty, i+vi)); 5742 
Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi)); 5743 } 5744 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); 5745 SV = llvm::ConstantVector::get(Indices); 5746 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn"); 5747 SV = Builder.CreateStore(SV, Addr); 5748 } 5749 return SV; 5750 } 5751 case NEON::BI__builtin_neon_vuzp_v: 5752 case NEON::BI__builtin_neon_vuzpq_v: { 5753 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 5754 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5755 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 5756 Value *SV = nullptr; 5757 5758 for (unsigned vi = 0; vi != 2; ++vi) { 5759 SmallVector<Constant*, 16> Indices; 5760 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 5761 Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi)); 5762 5763 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); 5764 SV = llvm::ConstantVector::get(Indices); 5765 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp"); 5766 SV = Builder.CreateStore(SV, Addr); 5767 } 5768 return SV; 5769 } 5770 case NEON::BI__builtin_neon_vzip_v: 5771 case NEON::BI__builtin_neon_vzipq_v: { 5772 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 5773 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5774 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 5775 Value *SV = nullptr; 5776 5777 for (unsigned vi = 0; vi != 2; ++vi) { 5778 SmallVector<Constant*, 16> Indices; 5779 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 5780 Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1)); 5781 Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e)); 5782 } 5783 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); 5784 SV = llvm::ConstantVector::get(Indices); 5785 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip"); 5786 SV = Builder.CreateStore(SV, Addr); 5787 } 5788 return SV; 5789 } 5790 case NEON::BI__builtin_neon_vqtbl1q_v: { 5791 return 
EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty), 5792 Ops, "vtbl1"); 5793 } 5794 case NEON::BI__builtin_neon_vqtbl2q_v: { 5795 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty), 5796 Ops, "vtbl2"); 5797 } 5798 case NEON::BI__builtin_neon_vqtbl3q_v: { 5799 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty), 5800 Ops, "vtbl3"); 5801 } 5802 case NEON::BI__builtin_neon_vqtbl4q_v: { 5803 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty), 5804 Ops, "vtbl4"); 5805 } 5806 case NEON::BI__builtin_neon_vqtbx1q_v: { 5807 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty), 5808 Ops, "vtbx1"); 5809 } 5810 case NEON::BI__builtin_neon_vqtbx2q_v: { 5811 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty), 5812 Ops, "vtbx2"); 5813 } 5814 case NEON::BI__builtin_neon_vqtbx3q_v: { 5815 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty), 5816 Ops, "vtbx3"); 5817 } 5818 case NEON::BI__builtin_neon_vqtbx4q_v: { 5819 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty), 5820 Ops, "vtbx4"); 5821 } 5822 case NEON::BI__builtin_neon_vsqadd_v: 5823 case NEON::BI__builtin_neon_vsqaddq_v: { 5824 Int = Intrinsic::aarch64_neon_usqadd; 5825 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd"); 5826 } 5827 case NEON::BI__builtin_neon_vuqadd_v: 5828 case NEON::BI__builtin_neon_vuqaddq_v: { 5829 Int = Intrinsic::aarch64_neon_suqadd; 5830 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); 5831 } 5832 } 5833 } 5834 5835 llvm::Value *CodeGenFunction:: 5836 BuildVector(ArrayRef<llvm::Value*> Ops) { 5837 assert((Ops.size() & (Ops.size() - 1)) == 0 && 5838 "Not a power-of-two sized vector!"); 5839 bool AllConstants = true; 5840 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i) 5841 AllConstants &= isa<Constant>(Ops[i]); 5842 5843 // If this is a constant vector, create a ConstantVector. 
5844 if (AllConstants) { 5845 SmallVector<llvm::Constant*, 16> CstOps; 5846 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 5847 CstOps.push_back(cast<Constant>(Ops[i])); 5848 return llvm::ConstantVector::get(CstOps); 5849 } 5850 5851 // Otherwise, insertelement the values to build the vector. 5852 Value *Result = 5853 llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size())); 5854 5855 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 5856 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i)); 5857 5858 return Result; 5859 } 5860 5861 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, 5862 const CallExpr *E) { 5863 SmallVector<Value*, 4> Ops; 5864 5865 // Find out if any arguments are required to be integer constant expressions. 5866 unsigned ICEArguments = 0; 5867 ASTContext::GetBuiltinTypeError Error; 5868 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 5869 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 5870 5871 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { 5872 // If this is a normal argument, just emit it as a scalar. 5873 if ((ICEArguments & (1 << i)) == 0) { 5874 Ops.push_back(EmitScalarExpr(E->getArg(i))); 5875 continue; 5876 } 5877 5878 // If this is required to be a constant, constant fold it so that we know 5879 // that the generated intrinsic gets a ConstantInt. 
5880 llvm::APSInt Result; 5881 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 5882 assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; 5883 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 5884 } 5885 5886 switch (BuiltinID) { 5887 default: return nullptr; 5888 case X86::BI_mm_prefetch: { 5889 Value *Address = EmitScalarExpr(E->getArg(0)); 5890 Value *RW = ConstantInt::get(Int32Ty, 0); 5891 Value *Locality = EmitScalarExpr(E->getArg(1)); 5892 Value *Data = ConstantInt::get(Int32Ty, 1); 5893 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 5894 return Builder.CreateCall4(F, Address, RW, Locality, Data); 5895 } 5896 case X86::BI__builtin_ia32_vec_init_v8qi: 5897 case X86::BI__builtin_ia32_vec_init_v4hi: 5898 case X86::BI__builtin_ia32_vec_init_v2si: 5899 return Builder.CreateBitCast(BuildVector(Ops), 5900 llvm::Type::getX86_MMXTy(getLLVMContext())); 5901 case X86::BI__builtin_ia32_vec_ext_v2si: 5902 return Builder.CreateExtractElement(Ops[0], 5903 llvm::ConstantInt::get(Ops[1]->getType(), 0)); 5904 case X86::BI__builtin_ia32_ldmxcsr: { 5905 Value *Tmp = CreateMemTemp(E->getArg(0)->getType()); 5906 Builder.CreateStore(Ops[0], Tmp); 5907 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr), 5908 Builder.CreateBitCast(Tmp, Int8PtrTy)); 5909 } 5910 case X86::BI__builtin_ia32_stmxcsr: { 5911 Value *Tmp = CreateMemTemp(E->getType()); 5912 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr), 5913 Builder.CreateBitCast(Tmp, Int8PtrTy)); 5914 return Builder.CreateLoad(Tmp, "stmxcsr"); 5915 } 5916 case X86::BI__builtin_ia32_storehps: 5917 case X86::BI__builtin_ia32_storelps: { 5918 llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty); 5919 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2); 5920 5921 // cast val v2i64 5922 Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast"); 5923 5924 // extract (0, 1) 5925 unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 
0 : 1; 5926 llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index); 5927 Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract"); 5928 5929 // cast pointer to i64 & store 5930 Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy); 5931 return Builder.CreateStore(Ops[1], Ops[0]); 5932 } 5933 case X86::BI__builtin_ia32_palignr128: 5934 case X86::BI__builtin_ia32_palignr256: { 5935 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 5936 5937 unsigned NumElts = 5938 cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); 5939 assert(NumElts % 16 == 0); 5940 unsigned NumLanes = NumElts / 16; 5941 unsigned NumLaneElts = NumElts / NumLanes; 5942 5943 // If palignr is shifting the pair of vectors more than the size of two 5944 // lanes, emit zero. 5945 if (ShiftVal >= (2 * NumLaneElts)) 5946 return llvm::Constant::getNullValue(ConvertType(E->getType())); 5947 5948 // If palignr is shifting the pair of input vectors more than one lane, 5949 // but less than two lanes, convert to shifting in zeroes. 5950 if (ShiftVal > NumLaneElts) { 5951 ShiftVal -= NumLaneElts; 5952 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType()); 5953 } 5954 5955 SmallVector<llvm::Constant*, 32> Indices; 5956 // 256-bit palignr operates on 128-bit lanes so we need to handle that 5957 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 5958 for (unsigned i = 0; i != NumLaneElts; ++i) { 5959 unsigned Idx = ShiftVal + i; 5960 if (Idx >= NumLaneElts) 5961 Idx += NumElts - NumLaneElts; // End of lane, switch operand. 5962 Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx + l)); 5963 } 5964 } 5965 5966 Value* SV = llvm::ConstantVector::get(Indices); 5967 return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr"); 5968 } 5969 case X86::BI__builtin_ia32_pslldqi256: { 5970 // Shift value is in bits so divide by 8. 5971 unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3; 5972 5973 // If pslldq is shifting the vector more than 15 bytes, emit zero. 
5974 if (shiftVal >= 16) 5975 return llvm::Constant::getNullValue(ConvertType(E->getType())); 5976 5977 SmallVector<llvm::Constant*, 32> Indices; 5978 // 256-bit pslldq operates on 128-bit lanes so we need to handle that 5979 for (unsigned l = 0; l != 32; l += 16) { 5980 for (unsigned i = 0; i != 16; ++i) { 5981 unsigned Idx = 32 + i - shiftVal; 5982 if (Idx < 32) Idx -= 16; // end of lane, switch operand. 5983 Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx + l)); 5984 } 5985 } 5986 5987 llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32); 5988 Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); 5989 Value *Zero = llvm::Constant::getNullValue(VecTy); 5990 5991 Value *SV = llvm::ConstantVector::get(Indices); 5992 SV = Builder.CreateShuffleVector(Zero, Ops[0], SV, "pslldq"); 5993 llvm::Type *ResultType = ConvertType(E->getType()); 5994 return Builder.CreateBitCast(SV, ResultType, "cast"); 5995 } 5996 case X86::BI__builtin_ia32_psrldqi256: { 5997 // Shift value is in bits so divide by 8. 5998 unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3; 5999 6000 // If psrldq is shifting the vector more than 15 bytes, emit zero. 6001 if (shiftVal >= 16) 6002 return llvm::Constant::getNullValue(ConvertType(E->getType())); 6003 6004 SmallVector<llvm::Constant*, 32> Indices; 6005 // 256-bit psrldq operates on 128-bit lanes so we need to handle that 6006 for (unsigned l = 0; l != 32; l += 16) { 6007 for (unsigned i = 0; i != 16; ++i) { 6008 unsigned Idx = i + shiftVal; 6009 if (Idx >= 16) Idx += 16; // end of lane, switch operand. 
6010 Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx + l)); 6011 } 6012 } 6013 6014 llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32); 6015 Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); 6016 Value *Zero = llvm::Constant::getNullValue(VecTy); 6017 6018 Value *SV = llvm::ConstantVector::get(Indices); 6019 SV = Builder.CreateShuffleVector(Ops[0], Zero, SV, "psrldq"); 6020 llvm::Type *ResultType = ConvertType(E->getType()); 6021 return Builder.CreateBitCast(SV, ResultType, "cast"); 6022 } 6023 case X86::BI__builtin_ia32_movntps: 6024 case X86::BI__builtin_ia32_movntps256: 6025 case X86::BI__builtin_ia32_movntpd: 6026 case X86::BI__builtin_ia32_movntpd256: 6027 case X86::BI__builtin_ia32_movntdq: 6028 case X86::BI__builtin_ia32_movntdq256: 6029 case X86::BI__builtin_ia32_movnti: 6030 case X86::BI__builtin_ia32_movnti64: { 6031 llvm::MDNode *Node = llvm::MDNode::get( 6032 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); 6033 6034 // Convert the type of the pointer to a pointer to the stored type. 6035 Value *BC = Builder.CreateBitCast(Ops[0], 6036 llvm::PointerType::getUnqual(Ops[1]->getType()), 6037 "cast"); 6038 StoreInst *SI = Builder.CreateStore(Ops[1], BC); 6039 SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); 6040 6041 // If the operand is an integer, we can't assume alignment. Otherwise, 6042 // assume natural alignment. 6043 QualType ArgTy = E->getArg(1)->getType(); 6044 unsigned Align; 6045 if (ArgTy->isIntegerType()) 6046 Align = 1; 6047 else 6048 Align = getContext().getTypeSizeInChars(ArgTy).getQuantity(); 6049 SI->setAlignment(Align); 6050 return SI; 6051 } 6052 // 3DNow! 
6053 case X86::BI__builtin_ia32_pswapdsf: 6054 case X86::BI__builtin_ia32_pswapdsi: { 6055 llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext()); 6056 Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast"); 6057 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd); 6058 return Builder.CreateCall(F, Ops, "pswapd"); 6059 } 6060 case X86::BI__builtin_ia32_rdrand16_step: 6061 case X86::BI__builtin_ia32_rdrand32_step: 6062 case X86::BI__builtin_ia32_rdrand64_step: 6063 case X86::BI__builtin_ia32_rdseed16_step: 6064 case X86::BI__builtin_ia32_rdseed32_step: 6065 case X86::BI__builtin_ia32_rdseed64_step: { 6066 Intrinsic::ID ID; 6067 switch (BuiltinID) { 6068 default: llvm_unreachable("Unsupported intrinsic!"); 6069 case X86::BI__builtin_ia32_rdrand16_step: 6070 ID = Intrinsic::x86_rdrand_16; 6071 break; 6072 case X86::BI__builtin_ia32_rdrand32_step: 6073 ID = Intrinsic::x86_rdrand_32; 6074 break; 6075 case X86::BI__builtin_ia32_rdrand64_step: 6076 ID = Intrinsic::x86_rdrand_64; 6077 break; 6078 case X86::BI__builtin_ia32_rdseed16_step: 6079 ID = Intrinsic::x86_rdseed_16; 6080 break; 6081 case X86::BI__builtin_ia32_rdseed32_step: 6082 ID = Intrinsic::x86_rdseed_32; 6083 break; 6084 case X86::BI__builtin_ia32_rdseed64_step: 6085 ID = Intrinsic::x86_rdseed_64; 6086 break; 6087 } 6088 6089 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID)); 6090 Builder.CreateStore(Builder.CreateExtractValue(Call, 0), Ops[0]); 6091 return Builder.CreateExtractValue(Call, 1); 6092 } 6093 // AVX2 broadcast 6094 case X86::BI__builtin_ia32_vbroadcastsi256: { 6095 Value *VecTmp = CreateMemTemp(E->getArg(0)->getType()); 6096 Builder.CreateStore(Ops[0], VecTmp); 6097 Value *F = CGM.getIntrinsic(Intrinsic::x86_avx2_vbroadcasti128); 6098 return Builder.CreateCall(F, Builder.CreateBitCast(VecTmp, Int8PtrTy)); 6099 } 6100 // SSE comparison intrisics 6101 case X86::BI__builtin_ia32_cmpeqps: 6102 case X86::BI__builtin_ia32_cmpltps: 6103 case X86::BI__builtin_ia32_cmpleps: 6104 
case X86::BI__builtin_ia32_cmpunordps: 6105 case X86::BI__builtin_ia32_cmpneqps: 6106 case X86::BI__builtin_ia32_cmpnltps: 6107 case X86::BI__builtin_ia32_cmpnleps: 6108 case X86::BI__builtin_ia32_cmpordps: 6109 case X86::BI__builtin_ia32_cmpeqss: 6110 case X86::BI__builtin_ia32_cmpltss: 6111 case X86::BI__builtin_ia32_cmpless: 6112 case X86::BI__builtin_ia32_cmpunordss: 6113 case X86::BI__builtin_ia32_cmpneqss: 6114 case X86::BI__builtin_ia32_cmpnltss: 6115 case X86::BI__builtin_ia32_cmpnless: 6116 case X86::BI__builtin_ia32_cmpordss: 6117 case X86::BI__builtin_ia32_cmpeqpd: 6118 case X86::BI__builtin_ia32_cmpltpd: 6119 case X86::BI__builtin_ia32_cmplepd: 6120 case X86::BI__builtin_ia32_cmpunordpd: 6121 case X86::BI__builtin_ia32_cmpneqpd: 6122 case X86::BI__builtin_ia32_cmpnltpd: 6123 case X86::BI__builtin_ia32_cmpnlepd: 6124 case X86::BI__builtin_ia32_cmpordpd: 6125 case X86::BI__builtin_ia32_cmpeqsd: 6126 case X86::BI__builtin_ia32_cmpltsd: 6127 case X86::BI__builtin_ia32_cmplesd: 6128 case X86::BI__builtin_ia32_cmpunordsd: 6129 case X86::BI__builtin_ia32_cmpneqsd: 6130 case X86::BI__builtin_ia32_cmpnltsd: 6131 case X86::BI__builtin_ia32_cmpnlesd: 6132 case X86::BI__builtin_ia32_cmpordsd: 6133 // These exist so that the builtin that takes an immediate can be bounds 6134 // checked by clang to avoid passing bad immediates to the backend. Since 6135 // AVX has a larger immediate than SSE we would need separate builtins to 6136 // do the different bounds checking. Rather than create a clang specific 6137 // SSE only builtin, this implements eight separate builtins to match gcc 6138 // implementation. 6139 6140 // Choose the immediate. 
6141 unsigned Imm; 6142 switch (BuiltinID) { 6143 default: llvm_unreachable("Unsupported intrinsic!"); 6144 case X86::BI__builtin_ia32_cmpeqps: 6145 case X86::BI__builtin_ia32_cmpeqss: 6146 case X86::BI__builtin_ia32_cmpeqpd: 6147 case X86::BI__builtin_ia32_cmpeqsd: 6148 Imm = 0; 6149 break; 6150 case X86::BI__builtin_ia32_cmpltps: 6151 case X86::BI__builtin_ia32_cmpltss: 6152 case X86::BI__builtin_ia32_cmpltpd: 6153 case X86::BI__builtin_ia32_cmpltsd: 6154 Imm = 1; 6155 break; 6156 case X86::BI__builtin_ia32_cmpleps: 6157 case X86::BI__builtin_ia32_cmpless: 6158 case X86::BI__builtin_ia32_cmplepd: 6159 case X86::BI__builtin_ia32_cmplesd: 6160 Imm = 2; 6161 break; 6162 case X86::BI__builtin_ia32_cmpunordps: 6163 case X86::BI__builtin_ia32_cmpunordss: 6164 case X86::BI__builtin_ia32_cmpunordpd: 6165 case X86::BI__builtin_ia32_cmpunordsd: 6166 Imm = 3; 6167 break; 6168 case X86::BI__builtin_ia32_cmpneqps: 6169 case X86::BI__builtin_ia32_cmpneqss: 6170 case X86::BI__builtin_ia32_cmpneqpd: 6171 case X86::BI__builtin_ia32_cmpneqsd: 6172 Imm = 4; 6173 break; 6174 case X86::BI__builtin_ia32_cmpnltps: 6175 case X86::BI__builtin_ia32_cmpnltss: 6176 case X86::BI__builtin_ia32_cmpnltpd: 6177 case X86::BI__builtin_ia32_cmpnltsd: 6178 Imm = 5; 6179 break; 6180 case X86::BI__builtin_ia32_cmpnleps: 6181 case X86::BI__builtin_ia32_cmpnless: 6182 case X86::BI__builtin_ia32_cmpnlepd: 6183 case X86::BI__builtin_ia32_cmpnlesd: 6184 Imm = 6; 6185 break; 6186 case X86::BI__builtin_ia32_cmpordps: 6187 case X86::BI__builtin_ia32_cmpordss: 6188 case X86::BI__builtin_ia32_cmpordpd: 6189 case X86::BI__builtin_ia32_cmpordsd: 6190 Imm = 7; 6191 break; 6192 } 6193 6194 // Choose the intrinsic ID. 
6195 const char *name; 6196 Intrinsic::ID ID; 6197 switch (BuiltinID) { 6198 default: llvm_unreachable("Unsupported intrinsic!"); 6199 case X86::BI__builtin_ia32_cmpeqps: 6200 case X86::BI__builtin_ia32_cmpltps: 6201 case X86::BI__builtin_ia32_cmpleps: 6202 case X86::BI__builtin_ia32_cmpunordps: 6203 case X86::BI__builtin_ia32_cmpneqps: 6204 case X86::BI__builtin_ia32_cmpnltps: 6205 case X86::BI__builtin_ia32_cmpnleps: 6206 case X86::BI__builtin_ia32_cmpordps: 6207 name = "cmpps"; 6208 ID = Intrinsic::x86_sse_cmp_ps; 6209 break; 6210 case X86::BI__builtin_ia32_cmpeqss: 6211 case X86::BI__builtin_ia32_cmpltss: 6212 case X86::BI__builtin_ia32_cmpless: 6213 case X86::BI__builtin_ia32_cmpunordss: 6214 case X86::BI__builtin_ia32_cmpneqss: 6215 case X86::BI__builtin_ia32_cmpnltss: 6216 case X86::BI__builtin_ia32_cmpnless: 6217 case X86::BI__builtin_ia32_cmpordss: 6218 name = "cmpss"; 6219 ID = Intrinsic::x86_sse_cmp_ss; 6220 break; 6221 case X86::BI__builtin_ia32_cmpeqpd: 6222 case X86::BI__builtin_ia32_cmpltpd: 6223 case X86::BI__builtin_ia32_cmplepd: 6224 case X86::BI__builtin_ia32_cmpunordpd: 6225 case X86::BI__builtin_ia32_cmpneqpd: 6226 case X86::BI__builtin_ia32_cmpnltpd: 6227 case X86::BI__builtin_ia32_cmpnlepd: 6228 case X86::BI__builtin_ia32_cmpordpd: 6229 name = "cmppd"; 6230 ID = Intrinsic::x86_sse2_cmp_pd; 6231 break; 6232 case X86::BI__builtin_ia32_cmpeqsd: 6233 case X86::BI__builtin_ia32_cmpltsd: 6234 case X86::BI__builtin_ia32_cmplesd: 6235 case X86::BI__builtin_ia32_cmpunordsd: 6236 case X86::BI__builtin_ia32_cmpneqsd: 6237 case X86::BI__builtin_ia32_cmpnltsd: 6238 case X86::BI__builtin_ia32_cmpnlesd: 6239 case X86::BI__builtin_ia32_cmpordsd: 6240 name = "cmpsd"; 6241 ID = Intrinsic::x86_sse2_cmp_sd; 6242 break; 6243 } 6244 6245 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm)); 6246 llvm::Function *F = CGM.getIntrinsic(ID); 6247 return Builder.CreateCall(F, Ops, name); 6248 } 6249 } 6250 6251 6252 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned 
BuiltinID, 6253 const CallExpr *E) { 6254 SmallVector<Value*, 4> Ops; 6255 6256 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) 6257 Ops.push_back(EmitScalarExpr(E->getArg(i))); 6258 6259 Intrinsic::ID ID = Intrinsic::not_intrinsic; 6260 6261 switch (BuiltinID) { 6262 default: return nullptr; 6263 6264 // vec_ld, vec_lvsl, vec_lvsr 6265 case PPC::BI__builtin_altivec_lvx: 6266 case PPC::BI__builtin_altivec_lvxl: 6267 case PPC::BI__builtin_altivec_lvebx: 6268 case PPC::BI__builtin_altivec_lvehx: 6269 case PPC::BI__builtin_altivec_lvewx: 6270 case PPC::BI__builtin_altivec_lvsl: 6271 case PPC::BI__builtin_altivec_lvsr: 6272 case PPC::BI__builtin_vsx_lxvd2x: 6273 case PPC::BI__builtin_vsx_lxvw4x: 6274 { 6275 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); 6276 6277 Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]); 6278 Ops.pop_back(); 6279 6280 switch (BuiltinID) { 6281 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!"); 6282 case PPC::BI__builtin_altivec_lvx: 6283 ID = Intrinsic::ppc_altivec_lvx; 6284 break; 6285 case PPC::BI__builtin_altivec_lvxl: 6286 ID = Intrinsic::ppc_altivec_lvxl; 6287 break; 6288 case PPC::BI__builtin_altivec_lvebx: 6289 ID = Intrinsic::ppc_altivec_lvebx; 6290 break; 6291 case PPC::BI__builtin_altivec_lvehx: 6292 ID = Intrinsic::ppc_altivec_lvehx; 6293 break; 6294 case PPC::BI__builtin_altivec_lvewx: 6295 ID = Intrinsic::ppc_altivec_lvewx; 6296 break; 6297 case PPC::BI__builtin_altivec_lvsl: 6298 ID = Intrinsic::ppc_altivec_lvsl; 6299 break; 6300 case PPC::BI__builtin_altivec_lvsr: 6301 ID = Intrinsic::ppc_altivec_lvsr; 6302 break; 6303 case PPC::BI__builtin_vsx_lxvd2x: 6304 ID = Intrinsic::ppc_vsx_lxvd2x; 6305 break; 6306 case PPC::BI__builtin_vsx_lxvw4x: 6307 ID = Intrinsic::ppc_vsx_lxvw4x; 6308 break; 6309 } 6310 llvm::Function *F = CGM.getIntrinsic(ID); 6311 return Builder.CreateCall(F, Ops, ""); 6312 } 6313 6314 // vec_st 6315 case PPC::BI__builtin_altivec_stvx: 6316 case PPC::BI__builtin_altivec_stvxl: 6317 case 
PPC::BI__builtin_altivec_stvebx: 6318 case PPC::BI__builtin_altivec_stvehx: 6319 case PPC::BI__builtin_altivec_stvewx: 6320 case PPC::BI__builtin_vsx_stxvd2x: 6321 case PPC::BI__builtin_vsx_stxvw4x: 6322 { 6323 Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy); 6324 Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]); 6325 Ops.pop_back(); 6326 6327 switch (BuiltinID) { 6328 default: llvm_unreachable("Unsupported st intrinsic!"); 6329 case PPC::BI__builtin_altivec_stvx: 6330 ID = Intrinsic::ppc_altivec_stvx; 6331 break; 6332 case PPC::BI__builtin_altivec_stvxl: 6333 ID = Intrinsic::ppc_altivec_stvxl; 6334 break; 6335 case PPC::BI__builtin_altivec_stvebx: 6336 ID = Intrinsic::ppc_altivec_stvebx; 6337 break; 6338 case PPC::BI__builtin_altivec_stvehx: 6339 ID = Intrinsic::ppc_altivec_stvehx; 6340 break; 6341 case PPC::BI__builtin_altivec_stvewx: 6342 ID = Intrinsic::ppc_altivec_stvewx; 6343 break; 6344 case PPC::BI__builtin_vsx_stxvd2x: 6345 ID = Intrinsic::ppc_vsx_stxvd2x; 6346 break; 6347 case PPC::BI__builtin_vsx_stxvw4x: 6348 ID = Intrinsic::ppc_vsx_stxvw4x; 6349 break; 6350 } 6351 llvm::Function *F = CGM.getIntrinsic(ID); 6352 return Builder.CreateCall(F, Ops, ""); 6353 } 6354 } 6355 } 6356 6357 // Emit an intrinsic that has 1 float or double. 6358 static Value *emitUnaryFPBuiltin(CodeGenFunction &CGF, 6359 const CallExpr *E, 6360 unsigned IntrinsicID) { 6361 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 6362 6363 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 6364 return CGF.Builder.CreateCall(F, Src0); 6365 } 6366 6367 // Emit an intrinsic that has 3 float or double operands. 
6368 static Value *emitTernaryFPBuiltin(CodeGenFunction &CGF, 6369 const CallExpr *E, 6370 unsigned IntrinsicID) { 6371 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 6372 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 6373 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); 6374 6375 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 6376 return CGF.Builder.CreateCall3(F, Src0, Src1, Src2); 6377 } 6378 6379 // Emit an intrinsic that has 1 float or double operand, and 1 integer. 6380 static Value *emitFPIntBuiltin(CodeGenFunction &CGF, 6381 const CallExpr *E, 6382 unsigned IntrinsicID) { 6383 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 6384 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 6385 6386 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 6387 return CGF.Builder.CreateCall2(F, Src0, Src1); 6388 } 6389 6390 Value *CodeGenFunction::EmitR600BuiltinExpr(unsigned BuiltinID, 6391 const CallExpr *E) { 6392 switch (BuiltinID) { 6393 case R600::BI__builtin_amdgpu_div_scale: 6394 case R600::BI__builtin_amdgpu_div_scalef: { 6395 // Translate from the intrinsics's struct return to the builtin's out 6396 // argument. 
6397 6398 std::pair<llvm::Value *, unsigned> FlagOutPtr 6399 = EmitPointerWithAlignment(E->getArg(3)); 6400 6401 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 6402 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 6403 llvm::Value *Z = EmitScalarExpr(E->getArg(2)); 6404 6405 llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::AMDGPU_div_scale, 6406 X->getType()); 6407 6408 llvm::Value *Tmp = Builder.CreateCall3(Callee, X, Y, Z); 6409 6410 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0); 6411 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1); 6412 6413 llvm::Type *RealFlagType 6414 = FlagOutPtr.first->getType()->getPointerElementType(); 6415 6416 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType); 6417 llvm::StoreInst *FlagStore = Builder.CreateStore(FlagExt, FlagOutPtr.first); 6418 FlagStore->setAlignment(FlagOutPtr.second); 6419 return Result; 6420 } 6421 case R600::BI__builtin_amdgpu_div_fmas: 6422 case R600::BI__builtin_amdgpu_div_fmasf: { 6423 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); 6424 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); 6425 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); 6426 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3)); 6427 6428 llvm::Value *F = CGM.getIntrinsic(Intrinsic::AMDGPU_div_fmas, 6429 Src0->getType()); 6430 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3); 6431 return Builder.CreateCall4(F, Src0, Src1, Src2, Src3ToBool); 6432 } 6433 case R600::BI__builtin_amdgpu_div_fixup: 6434 case R600::BI__builtin_amdgpu_div_fixupf: 6435 return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fixup); 6436 case R600::BI__builtin_amdgpu_trig_preop: 6437 case R600::BI__builtin_amdgpu_trig_preopf: 6438 return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_trig_preop); 6439 case R600::BI__builtin_amdgpu_rcp: 6440 case R600::BI__builtin_amdgpu_rcpf: 6441 return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rcp); 6442 case R600::BI__builtin_amdgpu_rsq: 6443 case R600::BI__builtin_amdgpu_rsqf: 6444 return 
emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq); 6445 case R600::BI__builtin_amdgpu_rsq_clamped: 6446 case R600::BI__builtin_amdgpu_rsq_clampedf: 6447 return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq_clamped); 6448 case R600::BI__builtin_amdgpu_ldexp: 6449 case R600::BI__builtin_amdgpu_ldexpf: 6450 return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_ldexp); 6451 case R600::BI__builtin_amdgpu_class: 6452 case R600::BI__builtin_amdgpu_classf: 6453 return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_class); 6454 default: 6455 return nullptr; 6456 } 6457 } 6458