1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This contains code to emit Builtin calls as LLVM code. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CodeGenFunction.h" 15 #include "CGObjCRuntime.h" 16 #include "CodeGenModule.h" 17 #include "TargetInfo.h" 18 #include "clang/AST/ASTContext.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/Basic/TargetBuiltins.h" 21 #include "clang/Basic/TargetInfo.h" 22 #include "clang/CodeGen/CGFunctionInfo.h" 23 #include "llvm/IR/DataLayout.h" 24 #include "llvm/IR/Intrinsics.h" 25 26 using namespace clang; 27 using namespace CodeGen; 28 using namespace llvm; 29 30 /// getBuiltinLibFunction - Given a builtin id for a function like 31 /// "__builtin_fabsf", return a Function* for "fabsf". 32 llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, 33 unsigned BuiltinID) { 34 assert(Context.BuiltinInfo.isLibFunction(BuiltinID)); 35 36 // Get the name, skip over the __builtin_ prefix (if necessary). 37 StringRef Name; 38 GlobalDecl D(FD); 39 40 // If the builtin has been declared explicitly with an assembler label, 41 // use the mangled name. This differs from the plain label on platforms 42 // that prefix labels. 43 if (FD->hasAttr<AsmLabelAttr>()) 44 Name = getMangledName(D); 45 else 46 Name = Context.BuiltinInfo.GetName(BuiltinID) + 10; 47 48 llvm::FunctionType *Ty = 49 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType())); 50 51 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false); 52 } 53 54 /// Emit the conversions required to turn the given value into an 55 /// integer of the given size. 
56 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V, 57 QualType T, llvm::IntegerType *IntType) { 58 V = CGF.EmitToMemory(V, T); 59 60 if (V->getType()->isPointerTy()) 61 return CGF.Builder.CreatePtrToInt(V, IntType); 62 63 assert(V->getType() == IntType); 64 return V; 65 } 66 67 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, 68 QualType T, llvm::Type *ResultType) { 69 V = CGF.EmitFromMemory(V, T); 70 71 if (ResultType->isPointerTy()) 72 return CGF.Builder.CreateIntToPtr(V, ResultType); 73 74 assert(V->getType() == ResultType); 75 return V; 76 } 77 78 /// Utility to insert an atomic instruction based on Instrinsic::ID 79 /// and the expression node. 80 static RValue EmitBinaryAtomic(CodeGenFunction &CGF, 81 llvm::AtomicRMWInst::BinOp Kind, 82 const CallExpr *E) { 83 QualType T = E->getType(); 84 assert(E->getArg(0)->getType()->isPointerType()); 85 assert(CGF.getContext().hasSameUnqualifiedType(T, 86 E->getArg(0)->getType()->getPointeeType())); 87 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 88 89 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 90 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 91 92 llvm::IntegerType *IntType = 93 llvm::IntegerType::get(CGF.getLLVMContext(), 94 CGF.getContext().getTypeSize(T)); 95 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 96 97 llvm::Value *Args[2]; 98 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 99 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 100 llvm::Type *ValueType = Args[1]->getType(); 101 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 102 103 llvm::Value *Result = 104 CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1], 105 llvm::SequentiallyConsistent); 106 Result = EmitFromInt(CGF, Result, T, ValueType); 107 return RValue::get(Result); 108 } 109 110 /// Utility to insert an atomic instruction based Instrinsic::ID and 111 /// the expression node, where the return value is the result of the 112 /// operation. 
113 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, 114 llvm::AtomicRMWInst::BinOp Kind, 115 const CallExpr *E, 116 Instruction::BinaryOps Op, 117 bool Invert = false) { 118 QualType T = E->getType(); 119 assert(E->getArg(0)->getType()->isPointerType()); 120 assert(CGF.getContext().hasSameUnqualifiedType(T, 121 E->getArg(0)->getType()->getPointeeType())); 122 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 123 124 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 125 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 126 127 llvm::IntegerType *IntType = 128 llvm::IntegerType::get(CGF.getLLVMContext(), 129 CGF.getContext().getTypeSize(T)); 130 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 131 132 llvm::Value *Args[2]; 133 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 134 llvm::Type *ValueType = Args[1]->getType(); 135 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 136 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 137 138 llvm::Value *Result = 139 CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1], 140 llvm::SequentiallyConsistent); 141 Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]); 142 if (Invert) 143 Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result, 144 llvm::ConstantInt::get(IntType, -1)); 145 Result = EmitFromInt(CGF, Result, T, ValueType); 146 return RValue::get(Result); 147 } 148 149 /// EmitFAbs - Emit a call to @llvm.fabs(). 
150 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V, QualType ValTy) { 151 Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); 152 llvm::CallInst *Call = CGF.Builder.CreateCall(F, V); 153 Call->setDoesNotAccessMemory(); 154 return Call; 155 } 156 157 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn, 158 const CallExpr *E, llvm::Value *calleeValue) { 159 return CGF.EmitCall(E->getCallee()->getType(), calleeValue, E, 160 ReturnValueSlot(), Fn); 161 } 162 163 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.* 164 /// depending on IntrinsicID. 165 /// 166 /// \arg CGF The current codegen function. 167 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate. 168 /// \arg X The first argument to the llvm.*.with.overflow.*. 169 /// \arg Y The second argument to the llvm.*.with.overflow.*. 170 /// \arg Carry The carry returned by the llvm.*.with.overflow.*. 171 /// \returns The result (i.e. sum/product) returned by the intrinsic. 172 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, 173 const llvm::Intrinsic::ID IntrinsicID, 174 llvm::Value *X, llvm::Value *Y, 175 llvm::Value *&Carry) { 176 // Make sure we have integers of the same width. 177 assert(X->getType() == Y->getType() && 178 "Arguments must be the same type. (Did you forget to make sure both " 179 "arguments have the same integer width?)"); 180 181 llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType()); 182 llvm::Value *Tmp = CGF.Builder.CreateCall2(Callee, X, Y); 183 Carry = CGF.Builder.CreateExtractValue(Tmp, 1); 184 return CGF.Builder.CreateExtractValue(Tmp, 0); 185 } 186 187 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, 188 unsigned BuiltinID, const CallExpr *E) { 189 // See if we can constant fold this builtin. If so, don't emit it at all. 
190 Expr::EvalResult Result; 191 if (E->EvaluateAsRValue(Result, CGM.getContext()) && 192 !Result.hasSideEffects()) { 193 if (Result.Val.isInt()) 194 return RValue::get(llvm::ConstantInt::get(getLLVMContext(), 195 Result.Val.getInt())); 196 if (Result.Val.isFloat()) 197 return RValue::get(llvm::ConstantFP::get(getLLVMContext(), 198 Result.Val.getFloat())); 199 } 200 201 switch (BuiltinID) { 202 default: break; // Handle intrinsics and libm functions below. 203 case Builtin::BI__builtin___CFStringMakeConstantString: 204 case Builtin::BI__builtin___NSStringMakeConstantString: 205 return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr)); 206 case Builtin::BI__builtin_stdarg_start: 207 case Builtin::BI__builtin_va_start: 208 case Builtin::BI__va_start: 209 case Builtin::BI__builtin_va_end: { 210 Value *ArgValue = (BuiltinID == Builtin::BI__va_start) 211 ? EmitScalarExpr(E->getArg(0)) 212 : EmitVAListRef(E->getArg(0)); 213 llvm::Type *DestType = Int8PtrTy; 214 if (ArgValue->getType() != DestType) 215 ArgValue = Builder.CreateBitCast(ArgValue, DestType, 216 ArgValue->getName().data()); 217 218 Intrinsic::ID inst = (BuiltinID == Builtin::BI__builtin_va_end) ? 
219 Intrinsic::vaend : Intrinsic::vastart; 220 return RValue::get(Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue)); 221 } 222 case Builtin::BI__builtin_va_copy: { 223 Value *DstPtr = EmitVAListRef(E->getArg(0)); 224 Value *SrcPtr = EmitVAListRef(E->getArg(1)); 225 226 llvm::Type *Type = Int8PtrTy; 227 228 DstPtr = Builder.CreateBitCast(DstPtr, Type); 229 SrcPtr = Builder.CreateBitCast(SrcPtr, Type); 230 return RValue::get(Builder.CreateCall2(CGM.getIntrinsic(Intrinsic::vacopy), 231 DstPtr, SrcPtr)); 232 } 233 case Builtin::BI__builtin_abs: 234 case Builtin::BI__builtin_labs: 235 case Builtin::BI__builtin_llabs: { 236 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 237 238 Value *NegOp = Builder.CreateNeg(ArgValue, "neg"); 239 Value *CmpResult = 240 Builder.CreateICmpSGE(ArgValue, 241 llvm::Constant::getNullValue(ArgValue->getType()), 242 "abscond"); 243 Value *Result = 244 Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs"); 245 246 return RValue::get(Result); 247 } 248 case Builtin::BI__builtin_fmod: 249 case Builtin::BI__builtin_fmodf: 250 case Builtin::BI__builtin_fmodl: { 251 Value *Arg1 = EmitScalarExpr(E->getArg(0)); 252 Value *Arg2 = EmitScalarExpr(E->getArg(1)); 253 Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod"); 254 return RValue::get(Result); 255 } 256 257 case Builtin::BI__builtin_conj: 258 case Builtin::BI__builtin_conjf: 259 case Builtin::BI__builtin_conjl: { 260 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 261 Value *Real = ComplexVal.first; 262 Value *Imag = ComplexVal.second; 263 Value *Zero = 264 Imag->getType()->isFPOrFPVectorTy() 265 ? 
llvm::ConstantFP::getZeroValueForNegation(Imag->getType()) 266 : llvm::Constant::getNullValue(Imag->getType()); 267 268 Imag = Builder.CreateFSub(Zero, Imag, "sub"); 269 return RValue::getComplex(std::make_pair(Real, Imag)); 270 } 271 case Builtin::BI__builtin_creal: 272 case Builtin::BI__builtin_crealf: 273 case Builtin::BI__builtin_creall: 274 case Builtin::BIcreal: 275 case Builtin::BIcrealf: 276 case Builtin::BIcreall: { 277 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 278 return RValue::get(ComplexVal.first); 279 } 280 281 case Builtin::BI__builtin_cimag: 282 case Builtin::BI__builtin_cimagf: 283 case Builtin::BI__builtin_cimagl: 284 case Builtin::BIcimag: 285 case Builtin::BIcimagf: 286 case Builtin::BIcimagl: { 287 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 288 return RValue::get(ComplexVal.second); 289 } 290 291 case Builtin::BI__builtin_ctzs: 292 case Builtin::BI__builtin_ctz: 293 case Builtin::BI__builtin_ctzl: 294 case Builtin::BI__builtin_ctzll: { 295 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 296 297 llvm::Type *ArgType = ArgValue->getType(); 298 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 299 300 llvm::Type *ResultType = ConvertType(E->getType()); 301 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); 302 Value *Result = Builder.CreateCall2(F, ArgValue, ZeroUndef); 303 if (Result->getType() != ResultType) 304 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 305 "cast"); 306 return RValue::get(Result); 307 } 308 case Builtin::BI__builtin_clzs: 309 case Builtin::BI__builtin_clz: 310 case Builtin::BI__builtin_clzl: 311 case Builtin::BI__builtin_clzll: { 312 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 313 314 llvm::Type *ArgType = ArgValue->getType(); 315 Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); 316 317 llvm::Type *ResultType = ConvertType(E->getType()); 318 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); 319 Value *Result = 
Builder.CreateCall2(F, ArgValue, ZeroUndef); 320 if (Result->getType() != ResultType) 321 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 322 "cast"); 323 return RValue::get(Result); 324 } 325 case Builtin::BI__builtin_ffs: 326 case Builtin::BI__builtin_ffsl: 327 case Builtin::BI__builtin_ffsll: { 328 // ffs(x) -> x ? cttz(x) + 1 : 0 329 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 330 331 llvm::Type *ArgType = ArgValue->getType(); 332 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 333 334 llvm::Type *ResultType = ConvertType(E->getType()); 335 Value *Tmp = Builder.CreateAdd(Builder.CreateCall2(F, ArgValue, 336 Builder.getTrue()), 337 llvm::ConstantInt::get(ArgType, 1)); 338 Value *Zero = llvm::Constant::getNullValue(ArgType); 339 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero"); 340 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs"); 341 if (Result->getType() != ResultType) 342 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 343 "cast"); 344 return RValue::get(Result); 345 } 346 case Builtin::BI__builtin_parity: 347 case Builtin::BI__builtin_parityl: 348 case Builtin::BI__builtin_parityll: { 349 // parity(x) -> ctpop(x) & 1 350 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 351 352 llvm::Type *ArgType = ArgValue->getType(); 353 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 354 355 llvm::Type *ResultType = ConvertType(E->getType()); 356 Value *Tmp = Builder.CreateCall(F, ArgValue); 357 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1)); 358 if (Result->getType() != ResultType) 359 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 360 "cast"); 361 return RValue::get(Result); 362 } 363 case Builtin::BI__builtin_popcount: 364 case Builtin::BI__builtin_popcountl: 365 case Builtin::BI__builtin_popcountll: { 366 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 367 368 llvm::Type *ArgType = ArgValue->getType(); 369 Value *F = 
CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 370 371 llvm::Type *ResultType = ConvertType(E->getType()); 372 Value *Result = Builder.CreateCall(F, ArgValue); 373 if (Result->getType() != ResultType) 374 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 375 "cast"); 376 return RValue::get(Result); 377 } 378 case Builtin::BI__builtin_expect: { 379 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 380 llvm::Type *ArgType = ArgValue->getType(); 381 382 Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType); 383 Value *ExpectedValue = EmitScalarExpr(E->getArg(1)); 384 385 Value *Result = Builder.CreateCall2(FnExpect, ArgValue, ExpectedValue, 386 "expval"); 387 return RValue::get(Result); 388 } 389 case Builtin::BI__builtin_assume_aligned: { 390 Value *PtrValue = EmitScalarExpr(E->getArg(0)); 391 Value *OffsetValue = 392 (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr; 393 394 Value *AlignmentValue = EmitScalarExpr(E->getArg(1)); 395 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue); 396 unsigned Alignment = (unsigned) AlignmentCI->getZExtValue(); 397 398 EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue); 399 return RValue::get(PtrValue); 400 } 401 case Builtin::BI__assume: 402 case Builtin::BI__builtin_assume: { 403 if (E->getArg(0)->HasSideEffects(getContext())) 404 return RValue::get(nullptr); 405 406 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 407 Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume); 408 return RValue::get(Builder.CreateCall(FnAssume, ArgValue)); 409 } 410 case Builtin::BI__builtin_bswap16: 411 case Builtin::BI__builtin_bswap32: 412 case Builtin::BI__builtin_bswap64: { 413 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 414 llvm::Type *ArgType = ArgValue->getType(); 415 Value *F = CGM.getIntrinsic(Intrinsic::bswap, ArgType); 416 return RValue::get(Builder.CreateCall(F, ArgValue)); 417 } 418 case Builtin::BI__builtin_object_size: { 419 // We rely on constant folding to deal with 
expressions with side effects. 420 assert(!E->getArg(0)->HasSideEffects(getContext()) && 421 "should have been constant folded"); 422 423 // We pass this builtin onto the optimizer so that it can 424 // figure out the object size in more complex cases. 425 llvm::Type *ResType = ConvertType(E->getType()); 426 427 // LLVM only supports 0 and 2, make sure that we pass along that 428 // as a boolean. 429 Value *Ty = EmitScalarExpr(E->getArg(1)); 430 ConstantInt *CI = dyn_cast<ConstantInt>(Ty); 431 assert(CI); 432 uint64_t val = CI->getZExtValue(); 433 CI = ConstantInt::get(Builder.getInt1Ty(), (val & 0x2) >> 1); 434 // FIXME: Get right address space. 435 llvm::Type *Tys[] = { ResType, Builder.getInt8PtrTy(0) }; 436 Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys); 437 return RValue::get(Builder.CreateCall2(F, EmitScalarExpr(E->getArg(0)),CI)); 438 } 439 case Builtin::BI__builtin_prefetch: { 440 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); 441 // FIXME: Technically these constants should of type 'int', yes? 442 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) : 443 llvm::ConstantInt::get(Int32Ty, 0); 444 Locality = (E->getNumArgs() > 2) ? 
EmitScalarExpr(E->getArg(2)) : 445 llvm::ConstantInt::get(Int32Ty, 3); 446 Value *Data = llvm::ConstantInt::get(Int32Ty, 1); 447 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 448 return RValue::get(Builder.CreateCall4(F, Address, RW, Locality, Data)); 449 } 450 case Builtin::BI__builtin_readcyclecounter: { 451 Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); 452 return RValue::get(Builder.CreateCall(F)); 453 } 454 case Builtin::BI__builtin___clear_cache: { 455 Value *Begin = EmitScalarExpr(E->getArg(0)); 456 Value *End = EmitScalarExpr(E->getArg(1)); 457 Value *F = CGM.getIntrinsic(Intrinsic::clear_cache); 458 return RValue::get(Builder.CreateCall2(F, Begin, End)); 459 } 460 case Builtin::BI__builtin_trap: { 461 Value *F = CGM.getIntrinsic(Intrinsic::trap); 462 return RValue::get(Builder.CreateCall(F)); 463 } 464 case Builtin::BI__debugbreak: { 465 Value *F = CGM.getIntrinsic(Intrinsic::debugtrap); 466 return RValue::get(Builder.CreateCall(F)); 467 } 468 case Builtin::BI__builtin_unreachable: { 469 if (SanOpts->Unreachable) { 470 SanitizerScope SanScope(this); 471 EmitCheck(Builder.getFalse(), "builtin_unreachable", 472 EmitCheckSourceLocation(E->getExprLoc()), 473 None, CRK_Unrecoverable); 474 } else 475 Builder.CreateUnreachable(); 476 477 // We do need to preserve an insertion point. 
478 EmitBlock(createBasicBlock("unreachable.cont")); 479 480 return RValue::get(nullptr); 481 } 482 483 case Builtin::BI__builtin_powi: 484 case Builtin::BI__builtin_powif: 485 case Builtin::BI__builtin_powil: { 486 Value *Base = EmitScalarExpr(E->getArg(0)); 487 Value *Exponent = EmitScalarExpr(E->getArg(1)); 488 llvm::Type *ArgType = Base->getType(); 489 Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType); 490 return RValue::get(Builder.CreateCall2(F, Base, Exponent)); 491 } 492 493 case Builtin::BI__builtin_isgreater: 494 case Builtin::BI__builtin_isgreaterequal: 495 case Builtin::BI__builtin_isless: 496 case Builtin::BI__builtin_islessequal: 497 case Builtin::BI__builtin_islessgreater: 498 case Builtin::BI__builtin_isunordered: { 499 // Ordered comparisons: we know the arguments to these are matching scalar 500 // floating point values. 501 Value *LHS = EmitScalarExpr(E->getArg(0)); 502 Value *RHS = EmitScalarExpr(E->getArg(1)); 503 504 switch (BuiltinID) { 505 default: llvm_unreachable("Unknown ordered comparison"); 506 case Builtin::BI__builtin_isgreater: 507 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp"); 508 break; 509 case Builtin::BI__builtin_isgreaterequal: 510 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp"); 511 break; 512 case Builtin::BI__builtin_isless: 513 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp"); 514 break; 515 case Builtin::BI__builtin_islessequal: 516 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp"); 517 break; 518 case Builtin::BI__builtin_islessgreater: 519 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp"); 520 break; 521 case Builtin::BI__builtin_isunordered: 522 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp"); 523 break; 524 } 525 // ZExt bool to int type. 
526 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType()))); 527 } 528 case Builtin::BI__builtin_isnan: { 529 Value *V = EmitScalarExpr(E->getArg(0)); 530 V = Builder.CreateFCmpUNO(V, V, "cmp"); 531 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 532 } 533 534 case Builtin::BI__builtin_isinf: { 535 // isinf(x) --> fabs(x) == infinity 536 Value *V = EmitScalarExpr(E->getArg(0)); 537 V = EmitFAbs(*this, V, E->getArg(0)->getType()); 538 539 V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf"); 540 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 541 } 542 543 // TODO: BI__builtin_isinf_sign 544 // isinf_sign(x) -> isinf(x) ? (signbit(x) ? -1 : 1) : 0 545 546 case Builtin::BI__builtin_isnormal: { 547 // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min 548 Value *V = EmitScalarExpr(E->getArg(0)); 549 Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); 550 551 Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType()); 552 Value *IsLessThanInf = 553 Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); 554 APFloat Smallest = APFloat::getSmallestNormalized( 555 getContext().getFloatTypeSemantics(E->getArg(0)->getType())); 556 Value *IsNormal = 557 Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest), 558 "isnormal"); 559 V = Builder.CreateAnd(Eq, IsLessThanInf, "and"); 560 V = Builder.CreateAnd(V, IsNormal, "and"); 561 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 562 } 563 564 case Builtin::BI__builtin_isfinite: { 565 // isfinite(x) --> x == x && fabs(x) != infinity; 566 Value *V = EmitScalarExpr(E->getArg(0)); 567 Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); 568 569 Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType()); 570 Value *IsNotInf = 571 Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); 572 573 V = Builder.CreateAnd(Eq, IsNotInf, "and"); 574 return 
RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 575 } 576 577 case Builtin::BI__builtin_fpclassify: { 578 Value *V = EmitScalarExpr(E->getArg(5)); 579 llvm::Type *Ty = ConvertType(E->getArg(5)->getType()); 580 581 // Create Result 582 BasicBlock *Begin = Builder.GetInsertBlock(); 583 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn); 584 Builder.SetInsertPoint(End); 585 PHINode *Result = 586 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4, 587 "fpclassify_result"); 588 589 // if (V==0) return FP_ZERO 590 Builder.SetInsertPoint(Begin); 591 Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty), 592 "iszero"); 593 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4)); 594 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn); 595 Builder.CreateCondBr(IsZero, End, NotZero); 596 Result->addIncoming(ZeroLiteral, Begin); 597 598 // if (V != V) return FP_NAN 599 Builder.SetInsertPoint(NotZero); 600 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp"); 601 Value *NanLiteral = EmitScalarExpr(E->getArg(0)); 602 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn); 603 Builder.CreateCondBr(IsNan, End, NotNan); 604 Result->addIncoming(NanLiteral, NotZero); 605 606 // if (fabs(V) == infinity) return FP_INFINITY 607 Builder.SetInsertPoint(NotNan); 608 Value *VAbs = EmitFAbs(*this, V, E->getArg(5)->getType()); 609 Value *IsInf = 610 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()), 611 "isinf"); 612 Value *InfLiteral = EmitScalarExpr(E->getArg(1)); 613 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn); 614 Builder.CreateCondBr(IsInf, End, NotInf); 615 Result->addIncoming(InfLiteral, NotNan); 616 617 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL 618 Builder.SetInsertPoint(NotInf); 619 APFloat Smallest = APFloat::getSmallestNormalized( 620 getContext().getFloatTypeSemantics(E->getArg(5)->getType())); 621 Value *IsNormal = 622 
Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest), 623 "isnormal"); 624 Value *NormalResult = 625 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)), 626 EmitScalarExpr(E->getArg(3))); 627 Builder.CreateBr(End); 628 Result->addIncoming(NormalResult, NotInf); 629 630 // return Result 631 Builder.SetInsertPoint(End); 632 return RValue::get(Result); 633 } 634 635 case Builtin::BIalloca: 636 case Builtin::BI_alloca: 637 case Builtin::BI__builtin_alloca: { 638 Value *Size = EmitScalarExpr(E->getArg(0)); 639 return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size)); 640 } 641 case Builtin::BIbzero: 642 case Builtin::BI__builtin_bzero: { 643 std::pair<llvm::Value*, unsigned> Dest = 644 EmitPointerWithAlignment(E->getArg(0)); 645 Value *SizeVal = EmitScalarExpr(E->getArg(1)); 646 Builder.CreateMemSet(Dest.first, Builder.getInt8(0), SizeVal, 647 Dest.second, false); 648 return RValue::get(Dest.first); 649 } 650 case Builtin::BImemcpy: 651 case Builtin::BI__builtin_memcpy: { 652 std::pair<llvm::Value*, unsigned> Dest = 653 EmitPointerWithAlignment(E->getArg(0)); 654 std::pair<llvm::Value*, unsigned> Src = 655 EmitPointerWithAlignment(E->getArg(1)); 656 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 657 unsigned Align = std::min(Dest.second, Src.second); 658 Builder.CreateMemCpy(Dest.first, Src.first, SizeVal, Align, false); 659 return RValue::get(Dest.first); 660 } 661 662 case Builtin::BI__builtin___memcpy_chk: { 663 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2. 
664 llvm::APSInt Size, DstSize; 665 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 666 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 667 break; 668 if (Size.ugt(DstSize)) 669 break; 670 std::pair<llvm::Value*, unsigned> Dest = 671 EmitPointerWithAlignment(E->getArg(0)); 672 std::pair<llvm::Value*, unsigned> Src = 673 EmitPointerWithAlignment(E->getArg(1)); 674 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 675 unsigned Align = std::min(Dest.second, Src.second); 676 Builder.CreateMemCpy(Dest.first, Src.first, SizeVal, Align, false); 677 return RValue::get(Dest.first); 678 } 679 680 case Builtin::BI__builtin_objc_memmove_collectable: { 681 Value *Address = EmitScalarExpr(E->getArg(0)); 682 Value *SrcAddr = EmitScalarExpr(E->getArg(1)); 683 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 684 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, 685 Address, SrcAddr, SizeVal); 686 return RValue::get(Address); 687 } 688 689 case Builtin::BI__builtin___memmove_chk: { 690 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2. 
691 llvm::APSInt Size, DstSize; 692 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 693 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 694 break; 695 if (Size.ugt(DstSize)) 696 break; 697 std::pair<llvm::Value*, unsigned> Dest = 698 EmitPointerWithAlignment(E->getArg(0)); 699 std::pair<llvm::Value*, unsigned> Src = 700 EmitPointerWithAlignment(E->getArg(1)); 701 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 702 unsigned Align = std::min(Dest.second, Src.second); 703 Builder.CreateMemMove(Dest.first, Src.first, SizeVal, Align, false); 704 return RValue::get(Dest.first); 705 } 706 707 case Builtin::BImemmove: 708 case Builtin::BI__builtin_memmove: { 709 std::pair<llvm::Value*, unsigned> Dest = 710 EmitPointerWithAlignment(E->getArg(0)); 711 std::pair<llvm::Value*, unsigned> Src = 712 EmitPointerWithAlignment(E->getArg(1)); 713 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 714 unsigned Align = std::min(Dest.second, Src.second); 715 Builder.CreateMemMove(Dest.first, Src.first, SizeVal, Align, false); 716 return RValue::get(Dest.first); 717 } 718 case Builtin::BImemset: 719 case Builtin::BI__builtin_memset: { 720 std::pair<llvm::Value*, unsigned> Dest = 721 EmitPointerWithAlignment(E->getArg(0)); 722 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 723 Builder.getInt8Ty()); 724 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 725 Builder.CreateMemSet(Dest.first, ByteVal, SizeVal, Dest.second, false); 726 return RValue::get(Dest.first); 727 } 728 case Builtin::BI__builtin___memset_chk: { 729 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2. 
730 llvm::APSInt Size, DstSize; 731 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 732 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 733 break; 734 if (Size.ugt(DstSize)) 735 break; 736 std::pair<llvm::Value*, unsigned> Dest = 737 EmitPointerWithAlignment(E->getArg(0)); 738 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 739 Builder.getInt8Ty()); 740 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 741 Builder.CreateMemSet(Dest.first, ByteVal, SizeVal, Dest.second, false); 742 return RValue::get(Dest.first); 743 } 744 case Builtin::BI__builtin_dwarf_cfa: { 745 // The offset in bytes from the first argument to the CFA. 746 // 747 // Why on earth is this in the frontend? Is there any reason at 748 // all that the backend can't reasonably determine this while 749 // lowering llvm.eh.dwarf.cfa()? 750 // 751 // TODO: If there's a satisfactory reason, add a target hook for 752 // this instead of hard-coding 0, which is correct for most targets. 
753 int32_t Offset = 0; 754 755 Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa); 756 return RValue::get(Builder.CreateCall(F, 757 llvm::ConstantInt::get(Int32Ty, Offset))); 758 } 759 case Builtin::BI__builtin_return_address: { 760 Value *Depth = EmitScalarExpr(E->getArg(0)); 761 Depth = Builder.CreateIntCast(Depth, Int32Ty, false); 762 Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); 763 return RValue::get(Builder.CreateCall(F, Depth)); 764 } 765 case Builtin::BI__builtin_frame_address: { 766 Value *Depth = EmitScalarExpr(E->getArg(0)); 767 Depth = Builder.CreateIntCast(Depth, Int32Ty, false); 768 Value *F = CGM.getIntrinsic(Intrinsic::frameaddress); 769 return RValue::get(Builder.CreateCall(F, Depth)); 770 } 771 case Builtin::BI__builtin_extract_return_addr: { 772 Value *Address = EmitScalarExpr(E->getArg(0)); 773 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address); 774 return RValue::get(Result); 775 } 776 case Builtin::BI__builtin_frob_return_addr: { 777 Value *Address = EmitScalarExpr(E->getArg(0)); 778 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address); 779 return RValue::get(Result); 780 } 781 case Builtin::BI__builtin_dwarf_sp_column: { 782 llvm::IntegerType *Ty 783 = cast<llvm::IntegerType>(ConvertType(E->getType())); 784 int Column = getTargetHooks().getDwarfEHStackPointer(CGM); 785 if (Column == -1) { 786 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column"); 787 return RValue::get(llvm::UndefValue::get(Ty)); 788 } 789 return RValue::get(llvm::ConstantInt::get(Ty, Column, true)); 790 } 791 case Builtin::BI__builtin_init_dwarf_reg_size_table: { 792 Value *Address = EmitScalarExpr(E->getArg(0)); 793 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address)) 794 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table"); 795 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType()))); 796 } 797 case Builtin::BI__builtin_eh_return: { 798 Value *Int = EmitScalarExpr(E->getArg(0)); 799 Value *Ptr = 
EmitScalarExpr(E->getArg(1)); 800 801 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType()); 802 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) && 803 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants"); 804 Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32 805 ? Intrinsic::eh_return_i32 806 : Intrinsic::eh_return_i64); 807 Builder.CreateCall2(F, Int, Ptr); 808 Builder.CreateUnreachable(); 809 810 // We do need to preserve an insertion point. 811 EmitBlock(createBasicBlock("builtin_eh_return.cont")); 812 813 return RValue::get(nullptr); 814 } 815 case Builtin::BI__builtin_unwind_init: { 816 Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init); 817 return RValue::get(Builder.CreateCall(F)); 818 } 819 case Builtin::BI__builtin_extend_pointer: { 820 // Extends a pointer to the size of an _Unwind_Word, which is 821 // uint64_t on all platforms. Generally this gets poked into a 822 // register and eventually used as an address, so if the 823 // addressing registers are wider than pointers and the platform 824 // doesn't implicitly ignore high-order bits when doing 825 // addressing, we need to make sure we zext / sext based on 826 // the platform's expectations. 827 // 828 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html 829 830 // Cast the pointer to intptr_t. 831 Value *Ptr = EmitScalarExpr(E->getArg(0)); 832 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast"); 833 834 // If that's 64 bits, we're done. 835 if (IntPtrTy->getBitWidth() == 64) 836 return RValue::get(Result); 837 838 // Otherwise, ask the codegen data what to do. 839 if (getTargetHooks().extendPointerWithSExt()) 840 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext")); 841 else 842 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext")); 843 } 844 case Builtin::BI__builtin_setjmp: { 845 // Buffer is a void**. 
846 Value *Buf = EmitScalarExpr(E->getArg(0)); 847 848 // Store the frame pointer to the setjmp buffer. 849 Value *FrameAddr = 850 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 851 ConstantInt::get(Int32Ty, 0)); 852 Builder.CreateStore(FrameAddr, Buf); 853 854 // Store the stack pointer to the setjmp buffer. 855 Value *StackAddr = 856 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave)); 857 Value *StackSaveSlot = 858 Builder.CreateGEP(Buf, ConstantInt::get(Int32Ty, 2)); 859 Builder.CreateStore(StackAddr, StackSaveSlot); 860 861 // Call LLVM's EH setjmp, which is lightweight. 862 Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); 863 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 864 return RValue::get(Builder.CreateCall(F, Buf)); 865 } 866 case Builtin::BI__builtin_longjmp: { 867 Value *Buf = EmitScalarExpr(E->getArg(0)); 868 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 869 870 // Call LLVM's EH longjmp, which is lightweight. 871 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf); 872 873 // longjmp doesn't return; mark this as unreachable. 874 Builder.CreateUnreachable(); 875 876 // We do need to preserve an insertion point. 
877 EmitBlock(createBasicBlock("longjmp.cont")); 878 879 return RValue::get(nullptr); 880 } 881 case Builtin::BI__sync_fetch_and_add: 882 case Builtin::BI__sync_fetch_and_sub: 883 case Builtin::BI__sync_fetch_and_or: 884 case Builtin::BI__sync_fetch_and_and: 885 case Builtin::BI__sync_fetch_and_xor: 886 case Builtin::BI__sync_fetch_and_nand: 887 case Builtin::BI__sync_add_and_fetch: 888 case Builtin::BI__sync_sub_and_fetch: 889 case Builtin::BI__sync_and_and_fetch: 890 case Builtin::BI__sync_or_and_fetch: 891 case Builtin::BI__sync_xor_and_fetch: 892 case Builtin::BI__sync_nand_and_fetch: 893 case Builtin::BI__sync_val_compare_and_swap: 894 case Builtin::BI__sync_bool_compare_and_swap: 895 case Builtin::BI__sync_lock_test_and_set: 896 case Builtin::BI__sync_lock_release: 897 case Builtin::BI__sync_swap: 898 llvm_unreachable("Shouldn't make it through sema"); 899 case Builtin::BI__sync_fetch_and_add_1: 900 case Builtin::BI__sync_fetch_and_add_2: 901 case Builtin::BI__sync_fetch_and_add_4: 902 case Builtin::BI__sync_fetch_and_add_8: 903 case Builtin::BI__sync_fetch_and_add_16: 904 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E); 905 case Builtin::BI__sync_fetch_and_sub_1: 906 case Builtin::BI__sync_fetch_and_sub_2: 907 case Builtin::BI__sync_fetch_and_sub_4: 908 case Builtin::BI__sync_fetch_and_sub_8: 909 case Builtin::BI__sync_fetch_and_sub_16: 910 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E); 911 case Builtin::BI__sync_fetch_and_or_1: 912 case Builtin::BI__sync_fetch_and_or_2: 913 case Builtin::BI__sync_fetch_and_or_4: 914 case Builtin::BI__sync_fetch_and_or_8: 915 case Builtin::BI__sync_fetch_and_or_16: 916 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E); 917 case Builtin::BI__sync_fetch_and_and_1: 918 case Builtin::BI__sync_fetch_and_and_2: 919 case Builtin::BI__sync_fetch_and_and_4: 920 case Builtin::BI__sync_fetch_and_and_8: 921 case Builtin::BI__sync_fetch_and_and_16: 922 return EmitBinaryAtomic(*this, 
llvm::AtomicRMWInst::And, E); 923 case Builtin::BI__sync_fetch_and_xor_1: 924 case Builtin::BI__sync_fetch_and_xor_2: 925 case Builtin::BI__sync_fetch_and_xor_4: 926 case Builtin::BI__sync_fetch_and_xor_8: 927 case Builtin::BI__sync_fetch_and_xor_16: 928 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E); 929 case Builtin::BI__sync_fetch_and_nand_1: 930 case Builtin::BI__sync_fetch_and_nand_2: 931 case Builtin::BI__sync_fetch_and_nand_4: 932 case Builtin::BI__sync_fetch_and_nand_8: 933 case Builtin::BI__sync_fetch_and_nand_16: 934 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E); 935 936 // Clang extensions: not overloaded yet. 937 case Builtin::BI__sync_fetch_and_min: 938 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E); 939 case Builtin::BI__sync_fetch_and_max: 940 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E); 941 case Builtin::BI__sync_fetch_and_umin: 942 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E); 943 case Builtin::BI__sync_fetch_and_umax: 944 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E); 945 946 case Builtin::BI__sync_add_and_fetch_1: 947 case Builtin::BI__sync_add_and_fetch_2: 948 case Builtin::BI__sync_add_and_fetch_4: 949 case Builtin::BI__sync_add_and_fetch_8: 950 case Builtin::BI__sync_add_and_fetch_16: 951 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E, 952 llvm::Instruction::Add); 953 case Builtin::BI__sync_sub_and_fetch_1: 954 case Builtin::BI__sync_sub_and_fetch_2: 955 case Builtin::BI__sync_sub_and_fetch_4: 956 case Builtin::BI__sync_sub_and_fetch_8: 957 case Builtin::BI__sync_sub_and_fetch_16: 958 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E, 959 llvm::Instruction::Sub); 960 case Builtin::BI__sync_and_and_fetch_1: 961 case Builtin::BI__sync_and_and_fetch_2: 962 case Builtin::BI__sync_and_and_fetch_4: 963 case Builtin::BI__sync_and_and_fetch_8: 964 case Builtin::BI__sync_and_and_fetch_16: 965 return EmitBinaryAtomicPost(*this, 
llvm::AtomicRMWInst::And, E, 966 llvm::Instruction::And); 967 case Builtin::BI__sync_or_and_fetch_1: 968 case Builtin::BI__sync_or_and_fetch_2: 969 case Builtin::BI__sync_or_and_fetch_4: 970 case Builtin::BI__sync_or_and_fetch_8: 971 case Builtin::BI__sync_or_and_fetch_16: 972 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E, 973 llvm::Instruction::Or); 974 case Builtin::BI__sync_xor_and_fetch_1: 975 case Builtin::BI__sync_xor_and_fetch_2: 976 case Builtin::BI__sync_xor_and_fetch_4: 977 case Builtin::BI__sync_xor_and_fetch_8: 978 case Builtin::BI__sync_xor_and_fetch_16: 979 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E, 980 llvm::Instruction::Xor); 981 case Builtin::BI__sync_nand_and_fetch_1: 982 case Builtin::BI__sync_nand_and_fetch_2: 983 case Builtin::BI__sync_nand_and_fetch_4: 984 case Builtin::BI__sync_nand_and_fetch_8: 985 case Builtin::BI__sync_nand_and_fetch_16: 986 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E, 987 llvm::Instruction::And, true); 988 989 case Builtin::BI__sync_val_compare_and_swap_1: 990 case Builtin::BI__sync_val_compare_and_swap_2: 991 case Builtin::BI__sync_val_compare_and_swap_4: 992 case Builtin::BI__sync_val_compare_and_swap_8: 993 case Builtin::BI__sync_val_compare_and_swap_16: { 994 QualType T = E->getType(); 995 llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0)); 996 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 997 998 llvm::IntegerType *IntType = 999 llvm::IntegerType::get(getLLVMContext(), 1000 getContext().getTypeSize(T)); 1001 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 1002 1003 Value *Args[3]; 1004 Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType); 1005 Args[1] = EmitScalarExpr(E->getArg(1)); 1006 llvm::Type *ValueType = Args[1]->getType(); 1007 Args[1] = EmitToInt(*this, Args[1], T, IntType); 1008 Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType); 1009 1010 Value *Result = Builder.CreateAtomicCmpXchg(Args[0], Args[1], 
Args[2], 1011 llvm::SequentiallyConsistent, 1012 llvm::SequentiallyConsistent); 1013 Result = Builder.CreateExtractValue(Result, 0); 1014 Result = EmitFromInt(*this, Result, T, ValueType); 1015 return RValue::get(Result); 1016 } 1017 1018 case Builtin::BI__sync_bool_compare_and_swap_1: 1019 case Builtin::BI__sync_bool_compare_and_swap_2: 1020 case Builtin::BI__sync_bool_compare_and_swap_4: 1021 case Builtin::BI__sync_bool_compare_and_swap_8: 1022 case Builtin::BI__sync_bool_compare_and_swap_16: { 1023 QualType T = E->getArg(1)->getType(); 1024 llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0)); 1025 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 1026 1027 llvm::IntegerType *IntType = 1028 llvm::IntegerType::get(getLLVMContext(), 1029 getContext().getTypeSize(T)); 1030 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 1031 1032 Value *Args[3]; 1033 Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType); 1034 Args[1] = EmitToInt(*this, EmitScalarExpr(E->getArg(1)), T, IntType); 1035 Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType); 1036 1037 Value *Pair = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2], 1038 llvm::SequentiallyConsistent, 1039 llvm::SequentiallyConsistent); 1040 Value *Result = Builder.CreateExtractValue(Pair, 1); 1041 // zext bool to int. 
1042 Result = Builder.CreateZExt(Result, ConvertType(E->getType())); 1043 return RValue::get(Result); 1044 } 1045 1046 case Builtin::BI__sync_swap_1: 1047 case Builtin::BI__sync_swap_2: 1048 case Builtin::BI__sync_swap_4: 1049 case Builtin::BI__sync_swap_8: 1050 case Builtin::BI__sync_swap_16: 1051 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1052 1053 case Builtin::BI__sync_lock_test_and_set_1: 1054 case Builtin::BI__sync_lock_test_and_set_2: 1055 case Builtin::BI__sync_lock_test_and_set_4: 1056 case Builtin::BI__sync_lock_test_and_set_8: 1057 case Builtin::BI__sync_lock_test_and_set_16: 1058 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1059 1060 case Builtin::BI__sync_lock_release_1: 1061 case Builtin::BI__sync_lock_release_2: 1062 case Builtin::BI__sync_lock_release_4: 1063 case Builtin::BI__sync_lock_release_8: 1064 case Builtin::BI__sync_lock_release_16: { 1065 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1066 QualType ElTy = E->getArg(0)->getType()->getPointeeType(); 1067 CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); 1068 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 1069 StoreSize.getQuantity() * 8); 1070 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 1071 llvm::StoreInst *Store = 1072 Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr); 1073 Store->setAlignment(StoreSize.getQuantity()); 1074 Store->setAtomic(llvm::Release); 1075 return RValue::get(nullptr); 1076 } 1077 1078 case Builtin::BI__sync_synchronize: { 1079 // We assume this is supposed to correspond to a C++0x-style 1080 // sequentially-consistent fence (i.e. this is only usable for 1081 // synchonization, not device I/O or anything like that). This intrinsic 1082 // is really badly designed in the sense that in theory, there isn't 1083 // any way to safely use it... but in practice, it mostly works 1084 // to use it with non-atomic loads and stores to get acquire/release 1085 // semantics. 
1086 Builder.CreateFence(llvm::SequentiallyConsistent); 1087 return RValue::get(nullptr); 1088 } 1089 1090 case Builtin::BI__c11_atomic_is_lock_free: 1091 case Builtin::BI__atomic_is_lock_free: { 1092 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the 1093 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since 1094 // _Atomic(T) is always properly-aligned. 1095 const char *LibCallName = "__atomic_is_lock_free"; 1096 CallArgList Args; 1097 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))), 1098 getContext().getSizeType()); 1099 if (BuiltinID == Builtin::BI__atomic_is_lock_free) 1100 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))), 1101 getContext().VoidPtrTy); 1102 else 1103 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)), 1104 getContext().VoidPtrTy); 1105 const CGFunctionInfo &FuncInfo = 1106 CGM.getTypes().arrangeFreeFunctionCall(E->getType(), Args, 1107 FunctionType::ExtInfo(), 1108 RequiredArgs::All); 1109 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); 1110 llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName); 1111 return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args); 1112 } 1113 1114 case Builtin::BI__atomic_test_and_set: { 1115 // Look at the argument type to determine whether this is a volatile 1116 // operation. The parameter type is always volatile. 
1117 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 1118 bool Volatile = 1119 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 1120 1121 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1122 unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); 1123 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 1124 Value *NewVal = Builder.getInt8(1); 1125 Value *Order = EmitScalarExpr(E->getArg(1)); 1126 if (isa<llvm::ConstantInt>(Order)) { 1127 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1128 AtomicRMWInst *Result = nullptr; 1129 switch (ord) { 1130 case 0: // memory_order_relaxed 1131 default: // invalid order 1132 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1133 Ptr, NewVal, 1134 llvm::Monotonic); 1135 break; 1136 case 1: // memory_order_consume 1137 case 2: // memory_order_acquire 1138 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1139 Ptr, NewVal, 1140 llvm::Acquire); 1141 break; 1142 case 3: // memory_order_release 1143 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1144 Ptr, NewVal, 1145 llvm::Release); 1146 break; 1147 case 4: // memory_order_acq_rel 1148 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1149 Ptr, NewVal, 1150 llvm::AcquireRelease); 1151 break; 1152 case 5: // memory_order_seq_cst 1153 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1154 Ptr, NewVal, 1155 llvm::SequentiallyConsistent); 1156 break; 1157 } 1158 Result->setVolatile(Volatile); 1159 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 1160 } 1161 1162 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1163 1164 llvm::BasicBlock *BBs[5] = { 1165 createBasicBlock("monotonic", CurFn), 1166 createBasicBlock("acquire", CurFn), 1167 createBasicBlock("release", CurFn), 1168 createBasicBlock("acqrel", CurFn), 1169 createBasicBlock("seqcst", CurFn) 1170 }; 1171 llvm::AtomicOrdering Orders[5] = { 1172 llvm::Monotonic, llvm::Acquire, 
llvm::Release, 1173 llvm::AcquireRelease, llvm::SequentiallyConsistent 1174 }; 1175 1176 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1177 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 1178 1179 Builder.SetInsertPoint(ContBB); 1180 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set"); 1181 1182 for (unsigned i = 0; i < 5; ++i) { 1183 Builder.SetInsertPoint(BBs[i]); 1184 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1185 Ptr, NewVal, Orders[i]); 1186 RMW->setVolatile(Volatile); 1187 Result->addIncoming(RMW, BBs[i]); 1188 Builder.CreateBr(ContBB); 1189 } 1190 1191 SI->addCase(Builder.getInt32(0), BBs[0]); 1192 SI->addCase(Builder.getInt32(1), BBs[1]); 1193 SI->addCase(Builder.getInt32(2), BBs[1]); 1194 SI->addCase(Builder.getInt32(3), BBs[2]); 1195 SI->addCase(Builder.getInt32(4), BBs[3]); 1196 SI->addCase(Builder.getInt32(5), BBs[4]); 1197 1198 Builder.SetInsertPoint(ContBB); 1199 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 1200 } 1201 1202 case Builtin::BI__atomic_clear: { 1203 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 1204 bool Volatile = 1205 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 1206 1207 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1208 unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); 1209 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 1210 Value *NewVal = Builder.getInt8(0); 1211 Value *Order = EmitScalarExpr(E->getArg(1)); 1212 if (isa<llvm::ConstantInt>(Order)) { 1213 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1214 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 1215 Store->setAlignment(1); 1216 switch (ord) { 1217 case 0: // memory_order_relaxed 1218 default: // invalid order 1219 Store->setOrdering(llvm::Monotonic); 1220 break; 1221 case 3: // memory_order_release 1222 Store->setOrdering(llvm::Release); 1223 break; 1224 case 5: // memory_order_seq_cst 1225 
Store->setOrdering(llvm::SequentiallyConsistent); 1226 break; 1227 } 1228 return RValue::get(nullptr); 1229 } 1230 1231 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1232 1233 llvm::BasicBlock *BBs[3] = { 1234 createBasicBlock("monotonic", CurFn), 1235 createBasicBlock("release", CurFn), 1236 createBasicBlock("seqcst", CurFn) 1237 }; 1238 llvm::AtomicOrdering Orders[3] = { 1239 llvm::Monotonic, llvm::Release, llvm::SequentiallyConsistent 1240 }; 1241 1242 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1243 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 1244 1245 for (unsigned i = 0; i < 3; ++i) { 1246 Builder.SetInsertPoint(BBs[i]); 1247 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 1248 Store->setAlignment(1); 1249 Store->setOrdering(Orders[i]); 1250 Builder.CreateBr(ContBB); 1251 } 1252 1253 SI->addCase(Builder.getInt32(0), BBs[0]); 1254 SI->addCase(Builder.getInt32(3), BBs[1]); 1255 SI->addCase(Builder.getInt32(5), BBs[2]); 1256 1257 Builder.SetInsertPoint(ContBB); 1258 return RValue::get(nullptr); 1259 } 1260 1261 case Builtin::BI__atomic_thread_fence: 1262 case Builtin::BI__atomic_signal_fence: 1263 case Builtin::BI__c11_atomic_thread_fence: 1264 case Builtin::BI__c11_atomic_signal_fence: { 1265 llvm::SynchronizationScope Scope; 1266 if (BuiltinID == Builtin::BI__atomic_signal_fence || 1267 BuiltinID == Builtin::BI__c11_atomic_signal_fence) 1268 Scope = llvm::SingleThread; 1269 else 1270 Scope = llvm::CrossThread; 1271 Value *Order = EmitScalarExpr(E->getArg(0)); 1272 if (isa<llvm::ConstantInt>(Order)) { 1273 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1274 switch (ord) { 1275 case 0: // memory_order_relaxed 1276 default: // invalid order 1277 break; 1278 case 1: // memory_order_consume 1279 case 2: // memory_order_acquire 1280 Builder.CreateFence(llvm::Acquire, Scope); 1281 break; 1282 case 3: // memory_order_release 1283 Builder.CreateFence(llvm::Release, Scope); 1284 
break; 1285 case 4: // memory_order_acq_rel 1286 Builder.CreateFence(llvm::AcquireRelease, Scope); 1287 break; 1288 case 5: // memory_order_seq_cst 1289 Builder.CreateFence(llvm::SequentiallyConsistent, Scope); 1290 break; 1291 } 1292 return RValue::get(nullptr); 1293 } 1294 1295 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB; 1296 AcquireBB = createBasicBlock("acquire", CurFn); 1297 ReleaseBB = createBasicBlock("release", CurFn); 1298 AcqRelBB = createBasicBlock("acqrel", CurFn); 1299 SeqCstBB = createBasicBlock("seqcst", CurFn); 1300 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1301 1302 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1303 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB); 1304 1305 Builder.SetInsertPoint(AcquireBB); 1306 Builder.CreateFence(llvm::Acquire, Scope); 1307 Builder.CreateBr(ContBB); 1308 SI->addCase(Builder.getInt32(1), AcquireBB); 1309 SI->addCase(Builder.getInt32(2), AcquireBB); 1310 1311 Builder.SetInsertPoint(ReleaseBB); 1312 Builder.CreateFence(llvm::Release, Scope); 1313 Builder.CreateBr(ContBB); 1314 SI->addCase(Builder.getInt32(3), ReleaseBB); 1315 1316 Builder.SetInsertPoint(AcqRelBB); 1317 Builder.CreateFence(llvm::AcquireRelease, Scope); 1318 Builder.CreateBr(ContBB); 1319 SI->addCase(Builder.getInt32(4), AcqRelBB); 1320 1321 Builder.SetInsertPoint(SeqCstBB); 1322 Builder.CreateFence(llvm::SequentiallyConsistent, Scope); 1323 Builder.CreateBr(ContBB); 1324 SI->addCase(Builder.getInt32(5), SeqCstBB); 1325 1326 Builder.SetInsertPoint(ContBB); 1327 return RValue::get(nullptr); 1328 } 1329 1330 // Library functions with special handling. 
1331 case Builtin::BIsqrt: 1332 case Builtin::BIsqrtf: 1333 case Builtin::BIsqrtl: { 1334 // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only 1335 // in finite- or unsafe-math mode (the intrinsic has different semantics 1336 // for handling negative numbers compared to the library function, so 1337 // -fmath-errno=0 is not enough). 1338 if (!FD->hasAttr<ConstAttr>()) 1339 break; 1340 if (!(CGM.getCodeGenOpts().UnsafeFPMath || 1341 CGM.getCodeGenOpts().NoNaNsFPMath)) 1342 break; 1343 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 1344 llvm::Type *ArgType = Arg0->getType(); 1345 Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType); 1346 return RValue::get(Builder.CreateCall(F, Arg0)); 1347 } 1348 1349 case Builtin::BIpow: 1350 case Builtin::BIpowf: 1351 case Builtin::BIpowl: { 1352 // Transform a call to pow* into a @llvm.pow.* intrinsic call. 1353 if (!FD->hasAttr<ConstAttr>()) 1354 break; 1355 Value *Base = EmitScalarExpr(E->getArg(0)); 1356 Value *Exponent = EmitScalarExpr(E->getArg(1)); 1357 llvm::Type *ArgType = Base->getType(); 1358 Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType); 1359 return RValue::get(Builder.CreateCall2(F, Base, Exponent)); 1360 } 1361 1362 case Builtin::BIfma: 1363 case Builtin::BIfmaf: 1364 case Builtin::BIfmal: 1365 case Builtin::BI__builtin_fma: 1366 case Builtin::BI__builtin_fmaf: 1367 case Builtin::BI__builtin_fmal: { 1368 // Rewrite fma to intrinsic. 
1369 Value *FirstArg = EmitScalarExpr(E->getArg(0)); 1370 llvm::Type *ArgType = FirstArg->getType(); 1371 Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType); 1372 return RValue::get(Builder.CreateCall3(F, FirstArg, 1373 EmitScalarExpr(E->getArg(1)), 1374 EmitScalarExpr(E->getArg(2)))); 1375 } 1376 1377 case Builtin::BI__builtin_signbit: 1378 case Builtin::BI__builtin_signbitf: 1379 case Builtin::BI__builtin_signbitl: { 1380 LLVMContext &C = CGM.getLLVMContext(); 1381 1382 Value *Arg = EmitScalarExpr(E->getArg(0)); 1383 llvm::Type *ArgTy = Arg->getType(); 1384 int ArgWidth = ArgTy->getPrimitiveSizeInBits(); 1385 llvm::Type *ArgIntTy = llvm::IntegerType::get(C, ArgWidth); 1386 Value *BCArg = Builder.CreateBitCast(Arg, ArgIntTy); 1387 if (ArgTy->isPPC_FP128Ty()) { 1388 // The higher-order double comes first, and so we need to truncate the 1389 // pair to extract the overall sign. The order of the pair is the same 1390 // in both little- and big-Endian modes. 1391 ArgWidth >>= 1; 1392 ArgIntTy = llvm::IntegerType::get(C, ArgWidth); 1393 BCArg = Builder.CreateTrunc(BCArg, ArgIntTy); 1394 } 1395 Value *ZeroCmp = llvm::Constant::getNullValue(ArgIntTy); 1396 Value *Result = Builder.CreateICmpSLT(BCArg, ZeroCmp); 1397 return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType()))); 1398 } 1399 case Builtin::BI__builtin_annotation: { 1400 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0)); 1401 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation, 1402 AnnVal->getType()); 1403 1404 // Get the annotation string, go through casts. Sema requires this to be a 1405 // non-wide string literal, potentially casted, so the cast<> is safe. 
1406 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts(); 1407 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString(); 1408 return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc())); 1409 } 1410 case Builtin::BI__builtin_addcb: 1411 case Builtin::BI__builtin_addcs: 1412 case Builtin::BI__builtin_addc: 1413 case Builtin::BI__builtin_addcl: 1414 case Builtin::BI__builtin_addcll: 1415 case Builtin::BI__builtin_subcb: 1416 case Builtin::BI__builtin_subcs: 1417 case Builtin::BI__builtin_subc: 1418 case Builtin::BI__builtin_subcl: 1419 case Builtin::BI__builtin_subcll: { 1420 1421 // We translate all of these builtins from expressions of the form: 1422 // int x = ..., y = ..., carryin = ..., carryout, result; 1423 // result = __builtin_addc(x, y, carryin, &carryout); 1424 // 1425 // to LLVM IR of the form: 1426 // 1427 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) 1428 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0 1429 // %carry1 = extractvalue {i32, i1} %tmp1, 1 1430 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1, 1431 // i32 %carryin) 1432 // %result = extractvalue {i32, i1} %tmp2, 0 1433 // %carry2 = extractvalue {i32, i1} %tmp2, 1 1434 // %tmp3 = or i1 %carry1, %carry2 1435 // %tmp4 = zext i1 %tmp3 to i32 1436 // store i32 %tmp4, i32* %carryout 1437 1438 // Scalarize our inputs. 1439 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 1440 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 1441 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2)); 1442 std::pair<llvm::Value*, unsigned> CarryOutPtr = 1443 EmitPointerWithAlignment(E->getArg(3)); 1444 1445 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow. 
1446 llvm::Intrinsic::ID IntrinsicId; 1447 switch (BuiltinID) { 1448 default: llvm_unreachable("Unknown multiprecision builtin id."); 1449 case Builtin::BI__builtin_addcb: 1450 case Builtin::BI__builtin_addcs: 1451 case Builtin::BI__builtin_addc: 1452 case Builtin::BI__builtin_addcl: 1453 case Builtin::BI__builtin_addcll: 1454 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 1455 break; 1456 case Builtin::BI__builtin_subcb: 1457 case Builtin::BI__builtin_subcs: 1458 case Builtin::BI__builtin_subc: 1459 case Builtin::BI__builtin_subcl: 1460 case Builtin::BI__builtin_subcll: 1461 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 1462 break; 1463 } 1464 1465 // Construct our resulting LLVM IR expression. 1466 llvm::Value *Carry1; 1467 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId, 1468 X, Y, Carry1); 1469 llvm::Value *Carry2; 1470 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId, 1471 Sum1, Carryin, Carry2); 1472 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2), 1473 X->getType()); 1474 llvm::StoreInst *CarryOutStore = Builder.CreateStore(CarryOut, 1475 CarryOutPtr.first); 1476 CarryOutStore->setAlignment(CarryOutPtr.second); 1477 return RValue::get(Sum2); 1478 } 1479 case Builtin::BI__builtin_uadd_overflow: 1480 case Builtin::BI__builtin_uaddl_overflow: 1481 case Builtin::BI__builtin_uaddll_overflow: 1482 case Builtin::BI__builtin_usub_overflow: 1483 case Builtin::BI__builtin_usubl_overflow: 1484 case Builtin::BI__builtin_usubll_overflow: 1485 case Builtin::BI__builtin_umul_overflow: 1486 case Builtin::BI__builtin_umull_overflow: 1487 case Builtin::BI__builtin_umulll_overflow: 1488 case Builtin::BI__builtin_sadd_overflow: 1489 case Builtin::BI__builtin_saddl_overflow: 1490 case Builtin::BI__builtin_saddll_overflow: 1491 case Builtin::BI__builtin_ssub_overflow: 1492 case Builtin::BI__builtin_ssubl_overflow: 1493 case Builtin::BI__builtin_ssubll_overflow: 1494 case Builtin::BI__builtin_smul_overflow: 1495 case 
Builtin::BI__builtin_smull_overflow: 1496 case Builtin::BI__builtin_smulll_overflow: { 1497 1498 // We translate all of these builtins directly to the relevant llvm IR node. 1499 1500 // Scalarize our inputs. 1501 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 1502 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 1503 std::pair<llvm::Value *, unsigned> SumOutPtr = 1504 EmitPointerWithAlignment(E->getArg(2)); 1505 1506 // Decide which of the overflow intrinsics we are lowering to: 1507 llvm::Intrinsic::ID IntrinsicId; 1508 switch (BuiltinID) { 1509 default: llvm_unreachable("Unknown security overflow builtin id."); 1510 case Builtin::BI__builtin_uadd_overflow: 1511 case Builtin::BI__builtin_uaddl_overflow: 1512 case Builtin::BI__builtin_uaddll_overflow: 1513 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 1514 break; 1515 case Builtin::BI__builtin_usub_overflow: 1516 case Builtin::BI__builtin_usubl_overflow: 1517 case Builtin::BI__builtin_usubll_overflow: 1518 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 1519 break; 1520 case Builtin::BI__builtin_umul_overflow: 1521 case Builtin::BI__builtin_umull_overflow: 1522 case Builtin::BI__builtin_umulll_overflow: 1523 IntrinsicId = llvm::Intrinsic::umul_with_overflow; 1524 break; 1525 case Builtin::BI__builtin_sadd_overflow: 1526 case Builtin::BI__builtin_saddl_overflow: 1527 case Builtin::BI__builtin_saddll_overflow: 1528 IntrinsicId = llvm::Intrinsic::sadd_with_overflow; 1529 break; 1530 case Builtin::BI__builtin_ssub_overflow: 1531 case Builtin::BI__builtin_ssubl_overflow: 1532 case Builtin::BI__builtin_ssubll_overflow: 1533 IntrinsicId = llvm::Intrinsic::ssub_with_overflow; 1534 break; 1535 case Builtin::BI__builtin_smul_overflow: 1536 case Builtin::BI__builtin_smull_overflow: 1537 case Builtin::BI__builtin_smulll_overflow: 1538 IntrinsicId = llvm::Intrinsic::smul_with_overflow; 1539 break; 1540 } 1541 1542 1543 llvm::Value *Carry; 1544 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry); 
1545 llvm::StoreInst *SumOutStore = Builder.CreateStore(Sum, SumOutPtr.first); 1546 SumOutStore->setAlignment(SumOutPtr.second); 1547 1548 return RValue::get(Carry); 1549 } 1550 case Builtin::BI__builtin_addressof: 1551 return RValue::get(EmitLValue(E->getArg(0)).getAddress()); 1552 case Builtin::BI__builtin_operator_new: 1553 return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 1554 E->getArg(0), false); 1555 case Builtin::BI__builtin_operator_delete: 1556 return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 1557 E->getArg(0), true); 1558 case Builtin::BI__noop: 1559 // __noop always evaluates to an integer literal zero. 1560 return RValue::get(ConstantInt::get(IntTy, 0)); 1561 case Builtin::BI_InterlockedExchange: 1562 case Builtin::BI_InterlockedExchangePointer: 1563 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1564 case Builtin::BI_InterlockedCompareExchangePointer: { 1565 llvm::Type *RTy; 1566 llvm::IntegerType *IntType = 1567 IntegerType::get(getLLVMContext(), 1568 getContext().getTypeSize(E->getType())); 1569 llvm::Type *IntPtrType = IntType->getPointerTo(); 1570 1571 llvm::Value *Destination = 1572 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType); 1573 1574 llvm::Value *Exchange = EmitScalarExpr(E->getArg(1)); 1575 RTy = Exchange->getType(); 1576 Exchange = Builder.CreatePtrToInt(Exchange, IntType); 1577 1578 llvm::Value *Comparand = 1579 Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType); 1580 1581 auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, 1582 SequentiallyConsistent, 1583 SequentiallyConsistent); 1584 Result->setVolatile(true); 1585 1586 return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result, 1587 0), 1588 RTy)); 1589 } 1590 case Builtin::BI_InterlockedCompareExchange: { 1591 AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg( 1592 EmitScalarExpr(E->getArg(0)), 1593 EmitScalarExpr(E->getArg(2)), 1594 
EmitScalarExpr(E->getArg(1)), 1595 SequentiallyConsistent, 1596 SequentiallyConsistent); 1597 CXI->setVolatile(true); 1598 return RValue::get(Builder.CreateExtractValue(CXI, 0)); 1599 } 1600 case Builtin::BI_InterlockedIncrement: { 1601 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 1602 AtomicRMWInst::Add, 1603 EmitScalarExpr(E->getArg(0)), 1604 ConstantInt::get(Int32Ty, 1), 1605 llvm::SequentiallyConsistent); 1606 RMWI->setVolatile(true); 1607 return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(Int32Ty, 1))); 1608 } 1609 case Builtin::BI_InterlockedDecrement: { 1610 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 1611 AtomicRMWInst::Sub, 1612 EmitScalarExpr(E->getArg(0)), 1613 ConstantInt::get(Int32Ty, 1), 1614 llvm::SequentiallyConsistent); 1615 RMWI->setVolatile(true); 1616 return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(Int32Ty, 1))); 1617 } 1618 case Builtin::BI_InterlockedExchangeAdd: { 1619 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 1620 AtomicRMWInst::Add, 1621 EmitScalarExpr(E->getArg(0)), 1622 EmitScalarExpr(E->getArg(1)), 1623 llvm::SequentiallyConsistent); 1624 RMWI->setVolatile(true); 1625 return RValue::get(RMWI); 1626 } 1627 } 1628 1629 // If this is an alias for a lib function (e.g. __builtin_sin), emit 1630 // the call using the normal call path, but using the unmangled 1631 // version of the function name. 1632 if (getContext().BuiltinInfo.isLibFunction(BuiltinID)) 1633 return emitLibraryCall(*this, FD, E, 1634 CGM.getBuiltinLibFunction(FD, BuiltinID)); 1635 1636 // If this is a predefined lib function (e.g. malloc), emit the call 1637 // using exactly the normal call path. 1638 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID)) 1639 return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee())); 1640 1641 // See if we have a target specific intrinsic. 
1642 const char *Name = getContext().BuiltinInfo.GetName(BuiltinID); 1643 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic; 1644 if (const char *Prefix = 1645 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch())) { 1646 IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name); 1647 // NOTE we dont need to perform a compatibility flag check here since the 1648 // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the 1649 // MS builtins via ALL_MS_LANGUAGES and are filtered earlier. 1650 if (IntrinsicID == Intrinsic::not_intrinsic) 1651 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix, Name); 1652 } 1653 1654 if (IntrinsicID != Intrinsic::not_intrinsic) { 1655 SmallVector<Value*, 16> Args; 1656 1657 // Find out if any arguments are required to be integer constant 1658 // expressions. 1659 unsigned ICEArguments = 0; 1660 ASTContext::GetBuiltinTypeError Error; 1661 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 1662 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 1663 1664 Function *F = CGM.getIntrinsic(IntrinsicID); 1665 llvm::FunctionType *FTy = F->getFunctionType(); 1666 1667 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { 1668 Value *ArgValue; 1669 // If this is a normal argument, just emit it as a scalar. 1670 if ((ICEArguments & (1 << i)) == 0) { 1671 ArgValue = EmitScalarExpr(E->getArg(i)); 1672 } else { 1673 // If this is required to be a constant, constant fold it so that we 1674 // know that the generated intrinsic gets a ConstantInt. 1675 llvm::APSInt Result; 1676 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext()); 1677 assert(IsConst && "Constant arg isn't actually constant?"); 1678 (void)IsConst; 1679 ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result); 1680 } 1681 1682 // If the intrinsic arg type is different from the builtin arg type 1683 // we need to do a bit cast. 
1684 llvm::Type *PTy = FTy->getParamType(i); 1685 if (PTy != ArgValue->getType()) { 1686 assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) && 1687 "Must be able to losslessly bit cast to param"); 1688 ArgValue = Builder.CreateBitCast(ArgValue, PTy); 1689 } 1690 1691 Args.push_back(ArgValue); 1692 } 1693 1694 Value *V = Builder.CreateCall(F, Args); 1695 QualType BuiltinRetType = E->getType(); 1696 1697 llvm::Type *RetTy = VoidTy; 1698 if (!BuiltinRetType->isVoidType()) 1699 RetTy = ConvertType(BuiltinRetType); 1700 1701 if (RetTy != V->getType()) { 1702 assert(V->getType()->canLosslesslyBitCastTo(RetTy) && 1703 "Must be able to losslessly bit cast result type"); 1704 V = Builder.CreateBitCast(V, RetTy); 1705 } 1706 1707 return RValue::get(V); 1708 } 1709 1710 // See if we have a target specific builtin that needs to be lowered. 1711 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E)) 1712 return RValue::get(V); 1713 1714 ErrorUnsupported(E, "builtin function"); 1715 1716 // Unknown builtin, for now just dump it out and return undef. 
1717 return GetUndefRValue(E->getType()); 1718 } 1719 1720 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, 1721 const CallExpr *E) { 1722 switch (getTarget().getTriple().getArch()) { 1723 case llvm::Triple::arm: 1724 case llvm::Triple::armeb: 1725 case llvm::Triple::thumb: 1726 case llvm::Triple::thumbeb: 1727 return EmitARMBuiltinExpr(BuiltinID, E); 1728 case llvm::Triple::aarch64: 1729 case llvm::Triple::aarch64_be: 1730 return EmitAArch64BuiltinExpr(BuiltinID, E); 1731 case llvm::Triple::x86: 1732 case llvm::Triple::x86_64: 1733 return EmitX86BuiltinExpr(BuiltinID, E); 1734 case llvm::Triple::ppc: 1735 case llvm::Triple::ppc64: 1736 case llvm::Triple::ppc64le: 1737 return EmitPPCBuiltinExpr(BuiltinID, E); 1738 case llvm::Triple::r600: 1739 return EmitR600BuiltinExpr(BuiltinID, E); 1740 default: 1741 return nullptr; 1742 } 1743 } 1744 1745 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF, 1746 NeonTypeFlags TypeFlags, 1747 bool V1Ty=false) { 1748 int IsQuad = TypeFlags.isQuad(); 1749 switch (TypeFlags.getEltType()) { 1750 case NeonTypeFlags::Int8: 1751 case NeonTypeFlags::Poly8: 1752 return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad)); 1753 case NeonTypeFlags::Int16: 1754 case NeonTypeFlags::Poly16: 1755 case NeonTypeFlags::Float16: 1756 return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad)); 1757 case NeonTypeFlags::Int32: 1758 return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad)); 1759 case NeonTypeFlags::Int64: 1760 case NeonTypeFlags::Poly64: 1761 return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad)); 1762 case NeonTypeFlags::Poly128: 1763 // FIXME: i128 and f128 doesn't get fully support in Clang and llvm. 1764 // There is a lot of i128 and f128 API missing. 1765 // so we use v16i8 to represent poly128 and get pattern matched. 1766 return llvm::VectorType::get(CGF->Int8Ty, 16); 1767 case NeonTypeFlags::Float32: 1768 return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 
1 : (2 << IsQuad)); 1769 case NeonTypeFlags::Float64: 1770 return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad)); 1771 } 1772 llvm_unreachable("Unknown vector element type!"); 1773 } 1774 1775 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) { 1776 unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements(); 1777 Value* SV = llvm::ConstantVector::getSplat(nElts, C); 1778 return Builder.CreateShuffleVector(V, V, SV, "lane"); 1779 } 1780 1781 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops, 1782 const char *name, 1783 unsigned shift, bool rightshift) { 1784 unsigned j = 0; 1785 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); 1786 ai != ae; ++ai, ++j) 1787 if (shift > 0 && shift == j) 1788 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift); 1789 else 1790 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name); 1791 1792 return Builder.CreateCall(F, Ops, name); 1793 } 1794 1795 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty, 1796 bool neg) { 1797 int SV = cast<ConstantInt>(V)->getSExtValue(); 1798 1799 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 1800 llvm::Constant *C = ConstantInt::get(VTy->getElementType(), neg ? -SV : SV); 1801 return llvm::ConstantVector::getSplat(VTy->getNumElements(), C); 1802 } 1803 1804 // \brief Right-shift a vector by a constant. 1805 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift, 1806 llvm::Type *Ty, bool usgn, 1807 const char *name) { 1808 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 1809 1810 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue(); 1811 int EltSize = VTy->getScalarSizeInBits(); 1812 1813 Vec = Builder.CreateBitCast(Vec, Ty); 1814 1815 // lshr/ashr are undefined when the shift amount is equal to the vector 1816 // element size. 1817 if (ShiftAmt == EltSize) { 1818 if (usgn) { 1819 // Right-shifting an unsigned value by its size yields 0. 
1820 llvm::Constant *Zero = ConstantInt::get(VTy->getElementType(), 0); 1821 return llvm::ConstantVector::getSplat(VTy->getNumElements(), Zero); 1822 } else { 1823 // Right-shifting a signed value by its size is equivalent 1824 // to a shift of size-1. 1825 --ShiftAmt; 1826 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt); 1827 } 1828 } 1829 1830 Shift = EmitNeonShiftVector(Shift, Ty, false); 1831 if (usgn) 1832 return Builder.CreateLShr(Vec, Shift, name); 1833 else 1834 return Builder.CreateAShr(Vec, Shift, name); 1835 } 1836 1837 /// GetPointeeAlignment - Given an expression with a pointer type, find the 1838 /// alignment of the type referenced by the pointer. Skip over implicit 1839 /// casts. 1840 std::pair<llvm::Value*, unsigned> 1841 CodeGenFunction::EmitPointerWithAlignment(const Expr *Addr) { 1842 assert(Addr->getType()->isPointerType()); 1843 Addr = Addr->IgnoreParens(); 1844 if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Addr)) { 1845 if ((ICE->getCastKind() == CK_BitCast || ICE->getCastKind() == CK_NoOp) && 1846 ICE->getSubExpr()->getType()->isPointerType()) { 1847 std::pair<llvm::Value*, unsigned> Ptr = 1848 EmitPointerWithAlignment(ICE->getSubExpr()); 1849 Ptr.first = Builder.CreateBitCast(Ptr.first, 1850 ConvertType(Addr->getType())); 1851 return Ptr; 1852 } else if (ICE->getCastKind() == CK_ArrayToPointerDecay) { 1853 LValue LV = EmitLValue(ICE->getSubExpr()); 1854 unsigned Align = LV.getAlignment().getQuantity(); 1855 if (!Align) { 1856 // FIXME: Once LValues are fixed to always set alignment, 1857 // zap this code. 
1858 QualType PtTy = ICE->getSubExpr()->getType(); 1859 if (!PtTy->isIncompleteType()) 1860 Align = getContext().getTypeAlignInChars(PtTy).getQuantity(); 1861 else 1862 Align = 1; 1863 } 1864 return std::make_pair(LV.getAddress(), Align); 1865 } 1866 } 1867 if (const UnaryOperator *UO = dyn_cast<UnaryOperator>(Addr)) { 1868 if (UO->getOpcode() == UO_AddrOf) { 1869 LValue LV = EmitLValue(UO->getSubExpr()); 1870 unsigned Align = LV.getAlignment().getQuantity(); 1871 if (!Align) { 1872 // FIXME: Once LValues are fixed to always set alignment, 1873 // zap this code. 1874 QualType PtTy = UO->getSubExpr()->getType(); 1875 if (!PtTy->isIncompleteType()) 1876 Align = getContext().getTypeAlignInChars(PtTy).getQuantity(); 1877 else 1878 Align = 1; 1879 } 1880 return std::make_pair(LV.getAddress(), Align); 1881 } 1882 } 1883 1884 unsigned Align = 1; 1885 QualType PtTy = Addr->getType()->getPointeeType(); 1886 if (!PtTy->isIncompleteType()) 1887 Align = getContext().getTypeAlignInChars(PtTy).getQuantity(); 1888 1889 return std::make_pair(EmitScalarExpr(Addr), Align); 1890 } 1891 1892 enum { 1893 AddRetType = (1 << 0), 1894 Add1ArgType = (1 << 1), 1895 Add2ArgTypes = (1 << 2), 1896 1897 VectorizeRetType = (1 << 3), 1898 VectorizeArgTypes = (1 << 4), 1899 1900 InventFloatType = (1 << 5), 1901 UnsignedAlts = (1 << 6), 1902 1903 Use64BitVectors = (1 << 7), 1904 Use128BitVectors = (1 << 8), 1905 1906 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes, 1907 VectorRet = AddRetType | VectorizeRetType, 1908 VectorRetGetArgs01 = 1909 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes, 1910 FpCmpzModifiers = 1911 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType 1912 }; 1913 1914 struct NeonIntrinsicInfo { 1915 unsigned BuiltinID; 1916 unsigned LLVMIntrinsic; 1917 unsigned AltLLVMIntrinsic; 1918 const char *NameHint; 1919 unsigned TypeModifier; 1920 1921 bool operator<(unsigned RHSBuiltinID) const { 1922 return BuiltinID < RHSBuiltinID; 1923 } 1924 }; 1925 1926 
// Helpers for writing NeonIntrinsicInfo rows. Each macro expands to a braced
// initializer of the form
//   { BuiltinID, LLVMIntrinsic, AltLLVMIntrinsic, NameHint, TypeModifier }
// where BuiltinID is NEON::BI__builtin_neon_<NameBase> and NameHint is the
// stringified NameBase.

// NEONMAP0: a row with no LLVM intrinsic and no type modifier (all zero).
#define NEONMAP0(NameBase) \
  { NEON::BI__builtin_neon_ ## NameBase, 0, 0, #NameBase, 0 }

// NEONMAP1: a row with a single LLVM intrinsic; the alternate slot is 0.
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  { NEON:: BI__builtin_neon_ ## NameBase, \
    Intrinsic::LLVMIntrinsic, 0, #NameBase, TypeModifier }

// NEONMAP2: a row with both a primary and an alternate LLVM intrinsic.
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
  { NEON:: BI__builtin_neon_ ## NameBase, \
      Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
      #NameBase, TypeModifier }

// Intrinsic map for NEON builtins, filled with arm_neon_* (and a few generic)
// LLVM intrinsics.
// NOTE(review): rows are listed in ascending builtin-name order; presumably
// the lookup depends on the table being ordered by BuiltinID (see
// NeonIntrinsicInfo::operator<) -- confirm before inserting or moving rows.
static NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vabs_v, arm_neon_vabs, 0),
  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
  NEONMAP0(vaddhn_v),
  NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
  NEONMAP1(vaeseq_v, arm_neon_aese, 0),
  NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
  NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vcage_v, arm_neon_vacge, 0),
  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcale_v, arm_neon_vacge, 0),
  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_v, arm_neon_vcvtfp2hf, 0),
  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvt_s32_v),
  NEONMAP0(vcvt_s64_v),
  NEONMAP0(vcvt_u32_v),
  NEONMAP0(vcvt_u64_v),
  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_s32_v),
  NEONMAP0(vcvtq_s64_v),
  NEONMAP0(vcvtq_u32_v),
  NEONMAP0(vcvtq_u64_v),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP0(vld1_dup_v),
  NEONMAP1(vld1_v, arm_neon_vld1, 0),
  NEONMAP0(vld1q_dup_v),
  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2_v, arm_neon_vld2, 0),
  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3_v, arm_neon_vld3, 0),
  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4_v, arm_neon_vld4, 0),
  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP0(vmull_v),
  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
  NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
  NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
  NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
  NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
  NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
  NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
  NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
  NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
  NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
  NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
  NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
  NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
  NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
  NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
  NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP1(vst1_v, arm_neon_vst1, 0),
  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2_v, arm_neon_vst2, 0),
  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3_v, arm_neon_vst3, 0),
  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4_v, arm_neon_vst4, 0),
  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtrn_v),
  NEONMAP0(vtrnq_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
  NEONMAP0(vuzp_v),
  NEONMAP0(vuzpq_v),
  NEONMAP0(vzip_v),
  NEONMAP0(vzipq_v)
};

// Intrinsic map for the same family of NEON vector builtins, filled with
// aarch64_neon_* / aarch64_crypto_* (and a few generic) LLVM intrinsics.
// NOTE(review): rows are listed in ascending builtin-name order; presumably
// the lookup depends on that ordering -- confirm before inserting rows.
static NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
  NEONMAP0(vaddhn_v),
  NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_v, aarch64_neon_vcvtfp2hf, 0),
  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
  NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
  NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
};

// Intrinsic map for builtins whose names carry explicit element-type
// suffixes (e.g. vabdd_f64, vaddv_s32).
// NOTE(review): "SISD" presumably means the scalar (single-instruction,
// single-data) forms of the AArch64 NEON builtins -- confirm.
static NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType), 2292 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType), 2293 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), 2294 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), 2295 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType), 2296 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType), 2297 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType), 2298 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType), 2299 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), 2300 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), 2301 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), 2302 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), 2303 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType), 2304 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), 2305 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType), 2306 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), 2307 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType), 2308 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 2309 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType), 2310 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 2311 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 2312 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), 2313 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 2314 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), 2315 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), 2316 NEONMAP1(vcvts_n_f32_u32, 
aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), 2317 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), 2318 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), 2319 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0), 2320 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 2321 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 2322 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 2323 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 2324 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), 2325 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), 2326 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 2327 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 2328 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), 2329 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), 2330 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 2331 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 2332 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 2333 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 2334 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), 2335 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), 2336 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 2337 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), 2338 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), 2339 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), 2340 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0), 2341 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType), 2342 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType), 2343 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 2344 
NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 2345 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 2346 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 2347 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 2348 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 2349 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 2350 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 2351 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), 2352 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 2353 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), 2354 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType), 2355 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), 2356 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType), 2357 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), 2358 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), 2359 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType), 2360 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType), 2361 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), 2362 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), 2363 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType), 2364 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType), 2365 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors), 2366 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType), 2367 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors), 2368 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0), 2369 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType), 2370 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType), 2371 
NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), 2372 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), 2373 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), 2374 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), 2375 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType), 2376 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), 2377 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), 2378 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), 2379 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType), 2380 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), 2381 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType), 2382 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors), 2383 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType), 2384 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), 2385 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), 2386 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType), 2387 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType), 2388 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), 2389 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), 2390 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType), 2391 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType), 2392 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType), 2393 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType), 2394 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), 2395 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), 2396 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), 2397 NEONMAP1(vqrshrns_n_u32, 
aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), 2398 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType), 2399 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), 2400 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), 2401 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 2402 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 2403 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 2404 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 2405 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType), 2406 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType), 2407 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 2408 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 2409 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 2410 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 2411 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType), 2412 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType), 2413 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType), 2414 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType), 2415 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors), 2416 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors), 2417 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType), 2418 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType), 2419 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType), 2420 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), 2421 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), 2422 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), 2423 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | 
Use64BitVectors), 2424 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType), 2425 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), 2426 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), 2427 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors), 2428 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), 2429 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType), 2430 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType), 2431 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors), 2432 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), 2433 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType), 2434 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType), 2435 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType), 2436 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType), 2437 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType), 2438 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType), 2439 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType), 2440 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType), 2441 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType), 2442 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType), 2443 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType), 2444 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType), 2445 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0), 2446 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0), 2447 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0), 2448 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0), 2449 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType), 2450 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType), 2451 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType), 2452 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType), 2453 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), 2454 
NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType), 2455 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), 2456 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType), 2457 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType), 2458 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType), 2459 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), 2460 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType), 2461 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), 2462 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType), 2463 }; 2464 2465 #undef NEONMAP0 2466 #undef NEONMAP1 2467 #undef NEONMAP2 2468 2469 static bool NEONSIMDIntrinsicsProvenSorted = false; 2470 2471 static bool AArch64SIMDIntrinsicsProvenSorted = false; 2472 static bool AArch64SISDIntrinsicsProvenSorted = false; 2473 2474 2475 static const NeonIntrinsicInfo * 2476 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap, 2477 unsigned BuiltinID, bool &MapProvenSorted) { 2478 2479 #ifndef NDEBUG 2480 if (!MapProvenSorted) { 2481 // FIXME: use std::is_sorted once C++11 is allowed 2482 for (unsigned i = 0; i < IntrinsicMap.size() - 1; ++i) 2483 assert(IntrinsicMap[i].BuiltinID <= IntrinsicMap[i + 1].BuiltinID); 2484 MapProvenSorted = true; 2485 } 2486 #endif 2487 2488 const NeonIntrinsicInfo *Builtin = 2489 std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID); 2490 2491 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID) 2492 return Builtin; 2493 2494 return nullptr; 2495 } 2496 2497 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID, 2498 unsigned Modifier, 2499 llvm::Type *ArgType, 2500 const CallExpr *E) { 2501 int VectorSize = 0; 2502 if (Modifier & Use64BitVectors) 2503 VectorSize = 64; 2504 else if (Modifier & Use128BitVectors) 2505 VectorSize = 128; 2506 2507 // Return type. 
2508 SmallVector<llvm::Type *, 3> Tys; 2509 if (Modifier & AddRetType) { 2510 llvm::Type *Ty = ConvertType(E->getCallReturnType()); 2511 if (Modifier & VectorizeRetType) 2512 Ty = llvm::VectorType::get( 2513 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1); 2514 2515 Tys.push_back(Ty); 2516 } 2517 2518 // Arguments. 2519 if (Modifier & VectorizeArgTypes) { 2520 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1; 2521 ArgType = llvm::VectorType::get(ArgType, Elts); 2522 } 2523 2524 if (Modifier & (Add1ArgType | Add2ArgTypes)) 2525 Tys.push_back(ArgType); 2526 2527 if (Modifier & Add2ArgTypes) 2528 Tys.push_back(ArgType); 2529 2530 if (Modifier & InventFloatType) 2531 Tys.push_back(FloatTy); 2532 2533 return CGM.getIntrinsic(IntrinsicID, Tys); 2534 } 2535 2536 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, 2537 const NeonIntrinsicInfo &SISDInfo, 2538 SmallVectorImpl<Value *> &Ops, 2539 const CallExpr *E) { 2540 unsigned BuiltinID = SISDInfo.BuiltinID; 2541 unsigned int Int = SISDInfo.LLVMIntrinsic; 2542 unsigned Modifier = SISDInfo.TypeModifier; 2543 const char *s = SISDInfo.NameHint; 2544 2545 switch (BuiltinID) { 2546 case NEON::BI__builtin_neon_vcled_s64: 2547 case NEON::BI__builtin_neon_vcled_u64: 2548 case NEON::BI__builtin_neon_vcles_f32: 2549 case NEON::BI__builtin_neon_vcled_f64: 2550 case NEON::BI__builtin_neon_vcltd_s64: 2551 case NEON::BI__builtin_neon_vcltd_u64: 2552 case NEON::BI__builtin_neon_vclts_f32: 2553 case NEON::BI__builtin_neon_vcltd_f64: 2554 case NEON::BI__builtin_neon_vcales_f32: 2555 case NEON::BI__builtin_neon_vcaled_f64: 2556 case NEON::BI__builtin_neon_vcalts_f32: 2557 case NEON::BI__builtin_neon_vcaltd_f64: 2558 // Only one direction of comparisons actually exist, cmle is actually a cmge 2559 // with swapped operands. The table gives us the right intrinsic but we 2560 // still need to do the swap. 
2561 std::swap(Ops[0], Ops[1]); 2562 break; 2563 } 2564 2565 assert(Int && "Generic code assumes a valid intrinsic"); 2566 2567 // Determine the type(s) of this overloaded AArch64 intrinsic. 2568 const Expr *Arg = E->getArg(0); 2569 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType()); 2570 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E); 2571 2572 int j = 0; 2573 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0); 2574 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); 2575 ai != ae; ++ai, ++j) { 2576 llvm::Type *ArgTy = ai->getType(); 2577 if (Ops[j]->getType()->getPrimitiveSizeInBits() == 2578 ArgTy->getPrimitiveSizeInBits()) 2579 continue; 2580 2581 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy()); 2582 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate 2583 // it before inserting. 2584 Ops[j] = 2585 CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType()); 2586 Ops[j] = 2587 CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0); 2588 } 2589 2590 Value *Result = CGF.EmitNeonCall(F, Ops, s); 2591 llvm::Type *ResultType = CGF.ConvertType(E->getType()); 2592 if (ResultType->getPrimitiveSizeInBits() < 2593 Result->getType()->getPrimitiveSizeInBits()) 2594 return CGF.Builder.CreateExtractElement(Result, C0); 2595 2596 return CGF.Builder.CreateBitCast(Result, ResultType, s); 2597 } 2598 2599 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( 2600 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, 2601 const char *NameHint, unsigned Modifier, const CallExpr *E, 2602 SmallVectorImpl<llvm::Value *> &Ops, llvm::Value *Align) { 2603 // Get the last argument, which specifies the vector type. 2604 llvm::APSInt NeonTypeConst; 2605 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 2606 if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext())) 2607 return nullptr; 2608 2609 // Determine the type of this overloaded NEON intrinsic. 
2610 NeonTypeFlags Type(NeonTypeConst.getZExtValue()); 2611 bool Usgn = Type.isUnsigned(); 2612 bool Quad = Type.isQuad(); 2613 2614 llvm::VectorType *VTy = GetNeonType(this, Type); 2615 llvm::Type *Ty = VTy; 2616 if (!Ty) 2617 return nullptr; 2618 2619 unsigned Int = LLVMIntrinsic; 2620 if ((Modifier & UnsignedAlts) && !Usgn) 2621 Int = AltLLVMIntrinsic; 2622 2623 switch (BuiltinID) { 2624 default: break; 2625 case NEON::BI__builtin_neon_vabs_v: 2626 case NEON::BI__builtin_neon_vabsq_v: 2627 if (VTy->getElementType()->isFloatingPointTy()) 2628 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs"); 2629 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs"); 2630 case NEON::BI__builtin_neon_vaddhn_v: { 2631 llvm::VectorType *SrcTy = 2632 llvm::VectorType::getExtendedElementVectorType(VTy); 2633 2634 // %sum = add <4 x i32> %lhs, %rhs 2635 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 2636 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy); 2637 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn"); 2638 2639 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> 2640 Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(), 2641 SrcTy->getScalarSizeInBits() / 2); 2642 ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt); 2643 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn"); 2644 2645 // %res = trunc <4 x i32> %high to <4 x i16> 2646 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn"); 2647 } 2648 case NEON::BI__builtin_neon_vcale_v: 2649 case NEON::BI__builtin_neon_vcaleq_v: 2650 case NEON::BI__builtin_neon_vcalt_v: 2651 case NEON::BI__builtin_neon_vcaltq_v: 2652 std::swap(Ops[0], Ops[1]); 2653 case NEON::BI__builtin_neon_vcage_v: 2654 case NEON::BI__builtin_neon_vcageq_v: 2655 case NEON::BI__builtin_neon_vcagt_v: 2656 case NEON::BI__builtin_neon_vcagtq_v: { 2657 llvm::Type *VecFlt = llvm::VectorType::get( 2658 VTy->getScalarSizeInBits() == 32 ? 
FloatTy : DoubleTy, 2659 VTy->getNumElements()); 2660 llvm::Type *Tys[] = { VTy, VecFlt }; 2661 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); 2662 return EmitNeonCall(F, Ops, NameHint); 2663 } 2664 case NEON::BI__builtin_neon_vclz_v: 2665 case NEON::BI__builtin_neon_vclzq_v: 2666 // We generate target-independent intrinsic, which needs a second argument 2667 // for whether or not clz of zero is undefined; on ARM it isn't. 2668 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef())); 2669 break; 2670 case NEON::BI__builtin_neon_vcvt_f32_v: 2671 case NEON::BI__builtin_neon_vcvtq_f32_v: 2672 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 2673 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad)); 2674 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 2675 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 2676 case NEON::BI__builtin_neon_vcvt_n_f32_v: 2677 case NEON::BI__builtin_neon_vcvt_n_f64_v: 2678 case NEON::BI__builtin_neon_vcvtq_n_f32_v: 2679 case NEON::BI__builtin_neon_vcvtq_n_f64_v: { 2680 bool Double = 2681 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 2682 llvm::Type *FloatTy = 2683 GetNeonType(this, NeonTypeFlags(Double ? NeonTypeFlags::Float64 2684 : NeonTypeFlags::Float32, 2685 false, Quad)); 2686 llvm::Type *Tys[2] = { FloatTy, Ty }; 2687 Int = Usgn ? 
LLVMIntrinsic : AltLLVMIntrinsic; 2688 Function *F = CGM.getIntrinsic(Int, Tys); 2689 return EmitNeonCall(F, Ops, "vcvt_n"); 2690 } 2691 case NEON::BI__builtin_neon_vcvt_n_s32_v: 2692 case NEON::BI__builtin_neon_vcvt_n_u32_v: 2693 case NEON::BI__builtin_neon_vcvt_n_s64_v: 2694 case NEON::BI__builtin_neon_vcvt_n_u64_v: 2695 case NEON::BI__builtin_neon_vcvtq_n_s32_v: 2696 case NEON::BI__builtin_neon_vcvtq_n_u32_v: 2697 case NEON::BI__builtin_neon_vcvtq_n_s64_v: 2698 case NEON::BI__builtin_neon_vcvtq_n_u64_v: { 2699 bool Double = 2700 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 2701 llvm::Type *FloatTy = 2702 GetNeonType(this, NeonTypeFlags(Double ? NeonTypeFlags::Float64 2703 : NeonTypeFlags::Float32, 2704 false, Quad)); 2705 llvm::Type *Tys[2] = { Ty, FloatTy }; 2706 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); 2707 return EmitNeonCall(F, Ops, "vcvt_n"); 2708 } 2709 case NEON::BI__builtin_neon_vcvt_s32_v: 2710 case NEON::BI__builtin_neon_vcvt_u32_v: 2711 case NEON::BI__builtin_neon_vcvt_s64_v: 2712 case NEON::BI__builtin_neon_vcvt_u64_v: 2713 case NEON::BI__builtin_neon_vcvtq_s32_v: 2714 case NEON::BI__builtin_neon_vcvtq_u32_v: 2715 case NEON::BI__builtin_neon_vcvtq_s64_v: 2716 case NEON::BI__builtin_neon_vcvtq_u64_v: { 2717 bool Double = 2718 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 2719 llvm::Type *FloatTy = 2720 GetNeonType(this, NeonTypeFlags(Double ? NeonTypeFlags::Float64 2721 : NeonTypeFlags::Float32, 2722 false, Quad)); 2723 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy); 2724 return Usgn ? 
Builder.CreateFPToUI(Ops[0], Ty, "vcvt") 2725 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt"); 2726 } 2727 case NEON::BI__builtin_neon_vcvta_s32_v: 2728 case NEON::BI__builtin_neon_vcvta_s64_v: 2729 case NEON::BI__builtin_neon_vcvta_u32_v: 2730 case NEON::BI__builtin_neon_vcvta_u64_v: 2731 case NEON::BI__builtin_neon_vcvtaq_s32_v: 2732 case NEON::BI__builtin_neon_vcvtaq_s64_v: 2733 case NEON::BI__builtin_neon_vcvtaq_u32_v: 2734 case NEON::BI__builtin_neon_vcvtaq_u64_v: 2735 case NEON::BI__builtin_neon_vcvtn_s32_v: 2736 case NEON::BI__builtin_neon_vcvtn_s64_v: 2737 case NEON::BI__builtin_neon_vcvtn_u32_v: 2738 case NEON::BI__builtin_neon_vcvtn_u64_v: 2739 case NEON::BI__builtin_neon_vcvtnq_s32_v: 2740 case NEON::BI__builtin_neon_vcvtnq_s64_v: 2741 case NEON::BI__builtin_neon_vcvtnq_u32_v: 2742 case NEON::BI__builtin_neon_vcvtnq_u64_v: 2743 case NEON::BI__builtin_neon_vcvtp_s32_v: 2744 case NEON::BI__builtin_neon_vcvtp_s64_v: 2745 case NEON::BI__builtin_neon_vcvtp_u32_v: 2746 case NEON::BI__builtin_neon_vcvtp_u64_v: 2747 case NEON::BI__builtin_neon_vcvtpq_s32_v: 2748 case NEON::BI__builtin_neon_vcvtpq_s64_v: 2749 case NEON::BI__builtin_neon_vcvtpq_u32_v: 2750 case NEON::BI__builtin_neon_vcvtpq_u64_v: 2751 case NEON::BI__builtin_neon_vcvtm_s32_v: 2752 case NEON::BI__builtin_neon_vcvtm_s64_v: 2753 case NEON::BI__builtin_neon_vcvtm_u32_v: 2754 case NEON::BI__builtin_neon_vcvtm_u64_v: 2755 case NEON::BI__builtin_neon_vcvtmq_s32_v: 2756 case NEON::BI__builtin_neon_vcvtmq_s64_v: 2757 case NEON::BI__builtin_neon_vcvtmq_u32_v: 2758 case NEON::BI__builtin_neon_vcvtmq_u64_v: { 2759 bool Double = 2760 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 2761 llvm::Type *InTy = 2762 GetNeonType(this, 2763 NeonTypeFlags(Double ? 
NeonTypeFlags::Float64 2764 : NeonTypeFlags::Float32, false, Quad)); 2765 llvm::Type *Tys[2] = { Ty, InTy }; 2766 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint); 2767 } 2768 case NEON::BI__builtin_neon_vext_v: 2769 case NEON::BI__builtin_neon_vextq_v: { 2770 int CV = cast<ConstantInt>(Ops[2])->getSExtValue(); 2771 SmallVector<Constant*, 16> Indices; 2772 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 2773 Indices.push_back(ConstantInt::get(Int32Ty, i+CV)); 2774 2775 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 2776 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 2777 Value *SV = llvm::ConstantVector::get(Indices); 2778 return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext"); 2779 } 2780 case NEON::BI__builtin_neon_vfma_v: 2781 case NEON::BI__builtin_neon_vfmaq_v: { 2782 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 2783 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 2784 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 2785 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 2786 2787 // NEON intrinsic puts accumulator first, unlike the LLVM fma. 
2788 return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); 2789 } 2790 case NEON::BI__builtin_neon_vld1_v: 2791 case NEON::BI__builtin_neon_vld1q_v: 2792 Ops.push_back(Align); 2793 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vld1"); 2794 case NEON::BI__builtin_neon_vld2_v: 2795 case NEON::BI__builtin_neon_vld2q_v: 2796 case NEON::BI__builtin_neon_vld3_v: 2797 case NEON::BI__builtin_neon_vld3q_v: 2798 case NEON::BI__builtin_neon_vld4_v: 2799 case NEON::BI__builtin_neon_vld4q_v: { 2800 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Ty); 2801 Ops[1] = Builder.CreateCall2(F, Ops[1], Align, NameHint); 2802 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 2803 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 2804 return Builder.CreateStore(Ops[1], Ops[0]); 2805 } 2806 case NEON::BI__builtin_neon_vld1_dup_v: 2807 case NEON::BI__builtin_neon_vld1q_dup_v: { 2808 Value *V = UndefValue::get(Ty); 2809 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 2810 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 2811 LoadInst *Ld = Builder.CreateLoad(Ops[0]); 2812 Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); 2813 llvm::Constant *CI = ConstantInt::get(SizeTy, 0); 2814 Ops[0] = Builder.CreateInsertElement(V, Ld, CI); 2815 return EmitNeonSplat(Ops[0], CI); 2816 } 2817 case NEON::BI__builtin_neon_vld2_lane_v: 2818 case NEON::BI__builtin_neon_vld2q_lane_v: 2819 case NEON::BI__builtin_neon_vld3_lane_v: 2820 case NEON::BI__builtin_neon_vld3q_lane_v: 2821 case NEON::BI__builtin_neon_vld4_lane_v: 2822 case NEON::BI__builtin_neon_vld4q_lane_v: { 2823 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Ty); 2824 for (unsigned I = 2; I < Ops.size() - 1; ++I) 2825 Ops[I] = Builder.CreateBitCast(Ops[I], Ty); 2826 Ops.push_back(Align); 2827 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint); 2828 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 2829 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 2830 return Builder.CreateStore(Ops[1], Ops[0]); 2831 } 
2832 case NEON::BI__builtin_neon_vmovl_v: { 2833 llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy); 2834 Ops[0] = Builder.CreateBitCast(Ops[0], DTy); 2835 if (Usgn) 2836 return Builder.CreateZExt(Ops[0], Ty, "vmovl"); 2837 return Builder.CreateSExt(Ops[0], Ty, "vmovl"); 2838 } 2839 case NEON::BI__builtin_neon_vmovn_v: { 2840 llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy); 2841 Ops[0] = Builder.CreateBitCast(Ops[0], QTy); 2842 return Builder.CreateTrunc(Ops[0], Ty, "vmovn"); 2843 } 2844 case NEON::BI__builtin_neon_vmull_v: 2845 // FIXME: the integer vmull operations could be emitted in terms of pure 2846 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of 2847 // hoisting the exts outside loops. Until global ISel comes along that can 2848 // see through such movement this leads to bad CodeGen. So we need an 2849 // intrinsic for now. 2850 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls; 2851 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int; 2852 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); 2853 case NEON::BI__builtin_neon_vpadal_v: 2854 case NEON::BI__builtin_neon_vpadalq_v: { 2855 // The source operand type has twice as many elements of half the size. 2856 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); 2857 llvm::Type *EltTy = 2858 llvm::IntegerType::get(getLLVMContext(), EltBits / 2); 2859 llvm::Type *NarrowTy = 2860 llvm::VectorType::get(EltTy, VTy->getNumElements() * 2); 2861 llvm::Type *Tys[2] = { Ty, NarrowTy }; 2862 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint); 2863 } 2864 case NEON::BI__builtin_neon_vpaddl_v: 2865 case NEON::BI__builtin_neon_vpaddlq_v: { 2866 // The source operand type has twice as many elements of half the size. 
2867 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); 2868 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2); 2869 llvm::Type *NarrowTy = 2870 llvm::VectorType::get(EltTy, VTy->getNumElements() * 2); 2871 llvm::Type *Tys[2] = { Ty, NarrowTy }; 2872 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl"); 2873 } 2874 case NEON::BI__builtin_neon_vqdmlal_v: 2875 case NEON::BI__builtin_neon_vqdmlsl_v: { 2876 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end()); 2877 Value *Mul = EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), 2878 MulOps, "vqdmlal"); 2879 2880 SmallVector<Value *, 2> AccumOps; 2881 AccumOps.push_back(Ops[0]); 2882 AccumOps.push_back(Mul); 2883 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), 2884 AccumOps, NameHint); 2885 } 2886 case NEON::BI__builtin_neon_vqshl_n_v: 2887 case NEON::BI__builtin_neon_vqshlq_n_v: 2888 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n", 2889 1, false); 2890 case NEON::BI__builtin_neon_vqshlu_n_v: 2891 case NEON::BI__builtin_neon_vqshluq_n_v: 2892 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n", 2893 1, false); 2894 case NEON::BI__builtin_neon_vrecpe_v: 2895 case NEON::BI__builtin_neon_vrecpeq_v: 2896 case NEON::BI__builtin_neon_vrsqrte_v: 2897 case NEON::BI__builtin_neon_vrsqrteq_v: 2898 Int = Ty->isFPOrFPVectorTy() ? 
LLVMIntrinsic : AltLLVMIntrinsic; 2899 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint); 2900 2901 case NEON::BI__builtin_neon_vrshr_n_v: 2902 case NEON::BI__builtin_neon_vrshrq_n_v: 2903 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 2904 1, true); 2905 case NEON::BI__builtin_neon_vshl_n_v: 2906 case NEON::BI__builtin_neon_vshlq_n_v: 2907 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false); 2908 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1], 2909 "vshl_n"); 2910 case NEON::BI__builtin_neon_vshll_n_v: { 2911 llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy); 2912 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 2913 if (Usgn) 2914 Ops[0] = Builder.CreateZExt(Ops[0], VTy); 2915 else 2916 Ops[0] = Builder.CreateSExt(Ops[0], VTy); 2917 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false); 2918 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n"); 2919 } 2920 case NEON::BI__builtin_neon_vshrn_n_v: { 2921 llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy); 2922 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 2923 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false); 2924 if (Usgn) 2925 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]); 2926 else 2927 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]); 2928 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n"); 2929 } 2930 case NEON::BI__builtin_neon_vshr_n_v: 2931 case NEON::BI__builtin_neon_vshrq_n_v: 2932 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n"); 2933 case NEON::BI__builtin_neon_vst1_v: 2934 case NEON::BI__builtin_neon_vst1q_v: 2935 case NEON::BI__builtin_neon_vst2_v: 2936 case NEON::BI__builtin_neon_vst2q_v: 2937 case NEON::BI__builtin_neon_vst3_v: 2938 case NEON::BI__builtin_neon_vst3q_v: 2939 case NEON::BI__builtin_neon_vst4_v: 2940 case NEON::BI__builtin_neon_vst4q_v: 2941 case NEON::BI__builtin_neon_vst2_lane_v: 2942 case NEON::BI__builtin_neon_vst2q_lane_v: 2943 case NEON::BI__builtin_neon_vst3_lane_v: 2944 case 
NEON::BI__builtin_neon_vst3q_lane_v: 2945 case NEON::BI__builtin_neon_vst4_lane_v: 2946 case NEON::BI__builtin_neon_vst4q_lane_v: 2947 Ops.push_back(Align); 2948 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, ""); 2949 case NEON::BI__builtin_neon_vsubhn_v: { 2950 llvm::VectorType *SrcTy = 2951 llvm::VectorType::getExtendedElementVectorType(VTy); 2952 2953 // %sum = add <4 x i32> %lhs, %rhs 2954 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 2955 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy); 2956 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn"); 2957 2958 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> 2959 Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(), 2960 SrcTy->getScalarSizeInBits() / 2); 2961 ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt); 2962 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn"); 2963 2964 // %res = trunc <4 x i32> %high to <4 x i16> 2965 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn"); 2966 } 2967 case NEON::BI__builtin_neon_vtrn_v: 2968 case NEON::BI__builtin_neon_vtrnq_v: { 2969 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 2970 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 2971 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 2972 Value *SV = nullptr; 2973 2974 for (unsigned vi = 0; vi != 2; ++vi) { 2975 SmallVector<Constant*, 16> Indices; 2976 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 2977 Indices.push_back(Builder.getInt32(i+vi)); 2978 Indices.push_back(Builder.getInt32(i+e+vi)); 2979 } 2980 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); 2981 SV = llvm::ConstantVector::get(Indices); 2982 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn"); 2983 SV = Builder.CreateStore(SV, Addr); 2984 } 2985 return SV; 2986 } 2987 case NEON::BI__builtin_neon_vtst_v: 2988 case NEON::BI__builtin_neon_vtstq_v: { 2989 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 2990 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 2991 Ops[0] 
= Builder.CreateAnd(Ops[0], Ops[1]); 2992 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], 2993 ConstantAggregateZero::get(Ty)); 2994 return Builder.CreateSExt(Ops[0], Ty, "vtst"); 2995 } 2996 case NEON::BI__builtin_neon_vuzp_v: 2997 case NEON::BI__builtin_neon_vuzpq_v: { 2998 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 2999 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3000 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 3001 Value *SV = nullptr; 3002 3003 for (unsigned vi = 0; vi != 2; ++vi) { 3004 SmallVector<Constant*, 16> Indices; 3005 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 3006 Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi)); 3007 3008 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); 3009 SV = llvm::ConstantVector::get(Indices); 3010 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp"); 3011 SV = Builder.CreateStore(SV, Addr); 3012 } 3013 return SV; 3014 } 3015 case NEON::BI__builtin_neon_vzip_v: 3016 case NEON::BI__builtin_neon_vzipq_v: { 3017 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 3018 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3019 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 3020 Value *SV = nullptr; 3021 3022 for (unsigned vi = 0; vi != 2; ++vi) { 3023 SmallVector<Constant*, 16> Indices; 3024 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 3025 Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1)); 3026 Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e)); 3027 } 3028 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); 3029 SV = llvm::ConstantVector::get(Indices); 3030 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip"); 3031 SV = Builder.CreateStore(SV, Addr); 3032 } 3033 return SV; 3034 } 3035 } 3036 3037 assert(Int && "Expected valid intrinsic number"); 3038 3039 // Determine the type(s) of this overloaded AArch64 intrinsic. 
3040 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E); 3041 3042 Value *Result = EmitNeonCall(F, Ops, NameHint); 3043 llvm::Type *ResultType = ConvertType(E->getType()); 3044 // AArch64 intrinsic one-element vector type cast to 3045 // scalar type expected by the builtin 3046 return Builder.CreateBitCast(Result, ResultType, NameHint); 3047 } 3048 3049 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr( 3050 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp, 3051 const CmpInst::Predicate Ip, const Twine &Name) { 3052 llvm::Type *OTy = Op->getType(); 3053 3054 // FIXME: this is utterly horrific. We should not be looking at previous 3055 // codegen context to find out what needs doing. Unfortunately TableGen 3056 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32 3057 // (etc). 3058 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op)) 3059 OTy = BI->getOperand(0)->getType(); 3060 3061 Op = Builder.CreateBitCast(Op, OTy); 3062 if (OTy->getScalarType()->isFloatingPointTy()) { 3063 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy)); 3064 } else { 3065 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy)); 3066 } 3067 return Builder.CreateSExt(Op, Ty, Name); 3068 } 3069 3070 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, 3071 Value *ExtOp, Value *IndexOp, 3072 llvm::Type *ResTy, unsigned IntID, 3073 const char *Name) { 3074 SmallVector<Value *, 2> TblOps; 3075 if (ExtOp) 3076 TblOps.push_back(ExtOp); 3077 3078 // Build a vector containing sequential number like (0, 1, 2, ..., 15) 3079 SmallVector<Constant*, 16> Indices; 3080 llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType()); 3081 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) { 3082 Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i)); 3083 Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i+1)); 3084 } 3085 Value *SV = llvm::ConstantVector::get(Indices); 3086 3087 int PairPos = 0, End = Ops.size() - 1; 
3088 while (PairPos < End) { 3089 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 3090 Ops[PairPos+1], SV, Name)); 3091 PairPos += 2; 3092 } 3093 3094 // If there's an odd number of 64-bit lookup table, fill the high 64-bit 3095 // of the 128-bit lookup table with zero. 3096 if (PairPos == End) { 3097 Value *ZeroTbl = ConstantAggregateZero::get(TblTy); 3098 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 3099 ZeroTbl, SV, Name)); 3100 } 3101 3102 Function *TblF; 3103 TblOps.push_back(IndexOp); 3104 TblF = CGF.CGM.getIntrinsic(IntID, ResTy); 3105 3106 return CGF.EmitNeonCall(TblF, TblOps, Name); 3107 } 3108 3109 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, 3110 const CallExpr *E) { 3111 unsigned HintID = static_cast<unsigned>(-1); 3112 switch (BuiltinID) { 3113 default: break; 3114 case ARM::BI__builtin_arm_nop: 3115 HintID = 0; 3116 break; 3117 case ARM::BI__builtin_arm_yield: 3118 case ARM::BI__yield: 3119 HintID = 1; 3120 break; 3121 case ARM::BI__builtin_arm_wfe: 3122 case ARM::BI__wfe: 3123 HintID = 2; 3124 break; 3125 case ARM::BI__builtin_arm_wfi: 3126 case ARM::BI__wfi: 3127 HintID = 3; 3128 break; 3129 case ARM::BI__builtin_arm_sev: 3130 case ARM::BI__sev: 3131 HintID = 4; 3132 break; 3133 case ARM::BI__builtin_arm_sevl: 3134 case ARM::BI__sevl: 3135 HintID = 5; 3136 break; 3137 } 3138 3139 if (HintID != static_cast<unsigned>(-1)) { 3140 Function *F = CGM.getIntrinsic(Intrinsic::arm_hint); 3141 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID)); 3142 } 3143 3144 if (BuiltinID == ARM::BI__builtin_arm_dbg) { 3145 Value *Option = EmitScalarExpr(E->getArg(0)); 3146 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option); 3147 } 3148 3149 if (BuiltinID == ARM::BI__builtin_arm_prefetch) { 3150 Value *Address = EmitScalarExpr(E->getArg(0)); 3151 Value *RW = EmitScalarExpr(E->getArg(1)); 3152 Value *IsData = EmitScalarExpr(E->getArg(2)); 3153 3154 // Locality is not supported on ARM 
target 3155 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3); 3156 3157 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 3158 return Builder.CreateCall4(F, Address, RW, Locality, IsData); 3159 } 3160 3161 if (BuiltinID == ARM::BI__builtin_arm_rbit) { 3162 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_rbit), 3163 EmitScalarExpr(E->getArg(0)), 3164 "rbit"); 3165 } 3166 3167 if (BuiltinID == ARM::BI__clear_cache) { 3168 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 3169 const FunctionDecl *FD = E->getDirectCallee(); 3170 SmallVector<Value*, 2> Ops; 3171 for (unsigned i = 0; i < 2; i++) 3172 Ops.push_back(EmitScalarExpr(E->getArg(i))); 3173 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 3174 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 3175 StringRef Name = FD->getName(); 3176 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 3177 } 3178 3179 if (BuiltinID == ARM::BI__builtin_arm_ldrexd || 3180 ((BuiltinID == ARM::BI__builtin_arm_ldrex || 3181 BuiltinID == ARM::BI__builtin_arm_ldaex) && 3182 getContext().getTypeSize(E->getType()) == 64) || 3183 BuiltinID == ARM::BI__ldrexd) { 3184 Function *F; 3185 3186 switch (BuiltinID) { 3187 default: llvm_unreachable("unexpected builtin"); 3188 case ARM::BI__builtin_arm_ldaex: 3189 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd); 3190 break; 3191 case ARM::BI__builtin_arm_ldrexd: 3192 case ARM::BI__builtin_arm_ldrex: 3193 case ARM::BI__ldrexd: 3194 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd); 3195 break; 3196 } 3197 3198 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 3199 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 3200 "ldrexd"); 3201 3202 Value *Val0 = Builder.CreateExtractValue(Val, 1); 3203 Value *Val1 = Builder.CreateExtractValue(Val, 0); 3204 Val0 = Builder.CreateZExt(Val0, Int64Ty); 3205 Val1 = Builder.CreateZExt(Val1, Int64Ty); 3206 3207 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32); 3208 Val = 
Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 3209 Val = Builder.CreateOr(Val, Val1); 3210 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 3211 } 3212 3213 if (BuiltinID == ARM::BI__builtin_arm_ldrex || 3214 BuiltinID == ARM::BI__builtin_arm_ldaex) { 3215 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 3216 3217 QualType Ty = E->getType(); 3218 llvm::Type *RealResTy = ConvertType(Ty); 3219 llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(), 3220 getContext().getTypeSize(Ty)); 3221 LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo()); 3222 3223 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex 3224 ? Intrinsic::arm_ldaex 3225 : Intrinsic::arm_ldrex, 3226 LoadAddr->getType()); 3227 Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex"); 3228 3229 if (RealResTy->isPointerTy()) 3230 return Builder.CreateIntToPtr(Val, RealResTy); 3231 else { 3232 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 3233 return Builder.CreateBitCast(Val, RealResTy); 3234 } 3235 } 3236 3237 if (BuiltinID == ARM::BI__builtin_arm_strexd || 3238 ((BuiltinID == ARM::BI__builtin_arm_stlex || 3239 BuiltinID == ARM::BI__builtin_arm_strex) && 3240 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) { 3241 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex 3242 ? 
Intrinsic::arm_stlexd 3243 : Intrinsic::arm_strexd); 3244 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, NULL); 3245 3246 Value *Tmp = CreateMemTemp(E->getArg(0)->getType()); 3247 Value *Val = EmitScalarExpr(E->getArg(0)); 3248 Builder.CreateStore(Val, Tmp); 3249 3250 Value *LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy)); 3251 Val = Builder.CreateLoad(LdPtr); 3252 3253 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 3254 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 3255 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy); 3256 return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "strexd"); 3257 } 3258 3259 if (BuiltinID == ARM::BI__builtin_arm_strex || 3260 BuiltinID == ARM::BI__builtin_arm_stlex) { 3261 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 3262 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 3263 3264 QualType Ty = E->getArg(0)->getType(); 3265 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 3266 getContext().getTypeSize(Ty)); 3267 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 3268 3269 if (StoreVal->getType()->isPointerTy()) 3270 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty); 3271 else { 3272 StoreVal = Builder.CreateBitCast(StoreVal, StoreTy); 3273 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty); 3274 } 3275 3276 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex 3277 ? 
Intrinsic::arm_stlex 3278 : Intrinsic::arm_strex, 3279 StoreAddr->getType()); 3280 return Builder.CreateCall2(F, StoreVal, StoreAddr, "strex"); 3281 } 3282 3283 if (BuiltinID == ARM::BI__builtin_arm_clrex) { 3284 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex); 3285 return Builder.CreateCall(F); 3286 } 3287 3288 // CRC32 3289 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 3290 switch (BuiltinID) { 3291 case ARM::BI__builtin_arm_crc32b: 3292 CRCIntrinsicID = Intrinsic::arm_crc32b; break; 3293 case ARM::BI__builtin_arm_crc32cb: 3294 CRCIntrinsicID = Intrinsic::arm_crc32cb; break; 3295 case ARM::BI__builtin_arm_crc32h: 3296 CRCIntrinsicID = Intrinsic::arm_crc32h; break; 3297 case ARM::BI__builtin_arm_crc32ch: 3298 CRCIntrinsicID = Intrinsic::arm_crc32ch; break; 3299 case ARM::BI__builtin_arm_crc32w: 3300 case ARM::BI__builtin_arm_crc32d: 3301 CRCIntrinsicID = Intrinsic::arm_crc32w; break; 3302 case ARM::BI__builtin_arm_crc32cw: 3303 case ARM::BI__builtin_arm_crc32cd: 3304 CRCIntrinsicID = Intrinsic::arm_crc32cw; break; 3305 } 3306 3307 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 3308 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 3309 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 3310 3311 // crc32{c,}d intrinsics are implemnted as two calls to crc32{c,}w 3312 // intrinsics, hence we need different codegen for these cases. 
3313 if (BuiltinID == ARM::BI__builtin_arm_crc32d || 3314 BuiltinID == ARM::BI__builtin_arm_crc32cd) { 3315 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); 3316 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty); 3317 Value *Arg1b = Builder.CreateLShr(Arg1, C1); 3318 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty); 3319 3320 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 3321 Value *Res = Builder.CreateCall2(F, Arg0, Arg1a); 3322 return Builder.CreateCall2(F, Res, Arg1b); 3323 } else { 3324 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty); 3325 3326 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 3327 return Builder.CreateCall2(F, Arg0, Arg1); 3328 } 3329 } 3330 3331 SmallVector<Value*, 4> Ops; 3332 llvm::Value *Align = nullptr; 3333 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { 3334 if (i == 0) { 3335 switch (BuiltinID) { 3336 case NEON::BI__builtin_neon_vld1_v: 3337 case NEON::BI__builtin_neon_vld1q_v: 3338 case NEON::BI__builtin_neon_vld1q_lane_v: 3339 case NEON::BI__builtin_neon_vld1_lane_v: 3340 case NEON::BI__builtin_neon_vld1_dup_v: 3341 case NEON::BI__builtin_neon_vld1q_dup_v: 3342 case NEON::BI__builtin_neon_vst1_v: 3343 case NEON::BI__builtin_neon_vst1q_v: 3344 case NEON::BI__builtin_neon_vst1q_lane_v: 3345 case NEON::BI__builtin_neon_vst1_lane_v: 3346 case NEON::BI__builtin_neon_vst2_v: 3347 case NEON::BI__builtin_neon_vst2q_v: 3348 case NEON::BI__builtin_neon_vst2_lane_v: 3349 case NEON::BI__builtin_neon_vst2q_lane_v: 3350 case NEON::BI__builtin_neon_vst3_v: 3351 case NEON::BI__builtin_neon_vst3q_v: 3352 case NEON::BI__builtin_neon_vst3_lane_v: 3353 case NEON::BI__builtin_neon_vst3q_lane_v: 3354 case NEON::BI__builtin_neon_vst4_v: 3355 case NEON::BI__builtin_neon_vst4q_v: 3356 case NEON::BI__builtin_neon_vst4_lane_v: 3357 case NEON::BI__builtin_neon_vst4q_lane_v: 3358 // Get the alignment for the argument in addition to the value; 3359 // we'll use it later. 
3360 std::pair<llvm::Value*, unsigned> Src = 3361 EmitPointerWithAlignment(E->getArg(0)); 3362 Ops.push_back(Src.first); 3363 Align = Builder.getInt32(Src.second); 3364 continue; 3365 } 3366 } 3367 if (i == 1) { 3368 switch (BuiltinID) { 3369 case NEON::BI__builtin_neon_vld2_v: 3370 case NEON::BI__builtin_neon_vld2q_v: 3371 case NEON::BI__builtin_neon_vld3_v: 3372 case NEON::BI__builtin_neon_vld3q_v: 3373 case NEON::BI__builtin_neon_vld4_v: 3374 case NEON::BI__builtin_neon_vld4q_v: 3375 case NEON::BI__builtin_neon_vld2_lane_v: 3376 case NEON::BI__builtin_neon_vld2q_lane_v: 3377 case NEON::BI__builtin_neon_vld3_lane_v: 3378 case NEON::BI__builtin_neon_vld3q_lane_v: 3379 case NEON::BI__builtin_neon_vld4_lane_v: 3380 case NEON::BI__builtin_neon_vld4q_lane_v: 3381 case NEON::BI__builtin_neon_vld2_dup_v: 3382 case NEON::BI__builtin_neon_vld3_dup_v: 3383 case NEON::BI__builtin_neon_vld4_dup_v: 3384 // Get the alignment for the argument in addition to the value; 3385 // we'll use it later. 3386 std::pair<llvm::Value*, unsigned> Src = 3387 EmitPointerWithAlignment(E->getArg(1)); 3388 Ops.push_back(Src.first); 3389 Align = Builder.getInt32(Src.second); 3390 continue; 3391 } 3392 } 3393 Ops.push_back(EmitScalarExpr(E->getArg(i))); 3394 } 3395 3396 switch (BuiltinID) { 3397 default: break; 3398 // vget_lane and vset_lane are not overloaded and do not have an extra 3399 // argument that specifies the vector type. 
3400 case NEON::BI__builtin_neon_vget_lane_i8: 3401 case NEON::BI__builtin_neon_vget_lane_i16: 3402 case NEON::BI__builtin_neon_vget_lane_i32: 3403 case NEON::BI__builtin_neon_vget_lane_i64: 3404 case NEON::BI__builtin_neon_vget_lane_f32: 3405 case NEON::BI__builtin_neon_vgetq_lane_i8: 3406 case NEON::BI__builtin_neon_vgetq_lane_i16: 3407 case NEON::BI__builtin_neon_vgetq_lane_i32: 3408 case NEON::BI__builtin_neon_vgetq_lane_i64: 3409 case NEON::BI__builtin_neon_vgetq_lane_f32: 3410 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 3411 "vget_lane"); 3412 case NEON::BI__builtin_neon_vset_lane_i8: 3413 case NEON::BI__builtin_neon_vset_lane_i16: 3414 case NEON::BI__builtin_neon_vset_lane_i32: 3415 case NEON::BI__builtin_neon_vset_lane_i64: 3416 case NEON::BI__builtin_neon_vset_lane_f32: 3417 case NEON::BI__builtin_neon_vsetq_lane_i8: 3418 case NEON::BI__builtin_neon_vsetq_lane_i16: 3419 case NEON::BI__builtin_neon_vsetq_lane_i32: 3420 case NEON::BI__builtin_neon_vsetq_lane_i64: 3421 case NEON::BI__builtin_neon_vsetq_lane_f32: 3422 Ops.push_back(EmitScalarExpr(E->getArg(2))); 3423 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 3424 3425 // Non-polymorphic crypto instructions also not overloaded 3426 case NEON::BI__builtin_neon_vsha1h_u32: 3427 Ops.push_back(EmitScalarExpr(E->getArg(0))); 3428 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops, 3429 "vsha1h"); 3430 case NEON::BI__builtin_neon_vsha1cq_u32: 3431 Ops.push_back(EmitScalarExpr(E->getArg(2))); 3432 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops, 3433 "vsha1h"); 3434 case NEON::BI__builtin_neon_vsha1pq_u32: 3435 Ops.push_back(EmitScalarExpr(E->getArg(2))); 3436 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops, 3437 "vsha1h"); 3438 case NEON::BI__builtin_neon_vsha1mq_u32: 3439 Ops.push_back(EmitScalarExpr(E->getArg(2))); 3440 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops, 3441 
"vsha1h"); 3442 } 3443 3444 // Get the last argument, which specifies the vector type. 3445 llvm::APSInt Result; 3446 const Expr *Arg = E->getArg(E->getNumArgs()-1); 3447 if (!Arg->isIntegerConstantExpr(Result, getContext())) 3448 return nullptr; 3449 3450 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f || 3451 BuiltinID == ARM::BI__builtin_arm_vcvtr_d) { 3452 // Determine the overloaded type of this builtin. 3453 llvm::Type *Ty; 3454 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f) 3455 Ty = FloatTy; 3456 else 3457 Ty = DoubleTy; 3458 3459 // Determine whether this is an unsigned conversion or not. 3460 bool usgn = Result.getZExtValue() == 1; 3461 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr; 3462 3463 // Call the appropriate intrinsic. 3464 Function *F = CGM.getIntrinsic(Int, Ty); 3465 return Builder.CreateCall(F, Ops, "vcvtr"); 3466 } 3467 3468 // Determine the type of this overloaded NEON intrinsic. 3469 NeonTypeFlags Type(Result.getZExtValue()); 3470 bool usgn = Type.isUnsigned(); 3471 bool rightShift = false; 3472 3473 llvm::VectorType *VTy = GetNeonType(this, Type); 3474 llvm::Type *Ty = VTy; 3475 if (!Ty) 3476 return nullptr; 3477 3478 // Many NEON builtins have identical semantics and uses in ARM and 3479 // AArch64. Emit these in a single function. 3480 auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap); 3481 const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( 3482 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted); 3483 if (Builtin) 3484 return EmitCommonNeonBuiltinExpr( 3485 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 3486 Builtin->NameHint, Builtin->TypeModifier, E, Ops, Align); 3487 3488 unsigned Int; 3489 switch (BuiltinID) { 3490 default: return nullptr; 3491 case NEON::BI__builtin_neon_vld1q_lane_v: 3492 // Handle 64-bit integer elements as a special case. Use shuffles of 3493 // one-element vectors to avoid poor code for i64 in the backend. 
3494 if (VTy->getElementType()->isIntegerTy(64)) { 3495 // Extract the other lane. 3496 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3497 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue(); 3498 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane)); 3499 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 3500 // Load the value as a one-element vector. 3501 Ty = llvm::VectorType::get(VTy->getElementType(), 1); 3502 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty); 3503 Value *Ld = Builder.CreateCall2(F, Ops[0], Align); 3504 // Combine them. 3505 SmallVector<Constant*, 2> Indices; 3506 Indices.push_back(ConstantInt::get(Int32Ty, 1-Lane)); 3507 Indices.push_back(ConstantInt::get(Int32Ty, Lane)); 3508 SV = llvm::ConstantVector::get(Indices); 3509 return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane"); 3510 } 3511 // fall through 3512 case NEON::BI__builtin_neon_vld1_lane_v: { 3513 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3514 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 3515 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3516 LoadInst *Ld = Builder.CreateLoad(Ops[0]); 3517 Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); 3518 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); 3519 } 3520 case NEON::BI__builtin_neon_vld2_dup_v: 3521 case NEON::BI__builtin_neon_vld3_dup_v: 3522 case NEON::BI__builtin_neon_vld4_dup_v: { 3523 // Handle 64-bit elements as a special-case. There is no "dup" needed. 
3524 if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) { 3525 switch (BuiltinID) { 3526 case NEON::BI__builtin_neon_vld2_dup_v: 3527 Int = Intrinsic::arm_neon_vld2; 3528 break; 3529 case NEON::BI__builtin_neon_vld3_dup_v: 3530 Int = Intrinsic::arm_neon_vld3; 3531 break; 3532 case NEON::BI__builtin_neon_vld4_dup_v: 3533 Int = Intrinsic::arm_neon_vld4; 3534 break; 3535 default: llvm_unreachable("unknown vld_dup intrinsic?"); 3536 } 3537 Function *F = CGM.getIntrinsic(Int, Ty); 3538 Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup"); 3539 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 3540 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3541 return Builder.CreateStore(Ops[1], Ops[0]); 3542 } 3543 switch (BuiltinID) { 3544 case NEON::BI__builtin_neon_vld2_dup_v: 3545 Int = Intrinsic::arm_neon_vld2lane; 3546 break; 3547 case NEON::BI__builtin_neon_vld3_dup_v: 3548 Int = Intrinsic::arm_neon_vld3lane; 3549 break; 3550 case NEON::BI__builtin_neon_vld4_dup_v: 3551 Int = Intrinsic::arm_neon_vld4lane; 3552 break; 3553 default: llvm_unreachable("unknown vld_dup intrinsic?"); 3554 } 3555 Function *F = CGM.getIntrinsic(Int, Ty); 3556 llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType()); 3557 3558 SmallVector<Value*, 6> Args; 3559 Args.push_back(Ops[1]); 3560 Args.append(STy->getNumElements(), UndefValue::get(Ty)); 3561 3562 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 3563 Args.push_back(CI); 3564 Args.push_back(Align); 3565 3566 Ops[1] = Builder.CreateCall(F, Args, "vld_dup"); 3567 // splat lane 0 to all elts in each vector of the result. 
3568 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 3569 Value *Val = Builder.CreateExtractValue(Ops[1], i); 3570 Value *Elt = Builder.CreateBitCast(Val, Ty); 3571 Elt = EmitNeonSplat(Elt, CI); 3572 Elt = Builder.CreateBitCast(Elt, Val->getType()); 3573 Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i); 3574 } 3575 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 3576 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3577 return Builder.CreateStore(Ops[1], Ops[0]); 3578 } 3579 case NEON::BI__builtin_neon_vqrshrn_n_v: 3580 Int = 3581 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns; 3582 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n", 3583 1, true); 3584 case NEON::BI__builtin_neon_vqrshrun_n_v: 3585 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty), 3586 Ops, "vqrshrun_n", 1, true); 3587 case NEON::BI__builtin_neon_vqshrn_n_v: 3588 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns; 3589 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n", 3590 1, true); 3591 case NEON::BI__builtin_neon_vqshrun_n_v: 3592 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty), 3593 Ops, "vqshrun_n", 1, true); 3594 case NEON::BI__builtin_neon_vrecpe_v: 3595 case NEON::BI__builtin_neon_vrecpeq_v: 3596 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty), 3597 Ops, "vrecpe"); 3598 case NEON::BI__builtin_neon_vrshrn_n_v: 3599 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty), 3600 Ops, "vrshrn_n", 1, true); 3601 case NEON::BI__builtin_neon_vrsra_n_v: 3602 case NEON::BI__builtin_neon_vrsraq_n_v: 3603 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3604 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3605 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true); 3606 Int = usgn ? 
Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts; 3607 Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]); 3608 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n"); 3609 case NEON::BI__builtin_neon_vsri_n_v: 3610 case NEON::BI__builtin_neon_vsriq_n_v: 3611 rightShift = true; 3612 case NEON::BI__builtin_neon_vsli_n_v: 3613 case NEON::BI__builtin_neon_vsliq_n_v: 3614 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift); 3615 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty), 3616 Ops, "vsli_n"); 3617 case NEON::BI__builtin_neon_vsra_n_v: 3618 case NEON::BI__builtin_neon_vsraq_n_v: 3619 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3620 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 3621 return Builder.CreateAdd(Ops[0], Ops[1]); 3622 case NEON::BI__builtin_neon_vst1q_lane_v: 3623 // Handle 64-bit integer elements as a special case. Use a shuffle to get 3624 // a one-element vector and avoid poor code for i64 in the backend. 
3625 if (VTy->getElementType()->isIntegerTy(64)) { 3626 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3627 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2])); 3628 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 3629 Ops[2] = Align; 3630 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, 3631 Ops[1]->getType()), Ops); 3632 } 3633 // fall through 3634 case NEON::BI__builtin_neon_vst1_lane_v: { 3635 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3636 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 3637 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 3638 StoreInst *St = Builder.CreateStore(Ops[1], 3639 Builder.CreateBitCast(Ops[0], Ty)); 3640 St->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); 3641 return St; 3642 } 3643 case NEON::BI__builtin_neon_vtbl1_v: 3644 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1), 3645 Ops, "vtbl1"); 3646 case NEON::BI__builtin_neon_vtbl2_v: 3647 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2), 3648 Ops, "vtbl2"); 3649 case NEON::BI__builtin_neon_vtbl3_v: 3650 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3), 3651 Ops, "vtbl3"); 3652 case NEON::BI__builtin_neon_vtbl4_v: 3653 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4), 3654 Ops, "vtbl4"); 3655 case NEON::BI__builtin_neon_vtbx1_v: 3656 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1), 3657 Ops, "vtbx1"); 3658 case NEON::BI__builtin_neon_vtbx2_v: 3659 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2), 3660 Ops, "vtbx2"); 3661 case NEON::BI__builtin_neon_vtbx3_v: 3662 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3), 3663 Ops, "vtbx3"); 3664 case NEON::BI__builtin_neon_vtbx4_v: 3665 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4), 3666 Ops, "vtbx4"); 3667 } 3668 } 3669 3670 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, 3671 const CallExpr *E, 3672 SmallVectorImpl<Value 
*> &Ops) { 3673 unsigned int Int = 0; 3674 const char *s = nullptr; 3675 3676 switch (BuiltinID) { 3677 default: 3678 return nullptr; 3679 case NEON::BI__builtin_neon_vtbl1_v: 3680 case NEON::BI__builtin_neon_vqtbl1_v: 3681 case NEON::BI__builtin_neon_vqtbl1q_v: 3682 case NEON::BI__builtin_neon_vtbl2_v: 3683 case NEON::BI__builtin_neon_vqtbl2_v: 3684 case NEON::BI__builtin_neon_vqtbl2q_v: 3685 case NEON::BI__builtin_neon_vtbl3_v: 3686 case NEON::BI__builtin_neon_vqtbl3_v: 3687 case NEON::BI__builtin_neon_vqtbl3q_v: 3688 case NEON::BI__builtin_neon_vtbl4_v: 3689 case NEON::BI__builtin_neon_vqtbl4_v: 3690 case NEON::BI__builtin_neon_vqtbl4q_v: 3691 break; 3692 case NEON::BI__builtin_neon_vtbx1_v: 3693 case NEON::BI__builtin_neon_vqtbx1_v: 3694 case NEON::BI__builtin_neon_vqtbx1q_v: 3695 case NEON::BI__builtin_neon_vtbx2_v: 3696 case NEON::BI__builtin_neon_vqtbx2_v: 3697 case NEON::BI__builtin_neon_vqtbx2q_v: 3698 case NEON::BI__builtin_neon_vtbx3_v: 3699 case NEON::BI__builtin_neon_vqtbx3_v: 3700 case NEON::BI__builtin_neon_vqtbx3q_v: 3701 case NEON::BI__builtin_neon_vtbx4_v: 3702 case NEON::BI__builtin_neon_vqtbx4_v: 3703 case NEON::BI__builtin_neon_vqtbx4q_v: 3704 break; 3705 } 3706 3707 assert(E->getNumArgs() >= 3); 3708 3709 // Get the last argument, which specifies the vector type. 3710 llvm::APSInt Result; 3711 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 3712 if (!Arg->isIntegerConstantExpr(Result, CGF.getContext())) 3713 return nullptr; 3714 3715 // Determine the type of this overloaded NEON intrinsic. 3716 NeonTypeFlags Type(Result.getZExtValue()); 3717 llvm::VectorType *VTy = GetNeonType(&CGF, Type); 3718 llvm::Type *Ty = VTy; 3719 if (!Ty) 3720 return nullptr; 3721 3722 unsigned nElts = VTy->getNumElements(); 3723 3724 CodeGen::CGBuilderTy &Builder = CGF.Builder; 3725 3726 // AArch64 scalar builtins are not overloaded, they do not have an extra 3727 // argument that specifies the vector type, need to handle each case. 
3728 SmallVector<Value *, 2> TblOps; 3729 switch (BuiltinID) { 3730 case NEON::BI__builtin_neon_vtbl1_v: { 3731 TblOps.push_back(Ops[0]); 3732 return packTBLDVectorList(CGF, TblOps, nullptr, Ops[1], Ty, 3733 Intrinsic::aarch64_neon_tbl1, "vtbl1"); 3734 } 3735 case NEON::BI__builtin_neon_vtbl2_v: { 3736 TblOps.push_back(Ops[0]); 3737 TblOps.push_back(Ops[1]); 3738 return packTBLDVectorList(CGF, TblOps, nullptr, Ops[2], Ty, 3739 Intrinsic::aarch64_neon_tbl1, "vtbl1"); 3740 } 3741 case NEON::BI__builtin_neon_vtbl3_v: { 3742 TblOps.push_back(Ops[0]); 3743 TblOps.push_back(Ops[1]); 3744 TblOps.push_back(Ops[2]); 3745 return packTBLDVectorList(CGF, TblOps, nullptr, Ops[3], Ty, 3746 Intrinsic::aarch64_neon_tbl2, "vtbl2"); 3747 } 3748 case NEON::BI__builtin_neon_vtbl4_v: { 3749 TblOps.push_back(Ops[0]); 3750 TblOps.push_back(Ops[1]); 3751 TblOps.push_back(Ops[2]); 3752 TblOps.push_back(Ops[3]); 3753 return packTBLDVectorList(CGF, TblOps, nullptr, Ops[4], Ty, 3754 Intrinsic::aarch64_neon_tbl2, "vtbl2"); 3755 } 3756 case NEON::BI__builtin_neon_vtbx1_v: { 3757 TblOps.push_back(Ops[1]); 3758 Value *TblRes = packTBLDVectorList(CGF, TblOps, nullptr, Ops[2], Ty, 3759 Intrinsic::aarch64_neon_tbl1, "vtbl1"); 3760 3761 llvm::Constant *Eight = ConstantInt::get(VTy->getElementType(), 8); 3762 Value* EightV = llvm::ConstantVector::getSplat(nElts, Eight); 3763 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV); 3764 CmpRes = Builder.CreateSExt(CmpRes, Ty); 3765 3766 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]); 3767 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 3768 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 3769 } 3770 case NEON::BI__builtin_neon_vtbx2_v: { 3771 TblOps.push_back(Ops[1]); 3772 TblOps.push_back(Ops[2]); 3773 return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[3], Ty, 3774 Intrinsic::aarch64_neon_tbx1, "vtbx1"); 3775 } 3776 case NEON::BI__builtin_neon_vtbx3_v: { 3777 TblOps.push_back(Ops[1]); 
3778 TblOps.push_back(Ops[2]); 3779 TblOps.push_back(Ops[3]); 3780 Value *TblRes = packTBLDVectorList(CGF, TblOps, nullptr, Ops[4], Ty, 3781 Intrinsic::aarch64_neon_tbl2, "vtbl2"); 3782 3783 llvm::Constant *TwentyFour = ConstantInt::get(VTy->getElementType(), 24); 3784 Value* TwentyFourV = llvm::ConstantVector::getSplat(nElts, TwentyFour); 3785 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4], 3786 TwentyFourV); 3787 CmpRes = Builder.CreateSExt(CmpRes, Ty); 3788 3789 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]); 3790 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 3791 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 3792 } 3793 case NEON::BI__builtin_neon_vtbx4_v: { 3794 TblOps.push_back(Ops[1]); 3795 TblOps.push_back(Ops[2]); 3796 TblOps.push_back(Ops[3]); 3797 TblOps.push_back(Ops[4]); 3798 return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[5], Ty, 3799 Intrinsic::aarch64_neon_tbx2, "vtbx2"); 3800 } 3801 case NEON::BI__builtin_neon_vqtbl1_v: 3802 case NEON::BI__builtin_neon_vqtbl1q_v: 3803 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break; 3804 case NEON::BI__builtin_neon_vqtbl2_v: 3805 case NEON::BI__builtin_neon_vqtbl2q_v: { 3806 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break; 3807 case NEON::BI__builtin_neon_vqtbl3_v: 3808 case NEON::BI__builtin_neon_vqtbl3q_v: 3809 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break; 3810 case NEON::BI__builtin_neon_vqtbl4_v: 3811 case NEON::BI__builtin_neon_vqtbl4q_v: 3812 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break; 3813 case NEON::BI__builtin_neon_vqtbx1_v: 3814 case NEON::BI__builtin_neon_vqtbx1q_v: 3815 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break; 3816 case NEON::BI__builtin_neon_vqtbx2_v: 3817 case NEON::BI__builtin_neon_vqtbx2q_v: 3818 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break; 3819 case NEON::BI__builtin_neon_vqtbx3_v: 3820 case NEON::BI__builtin_neon_vqtbx3q_v: 3821 Int = Intrinsic::aarch64_neon_tbx3; s 
= "vtbx3"; break; 3822 case NEON::BI__builtin_neon_vqtbx4_v: 3823 case NEON::BI__builtin_neon_vqtbx4q_v: 3824 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break; 3825 } 3826 } 3827 3828 if (!Int) 3829 return nullptr; 3830 3831 Function *F = CGF.CGM.getIntrinsic(Int, Ty); 3832 return CGF.EmitNeonCall(F, Ops, s); 3833 } 3834 3835 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) { 3836 llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4); 3837 Op = Builder.CreateBitCast(Op, Int16Ty); 3838 Value *V = UndefValue::get(VTy); 3839 llvm::Constant *CI = ConstantInt::get(SizeTy, 0); 3840 Op = Builder.CreateInsertElement(V, Op, CI); 3841 return Op; 3842 } 3843 3844 Value *CodeGenFunction::vectorWrapScalar8(Value *Op) { 3845 llvm::Type *VTy = llvm::VectorType::get(Int8Ty, 8); 3846 Op = Builder.CreateBitCast(Op, Int8Ty); 3847 Value *V = UndefValue::get(VTy); 3848 llvm::Constant *CI = ConstantInt::get(SizeTy, 0); 3849 Op = Builder.CreateInsertElement(V, Op, CI); 3850 return Op; 3851 } 3852 3853 Value *CodeGenFunction:: 3854 emitVectorWrappedScalar8Intrinsic(unsigned Int, SmallVectorImpl<Value*> &Ops, 3855 const char *Name) { 3856 // i8 is not a legal types for AArch64, so we can't just use 3857 // a normal overloaded intrinsic call for these scalar types. Instead 3858 // we'll build 64-bit vectors w/ lane zero being our input values and 3859 // perform the operation on that. The back end can pattern match directly 3860 // to the scalar instruction. 
3861 Ops[0] = vectorWrapScalar8(Ops[0]); 3862 Ops[1] = vectorWrapScalar8(Ops[1]); 3863 llvm::Type *VTy = llvm::VectorType::get(Int8Ty, 8); 3864 Value *V = EmitNeonCall(CGM.getIntrinsic(Int, VTy), Ops, Name); 3865 Constant *CI = ConstantInt::get(SizeTy, 0); 3866 return Builder.CreateExtractElement(V, CI, "lane0"); 3867 } 3868 3869 Value *CodeGenFunction:: 3870 emitVectorWrappedScalar16Intrinsic(unsigned Int, SmallVectorImpl<Value*> &Ops, 3871 const char *Name) { 3872 // i16 is not a legal types for AArch64, so we can't just use 3873 // a normal overloaded intrinsic call for these scalar types. Instead 3874 // we'll build 64-bit vectors w/ lane zero being our input values and 3875 // perform the operation on that. The back end can pattern match directly 3876 // to the scalar instruction. 3877 Ops[0] = vectorWrapScalar16(Ops[0]); 3878 Ops[1] = vectorWrapScalar16(Ops[1]); 3879 llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4); 3880 Value *V = EmitNeonCall(CGM.getIntrinsic(Int, VTy), Ops, Name); 3881 Constant *CI = ConstantInt::get(SizeTy, 0); 3882 return Builder.CreateExtractElement(V, CI, "lane0"); 3883 } 3884 3885 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, 3886 const CallExpr *E) { 3887 unsigned HintID = static_cast<unsigned>(-1); 3888 switch (BuiltinID) { 3889 default: break; 3890 case AArch64::BI__builtin_arm_nop: 3891 HintID = 0; 3892 break; 3893 case AArch64::BI__builtin_arm_yield: 3894 HintID = 1; 3895 break; 3896 case AArch64::BI__builtin_arm_wfe: 3897 HintID = 2; 3898 break; 3899 case AArch64::BI__builtin_arm_wfi: 3900 HintID = 3; 3901 break; 3902 case AArch64::BI__builtin_arm_sev: 3903 HintID = 4; 3904 break; 3905 case AArch64::BI__builtin_arm_sevl: 3906 HintID = 5; 3907 break; 3908 } 3909 3910 if (HintID != static_cast<unsigned>(-1)) { 3911 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint); 3912 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID)); 3913 } 3914 3915 if (BuiltinID == 
AArch64::BI__builtin_arm_prefetch) { 3916 Value *Address = EmitScalarExpr(E->getArg(0)); 3917 Value *RW = EmitScalarExpr(E->getArg(1)); 3918 Value *CacheLevel = EmitScalarExpr(E->getArg(2)); 3919 Value *RetentionPolicy = EmitScalarExpr(E->getArg(3)); 3920 Value *IsData = EmitScalarExpr(E->getArg(4)); 3921 3922 Value *Locality = nullptr; 3923 if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) { 3924 // Temporal fetch, needs to convert cache level to locality. 3925 Locality = llvm::ConstantInt::get(Int32Ty, 3926 -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3); 3927 } else { 3928 // Streaming fetch. 3929 Locality = llvm::ConstantInt::get(Int32Ty, 0); 3930 } 3931 3932 // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify 3933 // PLDL3STRM or PLDL2STRM. 3934 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 3935 return Builder.CreateCall4(F, Address, RW, Locality, IsData); 3936 } 3937 3938 if (BuiltinID == AArch64::BI__builtin_arm_rbit) { 3939 assert((getContext().getTypeSize(E->getType()) == 32) && 3940 "rbit of unusual size!"); 3941 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 3942 return Builder.CreateCall( 3943 CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit"); 3944 } 3945 if (BuiltinID == AArch64::BI__builtin_arm_rbit64) { 3946 assert((getContext().getTypeSize(E->getType()) == 64) && 3947 "rbit of unusual size!"); 3948 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 3949 return Builder.CreateCall( 3950 CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit"); 3951 } 3952 3953 if (BuiltinID == AArch64::BI__clear_cache) { 3954 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 3955 const FunctionDecl *FD = E->getDirectCallee(); 3956 SmallVector<Value*, 2> Ops; 3957 for (unsigned i = 0; i < 2; i++) 3958 Ops.push_back(EmitScalarExpr(E->getArg(i))); 3959 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 3960 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 3961 
StringRef Name = FD->getName(); 3962 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 3963 } 3964 3965 if ((BuiltinID == AArch64::BI__builtin_arm_ldrex || 3966 BuiltinID == AArch64::BI__builtin_arm_ldaex) && 3967 getContext().getTypeSize(E->getType()) == 128) { 3968 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 3969 ? Intrinsic::aarch64_ldaxp 3970 : Intrinsic::aarch64_ldxp); 3971 3972 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 3973 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 3974 "ldxp"); 3975 3976 Value *Val0 = Builder.CreateExtractValue(Val, 1); 3977 Value *Val1 = Builder.CreateExtractValue(Val, 0); 3978 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); 3979 Val0 = Builder.CreateZExt(Val0, Int128Ty); 3980 Val1 = Builder.CreateZExt(Val1, Int128Ty); 3981 3982 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64); 3983 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 3984 Val = Builder.CreateOr(Val, Val1); 3985 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 3986 } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex || 3987 BuiltinID == AArch64::BI__builtin_arm_ldaex) { 3988 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 3989 3990 QualType Ty = E->getType(); 3991 llvm::Type *RealResTy = ConvertType(Ty); 3992 llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(), 3993 getContext().getTypeSize(Ty)); 3994 LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo()); 3995 3996 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 3997 ? 
Intrinsic::aarch64_ldaxr 3998 : Intrinsic::aarch64_ldxr, 3999 LoadAddr->getType()); 4000 Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr"); 4001 4002 if (RealResTy->isPointerTy()) 4003 return Builder.CreateIntToPtr(Val, RealResTy); 4004 4005 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 4006 return Builder.CreateBitCast(Val, RealResTy); 4007 } 4008 4009 if ((BuiltinID == AArch64::BI__builtin_arm_strex || 4010 BuiltinID == AArch64::BI__builtin_arm_stlex) && 4011 getContext().getTypeSize(E->getArg(0)->getType()) == 128) { 4012 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 4013 ? Intrinsic::aarch64_stlxp 4014 : Intrinsic::aarch64_stxp); 4015 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, NULL); 4016 4017 Value *One = llvm::ConstantInt::get(Int32Ty, 1); 4018 Value *Tmp = Builder.CreateAlloca(ConvertType(E->getArg(0)->getType()), 4019 One); 4020 Value *Val = EmitScalarExpr(E->getArg(0)); 4021 Builder.CreateStore(Val, Tmp); 4022 4023 Value *LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy)); 4024 Val = Builder.CreateLoad(LdPtr); 4025 4026 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 4027 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 4028 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), 4029 Int8PtrTy); 4030 return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "stxp"); 4031 } else if (BuiltinID == AArch64::BI__builtin_arm_strex || 4032 BuiltinID == AArch64::BI__builtin_arm_stlex) { 4033 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 4034 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 4035 4036 QualType Ty = E->getArg(0)->getType(); 4037 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 4038 getContext().getTypeSize(Ty)); 4039 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 4040 4041 if (StoreVal->getType()->isPointerTy()) 4042 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty); 4043 else { 4044 StoreVal = Builder.CreateBitCast(StoreVal, 
StoreTy); 4045 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty); 4046 } 4047 4048 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 4049 ? Intrinsic::aarch64_stlxr 4050 : Intrinsic::aarch64_stxr, 4051 StoreAddr->getType()); 4052 return Builder.CreateCall2(F, StoreVal, StoreAddr, "stxr"); 4053 } 4054 4055 if (BuiltinID == AArch64::BI__builtin_arm_clrex) { 4056 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex); 4057 return Builder.CreateCall(F); 4058 } 4059 4060 // CRC32 4061 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 4062 switch (BuiltinID) { 4063 case AArch64::BI__builtin_arm_crc32b: 4064 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break; 4065 case AArch64::BI__builtin_arm_crc32cb: 4066 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break; 4067 case AArch64::BI__builtin_arm_crc32h: 4068 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break; 4069 case AArch64::BI__builtin_arm_crc32ch: 4070 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break; 4071 case AArch64::BI__builtin_arm_crc32w: 4072 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break; 4073 case AArch64::BI__builtin_arm_crc32cw: 4074 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break; 4075 case AArch64::BI__builtin_arm_crc32d: 4076 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break; 4077 case AArch64::BI__builtin_arm_crc32cd: 4078 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break; 4079 } 4080 4081 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 4082 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 4083 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 4084 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 4085 4086 llvm::Type *DataTy = F->getFunctionType()->getParamType(1); 4087 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy); 4088 4089 return Builder.CreateCall2(F, Arg0, Arg1); 4090 } 4091 4092 llvm::SmallVector<Value*, 4> Ops; 4093 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) 4094 Ops.push_back(EmitScalarExpr(E->getArg(i))); 4095 4096 auto SISDMap = 
makeArrayRef(AArch64SISDIntrinsicMap); 4097 const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( 4098 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted); 4099 4100 if (Builtin) { 4101 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1))); 4102 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E); 4103 assert(Result && "SISD intrinsic should have been handled"); 4104 return Result; 4105 } 4106 4107 llvm::APSInt Result; 4108 const Expr *Arg = E->getArg(E->getNumArgs()-1); 4109 NeonTypeFlags Type(0); 4110 if (Arg->isIntegerConstantExpr(Result, getContext())) 4111 // Determine the type of this overloaded NEON intrinsic. 4112 Type = NeonTypeFlags(Result.getZExtValue()); 4113 4114 bool usgn = Type.isUnsigned(); 4115 bool quad = Type.isQuad(); 4116 4117 // Handle non-overloaded intrinsics first. 4118 switch (BuiltinID) { 4119 default: break; 4120 case NEON::BI__builtin_neon_vldrq_p128: { 4121 llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128); 4122 Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy); 4123 return Builder.CreateLoad(Ptr); 4124 } 4125 case NEON::BI__builtin_neon_vstrq_p128: { 4126 llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128); 4127 Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy); 4128 return Builder.CreateStore(EmitScalarExpr(E->getArg(1)), Ptr); 4129 } 4130 case NEON::BI__builtin_neon_vcvts_u32_f32: 4131 case NEON::BI__builtin_neon_vcvtd_u64_f64: 4132 usgn = true; 4133 // FALL THROUGH 4134 case NEON::BI__builtin_neon_vcvts_s32_f32: 4135 case NEON::BI__builtin_neon_vcvtd_s64_f64: { 4136 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4137 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 4138 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 4139 llvm::Type *FTy = Is64 ? 
DoubleTy : FloatTy; 4140 Ops[0] = Builder.CreateBitCast(Ops[0], FTy); 4141 if (usgn) 4142 return Builder.CreateFPToUI(Ops[0], InTy); 4143 return Builder.CreateFPToSI(Ops[0], InTy); 4144 } 4145 case NEON::BI__builtin_neon_vcvts_f32_u32: 4146 case NEON::BI__builtin_neon_vcvtd_f64_u64: 4147 usgn = true; 4148 // FALL THROUGH 4149 case NEON::BI__builtin_neon_vcvts_f32_s32: 4150 case NEON::BI__builtin_neon_vcvtd_f64_s64: { 4151 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4152 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 4153 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 4154 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy; 4155 Ops[0] = Builder.CreateBitCast(Ops[0], InTy); 4156 if (usgn) 4157 return Builder.CreateUIToFP(Ops[0], FTy); 4158 return Builder.CreateSIToFP(Ops[0], FTy); 4159 } 4160 case NEON::BI__builtin_neon_vpaddd_s64: { 4161 llvm::Type *Ty = 4162 llvm::VectorType::get(llvm::Type::getInt64Ty(getLLVMContext()), 2); 4163 Value *Vec = EmitScalarExpr(E->getArg(0)); 4164 // The vector is v2f64, so make sure it's bitcast to that. 4165 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64"); 4166 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 4167 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 4168 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 4169 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 4170 // Pairwise addition of a v2f64 into a scalar f64. 4171 return Builder.CreateAdd(Op0, Op1, "vpaddd"); 4172 } 4173 case NEON::BI__builtin_neon_vpaddd_f64: { 4174 llvm::Type *Ty = 4175 llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 2); 4176 Value *Vec = EmitScalarExpr(E->getArg(0)); 4177 // The vector is v2f64, so make sure it's bitcast to that. 
4178 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64"); 4179 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 4180 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 4181 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 4182 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 4183 // Pairwise addition of a v2f64 into a scalar f64. 4184 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 4185 } 4186 case NEON::BI__builtin_neon_vpadds_f32: { 4187 llvm::Type *Ty = 4188 llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 2); 4189 Value *Vec = EmitScalarExpr(E->getArg(0)); 4190 // The vector is v2f32, so make sure it's bitcast to that. 4191 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32"); 4192 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 4193 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 4194 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 4195 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 4196 // Pairwise addition of a v2f32 into a scalar f32. 
4197 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 4198 } 4199 case NEON::BI__builtin_neon_vceqzd_s64: 4200 case NEON::BI__builtin_neon_vceqzd_f64: 4201 case NEON::BI__builtin_neon_vceqzs_f32: 4202 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4203 return EmitAArch64CompareBuiltinExpr( 4204 Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OEQ, 4205 ICmpInst::ICMP_EQ, "vceqz"); 4206 case NEON::BI__builtin_neon_vcgezd_s64: 4207 case NEON::BI__builtin_neon_vcgezd_f64: 4208 case NEON::BI__builtin_neon_vcgezs_f32: 4209 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4210 return EmitAArch64CompareBuiltinExpr( 4211 Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OGE, 4212 ICmpInst::ICMP_SGE, "vcgez"); 4213 case NEON::BI__builtin_neon_vclezd_s64: 4214 case NEON::BI__builtin_neon_vclezd_f64: 4215 case NEON::BI__builtin_neon_vclezs_f32: 4216 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4217 return EmitAArch64CompareBuiltinExpr( 4218 Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OLE, 4219 ICmpInst::ICMP_SLE, "vclez"); 4220 case NEON::BI__builtin_neon_vcgtzd_s64: 4221 case NEON::BI__builtin_neon_vcgtzd_f64: 4222 case NEON::BI__builtin_neon_vcgtzs_f32: 4223 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4224 return EmitAArch64CompareBuiltinExpr( 4225 Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OGT, 4226 ICmpInst::ICMP_SGT, "vcgtz"); 4227 case NEON::BI__builtin_neon_vcltzd_s64: 4228 case NEON::BI__builtin_neon_vcltzd_f64: 4229 case NEON::BI__builtin_neon_vcltzs_f32: 4230 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4231 return EmitAArch64CompareBuiltinExpr( 4232 Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OLT, 4233 ICmpInst::ICMP_SLT, "vcltz"); 4234 4235 case NEON::BI__builtin_neon_vceqzd_u64: { 4236 llvm::Type *Ty = llvm::Type::getInt64Ty(getLLVMContext()); 4237 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4238 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4239 Ops[0] = Builder.CreateICmp(llvm::ICmpInst::ICMP_EQ, 
Ops[0], 4240 llvm::Constant::getNullValue(Ty)); 4241 return Builder.CreateSExt(Ops[0], Ty, "vceqzd"); 4242 } 4243 case NEON::BI__builtin_neon_vceqd_f64: 4244 case NEON::BI__builtin_neon_vcled_f64: 4245 case NEON::BI__builtin_neon_vcltd_f64: 4246 case NEON::BI__builtin_neon_vcged_f64: 4247 case NEON::BI__builtin_neon_vcgtd_f64: { 4248 llvm::CmpInst::Predicate P; 4249 switch (BuiltinID) { 4250 default: llvm_unreachable("missing builtin ID in switch!"); 4251 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break; 4252 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break; 4253 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break; 4254 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break; 4255 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break; 4256 } 4257 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4258 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 4259 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 4260 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 4261 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd"); 4262 } 4263 case NEON::BI__builtin_neon_vceqs_f32: 4264 case NEON::BI__builtin_neon_vcles_f32: 4265 case NEON::BI__builtin_neon_vclts_f32: 4266 case NEON::BI__builtin_neon_vcges_f32: 4267 case NEON::BI__builtin_neon_vcgts_f32: { 4268 llvm::CmpInst::Predicate P; 4269 switch (BuiltinID) { 4270 default: llvm_unreachable("missing builtin ID in switch!"); 4271 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break; 4272 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break; 4273 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break; 4274 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break; 4275 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break; 4276 } 4277 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4278 Ops[0] = Builder.CreateBitCast(Ops[0], 
FloatTy); 4279 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy); 4280 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 4281 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd"); 4282 } 4283 case NEON::BI__builtin_neon_vceqd_s64: 4284 case NEON::BI__builtin_neon_vceqd_u64: 4285 case NEON::BI__builtin_neon_vcgtd_s64: 4286 case NEON::BI__builtin_neon_vcgtd_u64: 4287 case NEON::BI__builtin_neon_vcltd_s64: 4288 case NEON::BI__builtin_neon_vcltd_u64: 4289 case NEON::BI__builtin_neon_vcged_u64: 4290 case NEON::BI__builtin_neon_vcged_s64: 4291 case NEON::BI__builtin_neon_vcled_u64: 4292 case NEON::BI__builtin_neon_vcled_s64: { 4293 llvm::CmpInst::Predicate P; 4294 switch (BuiltinID) { 4295 default: llvm_unreachable("missing builtin ID in switch!"); 4296 case NEON::BI__builtin_neon_vceqd_s64: 4297 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break; 4298 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break; 4299 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break; 4300 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break; 4301 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break; 4302 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break; 4303 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break; 4304 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break; 4305 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break; 4306 } 4307 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4308 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 4309 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 4310 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]); 4311 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd"); 4312 } 4313 case NEON::BI__builtin_neon_vtstd_s64: 4314 case NEON::BI__builtin_neon_vtstd_u64: { 4315 llvm::Type *Ty = llvm::Type::getInt64Ty(getLLVMContext()); 4316 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4317 
Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4318 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4319 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); 4320 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], 4321 llvm::Constant::getNullValue(Ty)); 4322 return Builder.CreateSExt(Ops[0], Ty, "vtstd"); 4323 } 4324 case NEON::BI__builtin_neon_vset_lane_i8: 4325 case NEON::BI__builtin_neon_vset_lane_i16: 4326 case NEON::BI__builtin_neon_vset_lane_i32: 4327 case NEON::BI__builtin_neon_vset_lane_i64: 4328 case NEON::BI__builtin_neon_vset_lane_f32: 4329 case NEON::BI__builtin_neon_vsetq_lane_i8: 4330 case NEON::BI__builtin_neon_vsetq_lane_i16: 4331 case NEON::BI__builtin_neon_vsetq_lane_i32: 4332 case NEON::BI__builtin_neon_vsetq_lane_i64: 4333 case NEON::BI__builtin_neon_vsetq_lane_f32: 4334 Ops.push_back(EmitScalarExpr(E->getArg(2))); 4335 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 4336 case NEON::BI__builtin_neon_vset_lane_f64: 4337 // The vector type needs a cast for the v1f64 variant. 4338 Ops[1] = Builder.CreateBitCast(Ops[1], 4339 llvm::VectorType::get(DoubleTy, 1)); 4340 Ops.push_back(EmitScalarExpr(E->getArg(2))); 4341 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 4342 case NEON::BI__builtin_neon_vsetq_lane_f64: 4343 // The vector type needs a cast for the v2f64 variant. 
4344 Ops[1] = Builder.CreateBitCast(Ops[1], 4345 llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 2)); 4346 Ops.push_back(EmitScalarExpr(E->getArg(2))); 4347 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 4348 4349 case NEON::BI__builtin_neon_vget_lane_i8: 4350 case NEON::BI__builtin_neon_vdupb_lane_i8: 4351 Ops[0] = Builder.CreateBitCast(Ops[0], 4352 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8)); 4353 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4354 "vget_lane"); 4355 case NEON::BI__builtin_neon_vgetq_lane_i8: 4356 case NEON::BI__builtin_neon_vdupb_laneq_i8: 4357 Ops[0] = Builder.CreateBitCast(Ops[0], 4358 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16)); 4359 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4360 "vgetq_lane"); 4361 case NEON::BI__builtin_neon_vget_lane_i16: 4362 case NEON::BI__builtin_neon_vduph_lane_i16: 4363 Ops[0] = Builder.CreateBitCast(Ops[0], 4364 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4)); 4365 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4366 "vget_lane"); 4367 case NEON::BI__builtin_neon_vgetq_lane_i16: 4368 case NEON::BI__builtin_neon_vduph_laneq_i16: 4369 Ops[0] = Builder.CreateBitCast(Ops[0], 4370 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8)); 4371 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4372 "vgetq_lane"); 4373 case NEON::BI__builtin_neon_vget_lane_i32: 4374 case NEON::BI__builtin_neon_vdups_lane_i32: 4375 Ops[0] = Builder.CreateBitCast( 4376 Ops[0], 4377 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 32), 2)); 4378 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4379 "vget_lane"); 4380 case NEON::BI__builtin_neon_vdups_lane_f32: 4381 Ops[0] = Builder.CreateBitCast(Ops[0], 4382 
llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 2)); 4383 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4384 "vdups_lane"); 4385 case NEON::BI__builtin_neon_vgetq_lane_i32: 4386 case NEON::BI__builtin_neon_vdups_laneq_i32: 4387 Ops[0] = Builder.CreateBitCast(Ops[0], 4388 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 32), 4)); 4389 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4390 "vgetq_lane"); 4391 case NEON::BI__builtin_neon_vget_lane_i64: 4392 case NEON::BI__builtin_neon_vdupd_lane_i64: 4393 Ops[0] = Builder.CreateBitCast(Ops[0], 4394 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 64), 1)); 4395 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4396 "vget_lane"); 4397 case NEON::BI__builtin_neon_vdupd_lane_f64: 4398 Ops[0] = Builder.CreateBitCast(Ops[0], 4399 llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 1)); 4400 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4401 "vdupd_lane"); 4402 case NEON::BI__builtin_neon_vgetq_lane_i64: 4403 case NEON::BI__builtin_neon_vdupd_laneq_i64: 4404 Ops[0] = Builder.CreateBitCast(Ops[0], 4405 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 64), 2)); 4406 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4407 "vgetq_lane"); 4408 case NEON::BI__builtin_neon_vget_lane_f32: 4409 Ops[0] = Builder.CreateBitCast(Ops[0], 4410 llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 2)); 4411 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4412 "vget_lane"); 4413 case NEON::BI__builtin_neon_vget_lane_f64: 4414 Ops[0] = Builder.CreateBitCast(Ops[0], 4415 llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 1)); 4416 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4417 "vget_lane"); 4418 case NEON::BI__builtin_neon_vgetq_lane_f32: 4419 case 
NEON::BI__builtin_neon_vdups_laneq_f32: 4420 Ops[0] = Builder.CreateBitCast(Ops[0], 4421 llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 4)); 4422 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4423 "vgetq_lane"); 4424 case NEON::BI__builtin_neon_vgetq_lane_f64: 4425 case NEON::BI__builtin_neon_vdupd_laneq_f64: 4426 Ops[0] = Builder.CreateBitCast(Ops[0], 4427 llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 2)); 4428 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4429 "vgetq_lane"); 4430 case NEON::BI__builtin_neon_vaddd_s64: 4431 case NEON::BI__builtin_neon_vaddd_u64: 4432 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd"); 4433 case NEON::BI__builtin_neon_vsubd_s64: 4434 case NEON::BI__builtin_neon_vsubd_u64: 4435 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd"); 4436 case NEON::BI__builtin_neon_vqdmlalh_s16: 4437 case NEON::BI__builtin_neon_vqdmlslh_s16: { 4438 SmallVector<Value *, 2> ProductOps; 4439 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 4440 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2)))); 4441 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 4442 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 4443 ProductOps, "vqdmlXl"); 4444 Constant *CI = ConstantInt::get(SizeTy, 0); 4445 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 4446 4447 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16 4448 ? 
Intrinsic::aarch64_neon_sqadd 4449 : Intrinsic::aarch64_neon_sqsub; 4450 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl"); 4451 } 4452 case NEON::BI__builtin_neon_vqshlud_n_s64: { 4453 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4454 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 4455 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty), 4456 Ops, "vqshlu_n"); 4457 } 4458 case NEON::BI__builtin_neon_vqshld_n_u64: 4459 case NEON::BI__builtin_neon_vqshld_n_s64: { 4460 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64 4461 ? Intrinsic::aarch64_neon_uqshl 4462 : Intrinsic::aarch64_neon_sqshl; 4463 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4464 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 4465 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n"); 4466 } 4467 case NEON::BI__builtin_neon_vrshrd_n_u64: 4468 case NEON::BI__builtin_neon_vrshrd_n_s64: { 4469 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64 4470 ? Intrinsic::aarch64_neon_urshl 4471 : Intrinsic::aarch64_neon_srshl; 4472 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4473 int SV = cast<ConstantInt>(Ops[1])->getSExtValue(); 4474 Ops[1] = ConstantInt::get(Int64Ty, -SV); 4475 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n"); 4476 } 4477 case NEON::BI__builtin_neon_vrsrad_n_u64: 4478 case NEON::BI__builtin_neon_vrsrad_n_s64: { 4479 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64 4480 ? 
Intrinsic::aarch64_neon_urshl 4481 : Intrinsic::aarch64_neon_srshl; 4482 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 4483 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2)))); 4484 Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Int64Ty), Ops[1], 4485 Builder.CreateSExt(Ops[2], Int64Ty)); 4486 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty)); 4487 } 4488 case NEON::BI__builtin_neon_vshld_n_s64: 4489 case NEON::BI__builtin_neon_vshld_n_u64: { 4490 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 4491 return Builder.CreateShl( 4492 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n"); 4493 } 4494 case NEON::BI__builtin_neon_vshrd_n_s64: { 4495 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 4496 return Builder.CreateAShr( 4497 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 4498 Amt->getZExtValue())), 4499 "shrd_n"); 4500 } 4501 case NEON::BI__builtin_neon_vshrd_n_u64: { 4502 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 4503 uint64_t ShiftAmt = Amt->getZExtValue(); 4504 // Right-shifting an unsigned value by its size yields 0. 4505 if (ShiftAmt == 64) 4506 return ConstantInt::get(Int64Ty, 0); 4507 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt), 4508 "shrd_n"); 4509 } 4510 case NEON::BI__builtin_neon_vsrad_n_s64: { 4511 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 4512 Ops[1] = Builder.CreateAShr( 4513 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 4514 Amt->getZExtValue())), 4515 "shrd_n"); 4516 return Builder.CreateAdd(Ops[0], Ops[1]); 4517 } 4518 case NEON::BI__builtin_neon_vsrad_n_u64: { 4519 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 4520 uint64_t ShiftAmt = Amt->getZExtValue(); 4521 // Right-shifting an unsigned value by its size yields 0. 4522 // As Op + 0 = Op, return Ops[0] directly. 
4523 if (ShiftAmt == 64) 4524 return Ops[0]; 4525 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt), 4526 "shrd_n"); 4527 return Builder.CreateAdd(Ops[0], Ops[1]); 4528 } 4529 case NEON::BI__builtin_neon_vqdmlalh_lane_s16: 4530 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16: 4531 case NEON::BI__builtin_neon_vqdmlslh_lane_s16: 4532 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: { 4533 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 4534 "lane"); 4535 SmallVector<Value *, 2> ProductOps; 4536 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 4537 ProductOps.push_back(vectorWrapScalar16(Ops[2])); 4538 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 4539 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 4540 ProductOps, "vqdmlXl"); 4541 Constant *CI = ConstantInt::get(SizeTy, 0); 4542 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 4543 Ops.pop_back(); 4544 4545 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 || 4546 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16) 4547 ? Intrinsic::aarch64_neon_sqadd 4548 : Intrinsic::aarch64_neon_sqsub; 4549 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl"); 4550 } 4551 case NEON::BI__builtin_neon_vqdmlals_s32: 4552 case NEON::BI__builtin_neon_vqdmlsls_s32: { 4553 SmallVector<Value *, 2> ProductOps; 4554 ProductOps.push_back(Ops[1]); 4555 ProductOps.push_back(EmitScalarExpr(E->getArg(2))); 4556 Ops[1] = 4557 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 4558 ProductOps, "vqdmlXl"); 4559 4560 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32 4561 ? 
Intrinsic::aarch64_neon_sqadd 4562 : Intrinsic::aarch64_neon_sqsub; 4563 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl"); 4564 } 4565 case NEON::BI__builtin_neon_vqdmlals_lane_s32: 4566 case NEON::BI__builtin_neon_vqdmlals_laneq_s32: 4567 case NEON::BI__builtin_neon_vqdmlsls_lane_s32: 4568 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: { 4569 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 4570 "lane"); 4571 SmallVector<Value *, 2> ProductOps; 4572 ProductOps.push_back(Ops[1]); 4573 ProductOps.push_back(Ops[2]); 4574 Ops[1] = 4575 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 4576 ProductOps, "vqdmlXl"); 4577 Ops.pop_back(); 4578 4579 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 || 4580 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32) 4581 ? Intrinsic::aarch64_neon_sqadd 4582 : Intrinsic::aarch64_neon_sqsub; 4583 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl"); 4584 } 4585 } 4586 4587 llvm::VectorType *VTy = GetNeonType(this, Type); 4588 llvm::Type *Ty = VTy; 4589 if (!Ty) 4590 return nullptr; 4591 4592 // Not all intrinsics handled by the common case work for AArch64 yet, so only 4593 // defer to common code if it's been added to our special map. 
4594 Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID, 4595 AArch64SIMDIntrinsicsProvenSorted); 4596 4597 if (Builtin) 4598 return EmitCommonNeonBuiltinExpr( 4599 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 4600 Builtin->NameHint, Builtin->TypeModifier, E, Ops, nullptr); 4601 4602 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops)) 4603 return V; 4604 4605 unsigned Int; 4606 switch (BuiltinID) { 4607 default: return nullptr; 4608 case NEON::BI__builtin_neon_vbsl_v: 4609 case NEON::BI__builtin_neon_vbslq_v: { 4610 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy); 4611 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl"); 4612 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl"); 4613 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl"); 4614 4615 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl"); 4616 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl"); 4617 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl"); 4618 return Builder.CreateBitCast(Ops[0], Ty); 4619 } 4620 case NEON::BI__builtin_neon_vfma_lane_v: 4621 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types 4622 // The ARM builtins (and instructions) have the addend as the first 4623 // operand, but the 'fma' intrinsics have it last. Swap it around here. 4624 Value *Addend = Ops[0]; 4625 Value *Multiplicand = Ops[1]; 4626 Value *LaneSource = Ops[2]; 4627 Ops[0] = Multiplicand; 4628 Ops[1] = LaneSource; 4629 Ops[2] = Addend; 4630 4631 // Now adjust things to handle the lane access. 4632 llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ? 
4633 llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) : 4634 VTy; 4635 llvm::Constant *cst = cast<Constant>(Ops[3]); 4636 Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst); 4637 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy); 4638 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane"); 4639 4640 Ops.pop_back(); 4641 Int = Intrinsic::fma; 4642 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla"); 4643 } 4644 case NEON::BI__builtin_neon_vfma_laneq_v: { 4645 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 4646 // v1f64 fma should be mapped to Neon scalar f64 fma 4647 if (VTy && VTy->getElementType() == DoubleTy) { 4648 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 4649 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 4650 llvm::Type *VTy = GetNeonType(this, 4651 NeonTypeFlags(NeonTypeFlags::Float64, false, true)); 4652 Ops[2] = Builder.CreateBitCast(Ops[2], VTy); 4653 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 4654 Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); 4655 Value *Result = Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); 4656 return Builder.CreateBitCast(Result, Ty); 4657 } 4658 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 4659 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4660 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4661 4662 llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(), 4663 VTy->getNumElements() * 2); 4664 Ops[2] = Builder.CreateBitCast(Ops[2], STy); 4665 Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), 4666 cast<ConstantInt>(Ops[3])); 4667 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); 4668 4669 return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); 4670 } 4671 case NEON::BI__builtin_neon_vfmaq_laneq_v: { 4672 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 4673 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4674 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4675 4676 Ops[2] = 
Builder.CreateBitCast(Ops[2], Ty); 4677 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3])); 4678 return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); 4679 } 4680 case NEON::BI__builtin_neon_vfmas_lane_f32: 4681 case NEON::BI__builtin_neon_vfmas_laneq_f32: 4682 case NEON::BI__builtin_neon_vfmad_lane_f64: 4683 case NEON::BI__builtin_neon_vfmad_laneq_f64: { 4684 Ops.push_back(EmitScalarExpr(E->getArg(3))); 4685 llvm::Type *Ty = ConvertType(E->getCallReturnType()); 4686 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 4687 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 4688 return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); 4689 } 4690 case NEON::BI__builtin_neon_vfms_v: 4691 case NEON::BI__builtin_neon_vfmsq_v: { // Only used for FP types 4692 // FIXME: probably remove when we no longer support aarch64_simd.h 4693 // (arm_neon.h delegates to vfma). 4694 4695 // The ARM builtins (and instructions) have the addend as the first 4696 // operand, but the 'fma' intrinsics have it last. Swap it around here. 4697 Value *Subtrahend = Ops[0]; 4698 Value *Multiplicand = Ops[2]; 4699 Ops[0] = Multiplicand; 4700 Ops[2] = Subtrahend; 4701 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 4702 Ops[1] = Builder.CreateFNeg(Ops[1]); 4703 Int = Intrinsic::fma; 4704 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmls"); 4705 } 4706 case NEON::BI__builtin_neon_vmull_v: 4707 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 4708 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull; 4709 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull; 4710 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); 4711 case NEON::BI__builtin_neon_vmax_v: 4712 case NEON::BI__builtin_neon_vmaxq_v: 4713 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 4714 Int = usgn ? 
Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax; 4715 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax; 4716 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax"); 4717 case NEON::BI__builtin_neon_vmin_v: 4718 case NEON::BI__builtin_neon_vminq_v: 4719 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 4720 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin; 4721 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin; 4722 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin"); 4723 case NEON::BI__builtin_neon_vabd_v: 4724 case NEON::BI__builtin_neon_vabdq_v: 4725 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 4726 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd; 4727 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd; 4728 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd"); 4729 case NEON::BI__builtin_neon_vpadal_v: 4730 case NEON::BI__builtin_neon_vpadalq_v: { 4731 unsigned ArgElts = VTy->getNumElements(); 4732 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType()); 4733 unsigned BitWidth = EltTy->getBitWidth(); 4734 llvm::Type *ArgTy = llvm::VectorType::get( 4735 llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts); 4736 llvm::Type* Tys[2] = { VTy, ArgTy }; 4737 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp; 4738 SmallVector<llvm::Value*, 1> TmpOps; 4739 TmpOps.push_back(Ops[1]); 4740 Function *F = CGM.getIntrinsic(Int, Tys); 4741 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal"); 4742 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType()); 4743 return Builder.CreateAdd(tmp, addend); 4744 } 4745 case NEON::BI__builtin_neon_vpmin_v: 4746 case NEON::BI__builtin_neon_vpminq_v: 4747 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 4748 Int = usgn ? 
Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp; 4749 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp; 4750 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin"); 4751 case NEON::BI__builtin_neon_vpmax_v: 4752 case NEON::BI__builtin_neon_vpmaxq_v: 4753 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 4754 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp; 4755 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp; 4756 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); 4757 case NEON::BI__builtin_neon_vminnm_v: 4758 case NEON::BI__builtin_neon_vminnmq_v: 4759 Int = Intrinsic::aarch64_neon_fminnm; 4760 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm"); 4761 case NEON::BI__builtin_neon_vmaxnm_v: 4762 case NEON::BI__builtin_neon_vmaxnmq_v: 4763 Int = Intrinsic::aarch64_neon_fmaxnm; 4764 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm"); 4765 case NEON::BI__builtin_neon_vrecpss_f32: { 4766 llvm::Type *f32Type = llvm::Type::getFloatTy(getLLVMContext()); 4767 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4768 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, f32Type), 4769 Ops, "vrecps"); 4770 } 4771 case NEON::BI__builtin_neon_vrecpsd_f64: { 4772 llvm::Type *f64Type = llvm::Type::getDoubleTy(getLLVMContext()); 4773 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4774 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, f64Type), 4775 Ops, "vrecps"); 4776 } 4777 case NEON::BI__builtin_neon_vqshrun_n_v: 4778 Int = Intrinsic::aarch64_neon_sqshrun; 4779 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); 4780 case NEON::BI__builtin_neon_vqrshrun_n_v: 4781 Int = Intrinsic::aarch64_neon_sqrshrun; 4782 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n"); 4783 case NEON::BI__builtin_neon_vqshrn_n_v: 4784 Int = usgn ? 
Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn; 4785 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n"); 4786 case NEON::BI__builtin_neon_vrshrn_n_v: 4787 Int = Intrinsic::aarch64_neon_rshrn; 4788 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n"); 4789 case NEON::BI__builtin_neon_vqrshrn_n_v: 4790 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn; 4791 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); 4792 case NEON::BI__builtin_neon_vrnda_v: 4793 case NEON::BI__builtin_neon_vrndaq_v: { 4794 Int = Intrinsic::round; 4795 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda"); 4796 } 4797 case NEON::BI__builtin_neon_vrndi_v: 4798 case NEON::BI__builtin_neon_vrndiq_v: { 4799 Int = Intrinsic::nearbyint; 4800 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi"); 4801 } 4802 case NEON::BI__builtin_neon_vrndm_v: 4803 case NEON::BI__builtin_neon_vrndmq_v: { 4804 Int = Intrinsic::floor; 4805 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm"); 4806 } 4807 case NEON::BI__builtin_neon_vrndn_v: 4808 case NEON::BI__builtin_neon_vrndnq_v: { 4809 Int = Intrinsic::aarch64_neon_frintn; 4810 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn"); 4811 } 4812 case NEON::BI__builtin_neon_vrndp_v: 4813 case NEON::BI__builtin_neon_vrndpq_v: { 4814 Int = Intrinsic::ceil; 4815 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp"); 4816 } 4817 case NEON::BI__builtin_neon_vrndx_v: 4818 case NEON::BI__builtin_neon_vrndxq_v: { 4819 Int = Intrinsic::rint; 4820 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx"); 4821 } 4822 case NEON::BI__builtin_neon_vrnd_v: 4823 case NEON::BI__builtin_neon_vrndq_v: { 4824 Int = Intrinsic::trunc; 4825 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz"); 4826 } 4827 case NEON::BI__builtin_neon_vceqz_v: 4828 case NEON::BI__builtin_neon_vceqzq_v: 4829 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ, 
4830 ICmpInst::ICMP_EQ, "vceqz"); 4831 case NEON::BI__builtin_neon_vcgez_v: 4832 case NEON::BI__builtin_neon_vcgezq_v: 4833 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE, 4834 ICmpInst::ICMP_SGE, "vcgez"); 4835 case NEON::BI__builtin_neon_vclez_v: 4836 case NEON::BI__builtin_neon_vclezq_v: 4837 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE, 4838 ICmpInst::ICMP_SLE, "vclez"); 4839 case NEON::BI__builtin_neon_vcgtz_v: 4840 case NEON::BI__builtin_neon_vcgtzq_v: 4841 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT, 4842 ICmpInst::ICMP_SGT, "vcgtz"); 4843 case NEON::BI__builtin_neon_vcltz_v: 4844 case NEON::BI__builtin_neon_vcltzq_v: 4845 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT, 4846 ICmpInst::ICMP_SLT, "vcltz"); 4847 case NEON::BI__builtin_neon_vcvt_f64_v: 4848 case NEON::BI__builtin_neon_vcvtq_f64_v: 4849 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4850 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); 4851 return usgn ? 
Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 4852 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 4853 case NEON::BI__builtin_neon_vcvt_f64_f32: { 4854 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad && 4855 "unexpected vcvt_f64_f32 builtin"); 4856 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false); 4857 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 4858 4859 return Builder.CreateFPExt(Ops[0], Ty, "vcvt"); 4860 } 4861 case NEON::BI__builtin_neon_vcvt_f32_f64: { 4862 assert(Type.getEltType() == NeonTypeFlags::Float32 && 4863 "unexpected vcvt_f32_f64 builtin"); 4864 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true); 4865 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 4866 4867 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt"); 4868 } 4869 case NEON::BI__builtin_neon_vcvt_s32_v: 4870 case NEON::BI__builtin_neon_vcvt_u32_v: 4871 case NEON::BI__builtin_neon_vcvt_s64_v: 4872 case NEON::BI__builtin_neon_vcvt_u64_v: 4873 case NEON::BI__builtin_neon_vcvtq_s32_v: 4874 case NEON::BI__builtin_neon_vcvtq_u32_v: 4875 case NEON::BI__builtin_neon_vcvtq_s64_v: 4876 case NEON::BI__builtin_neon_vcvtq_u64_v: { 4877 bool Double = 4878 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 4879 llvm::Type *InTy = 4880 GetNeonType(this, 4881 NeonTypeFlags(Double ? NeonTypeFlags::Float64 4882 : NeonTypeFlags::Float32, false, quad)); 4883 Ops[0] = Builder.CreateBitCast(Ops[0], InTy); 4884 if (usgn) 4885 return Builder.CreateFPToUI(Ops[0], Ty); 4886 return Builder.CreateFPToSI(Ops[0], Ty); 4887 } 4888 case NEON::BI__builtin_neon_vcvta_s32_v: 4889 case NEON::BI__builtin_neon_vcvtaq_s32_v: 4890 case NEON::BI__builtin_neon_vcvta_u32_v: 4891 case NEON::BI__builtin_neon_vcvtaq_u32_v: 4892 case NEON::BI__builtin_neon_vcvta_s64_v: 4893 case NEON::BI__builtin_neon_vcvtaq_s64_v: 4894 case NEON::BI__builtin_neon_vcvta_u64_v: 4895 case NEON::BI__builtin_neon_vcvtaq_u64_v: { 4896 Int = usgn ? 
Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas; 4897 bool Double = 4898 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 4899 llvm::Type *InTy = 4900 GetNeonType(this, 4901 NeonTypeFlags(Double ? NeonTypeFlags::Float64 4902 : NeonTypeFlags::Float32, false, quad)); 4903 llvm::Type *Tys[2] = { Ty, InTy }; 4904 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta"); 4905 } 4906 case NEON::BI__builtin_neon_vcvtm_s32_v: 4907 case NEON::BI__builtin_neon_vcvtmq_s32_v: 4908 case NEON::BI__builtin_neon_vcvtm_u32_v: 4909 case NEON::BI__builtin_neon_vcvtmq_u32_v: 4910 case NEON::BI__builtin_neon_vcvtm_s64_v: 4911 case NEON::BI__builtin_neon_vcvtmq_s64_v: 4912 case NEON::BI__builtin_neon_vcvtm_u64_v: 4913 case NEON::BI__builtin_neon_vcvtmq_u64_v: { 4914 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms; 4915 bool Double = 4916 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 4917 llvm::Type *InTy = 4918 GetNeonType(this, 4919 NeonTypeFlags(Double ? NeonTypeFlags::Float64 4920 : NeonTypeFlags::Float32, false, quad)); 4921 llvm::Type *Tys[2] = { Ty, InTy }; 4922 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm"); 4923 } 4924 case NEON::BI__builtin_neon_vcvtn_s32_v: 4925 case NEON::BI__builtin_neon_vcvtnq_s32_v: 4926 case NEON::BI__builtin_neon_vcvtn_u32_v: 4927 case NEON::BI__builtin_neon_vcvtnq_u32_v: 4928 case NEON::BI__builtin_neon_vcvtn_s64_v: 4929 case NEON::BI__builtin_neon_vcvtnq_s64_v: 4930 case NEON::BI__builtin_neon_vcvtn_u64_v: 4931 case NEON::BI__builtin_neon_vcvtnq_u64_v: { 4932 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns; 4933 bool Double = 4934 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 4935 llvm::Type *InTy = 4936 GetNeonType(this, 4937 NeonTypeFlags(Double ? 
NeonTypeFlags::Float64 4938 : NeonTypeFlags::Float32, false, quad)); 4939 llvm::Type *Tys[2] = { Ty, InTy }; 4940 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn"); 4941 } 4942 case NEON::BI__builtin_neon_vcvtp_s32_v: 4943 case NEON::BI__builtin_neon_vcvtpq_s32_v: 4944 case NEON::BI__builtin_neon_vcvtp_u32_v: 4945 case NEON::BI__builtin_neon_vcvtpq_u32_v: 4946 case NEON::BI__builtin_neon_vcvtp_s64_v: 4947 case NEON::BI__builtin_neon_vcvtpq_s64_v: 4948 case NEON::BI__builtin_neon_vcvtp_u64_v: 4949 case NEON::BI__builtin_neon_vcvtpq_u64_v: { 4950 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps; 4951 bool Double = 4952 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 4953 llvm::Type *InTy = 4954 GetNeonType(this, 4955 NeonTypeFlags(Double ? NeonTypeFlags::Float64 4956 : NeonTypeFlags::Float32, false, quad)); 4957 llvm::Type *Tys[2] = { Ty, InTy }; 4958 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp"); 4959 } 4960 case NEON::BI__builtin_neon_vmulx_v: 4961 case NEON::BI__builtin_neon_vmulxq_v: { 4962 Int = Intrinsic::aarch64_neon_fmulx; 4963 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); 4964 } 4965 case NEON::BI__builtin_neon_vmul_lane_v: 4966 case NEON::BI__builtin_neon_vmul_laneq_v: { 4967 // v1f64 vmul_lane should be mapped to Neon scalar mul lane 4968 bool Quad = false; 4969 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v) 4970 Quad = true; 4971 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 4972 llvm::Type *VTy = GetNeonType(this, 4973 NeonTypeFlags(NeonTypeFlags::Float64, false, Quad)); 4974 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 4975 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); 4976 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]); 4977 return Builder.CreateBitCast(Result, Ty); 4978 } 4979 case NEON::BI__builtin_neon_vnegd_s64: 4980 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd"); 4981 case 
NEON::BI__builtin_neon_vpmaxnm_v: 4982 case NEON::BI__builtin_neon_vpmaxnmq_v: { 4983 Int = Intrinsic::aarch64_neon_fmaxnmp; 4984 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm"); 4985 } 4986 case NEON::BI__builtin_neon_vpminnm_v: 4987 case NEON::BI__builtin_neon_vpminnmq_v: { 4988 Int = Intrinsic::aarch64_neon_fminnmp; 4989 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm"); 4990 } 4991 case NEON::BI__builtin_neon_vsqrt_v: 4992 case NEON::BI__builtin_neon_vsqrtq_v: { 4993 Int = Intrinsic::sqrt; 4994 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4995 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt"); 4996 } 4997 case NEON::BI__builtin_neon_vrbit_v: 4998 case NEON::BI__builtin_neon_vrbitq_v: { 4999 Int = Intrinsic::aarch64_neon_rbit; 5000 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit"); 5001 } 5002 case NEON::BI__builtin_neon_vaddv_u8: 5003 // FIXME: These are handled by the AArch64 scalar code. 5004 usgn = true; 5005 // FALLTHROUGH 5006 case NEON::BI__builtin_neon_vaddv_s8: { 5007 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 5008 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5009 VTy = 5010 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 5011 llvm::Type *Tys[2] = { Ty, VTy }; 5012 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5013 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 5014 return Builder.CreateTrunc(Ops[0], 5015 llvm::IntegerType::get(getLLVMContext(), 8)); 5016 } 5017 case NEON::BI__builtin_neon_vaddv_u16: 5018 usgn = true; 5019 // FALLTHROUGH 5020 case NEON::BI__builtin_neon_vaddv_s16: { 5021 Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 5022 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5023 VTy = 5024 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 5025 llvm::Type *Tys[2] = { Ty, VTy }; 5026 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5027 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 5028 return Builder.CreateTrunc(Ops[0], 5029 llvm::IntegerType::get(getLLVMContext(), 16)); 5030 } 5031 case NEON::BI__builtin_neon_vaddvq_u8: 5032 usgn = true; 5033 // FALLTHROUGH 5034 case NEON::BI__builtin_neon_vaddvq_s8: { 5035 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 5036 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5037 VTy = 5038 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 5039 llvm::Type *Tys[2] = { Ty, VTy }; 5040 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5041 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 5042 return Builder.CreateTrunc(Ops[0], 5043 llvm::IntegerType::get(getLLVMContext(), 8)); 5044 } 5045 case NEON::BI__builtin_neon_vaddvq_u16: 5046 usgn = true; 5047 // FALLTHROUGH 5048 case NEON::BI__builtin_neon_vaddvq_s16: { 5049 Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 5050 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5051 VTy = 5052 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 5053 llvm::Type *Tys[2] = { Ty, VTy }; 5054 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5055 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 5056 return Builder.CreateTrunc(Ops[0], 5057 llvm::IntegerType::get(getLLVMContext(), 16)); 5058 } 5059 case NEON::BI__builtin_neon_vmaxv_u8: { 5060 Int = Intrinsic::aarch64_neon_umaxv; 5061 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5062 VTy = 5063 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 5064 llvm::Type *Tys[2] = { Ty, VTy }; 5065 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5066 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5067 return Builder.CreateTrunc(Ops[0], 5068 llvm::IntegerType::get(getLLVMContext(), 8)); 5069 } 5070 case NEON::BI__builtin_neon_vmaxv_u16: { 5071 Int = Intrinsic::aarch64_neon_umaxv; 5072 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5073 VTy = 5074 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 5075 llvm::Type *Tys[2] = { Ty, VTy }; 5076 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5077 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5078 return Builder.CreateTrunc(Ops[0], 5079 llvm::IntegerType::get(getLLVMContext(), 16)); 5080 } 5081 case NEON::BI__builtin_neon_vmaxvq_u8: { 5082 Int = Intrinsic::aarch64_neon_umaxv; 5083 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5084 VTy = 5085 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 5086 llvm::Type *Tys[2] = { Ty, VTy }; 5087 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5088 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5089 return Builder.CreateTrunc(Ops[0], 5090 llvm::IntegerType::get(getLLVMContext(), 8)); 5091 } 5092 case NEON::BI__builtin_neon_vmaxvq_u16: { 5093 Int = 
Intrinsic::aarch64_neon_umaxv; 5094 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5095 VTy = 5096 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 5097 llvm::Type *Tys[2] = { Ty, VTy }; 5098 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5099 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5100 return Builder.CreateTrunc(Ops[0], 5101 llvm::IntegerType::get(getLLVMContext(), 16)); 5102 } 5103 case NEON::BI__builtin_neon_vmaxv_s8: { 5104 Int = Intrinsic::aarch64_neon_smaxv; 5105 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5106 VTy = 5107 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 5108 llvm::Type *Tys[2] = { Ty, VTy }; 5109 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5110 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5111 return Builder.CreateTrunc(Ops[0], 5112 llvm::IntegerType::get(getLLVMContext(), 8)); 5113 } 5114 case NEON::BI__builtin_neon_vmaxv_s16: { 5115 Int = Intrinsic::aarch64_neon_smaxv; 5116 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5117 VTy = 5118 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 5119 llvm::Type *Tys[2] = { Ty, VTy }; 5120 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5121 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5122 return Builder.CreateTrunc(Ops[0], 5123 llvm::IntegerType::get(getLLVMContext(), 16)); 5124 } 5125 case NEON::BI__builtin_neon_vmaxvq_s8: { 5126 Int = Intrinsic::aarch64_neon_smaxv; 5127 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5128 VTy = 5129 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 5130 llvm::Type *Tys[2] = { Ty, VTy }; 5131 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5132 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5133 return Builder.CreateTrunc(Ops[0], 5134 llvm::IntegerType::get(getLLVMContext(), 8)); 5135 } 5136 case NEON::BI__builtin_neon_vmaxvq_s16: { 5137 Int = Intrinsic::aarch64_neon_smaxv; 5138 Ty = 
llvm::IntegerType::get(getLLVMContext(), 32); 5139 VTy = 5140 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 5141 llvm::Type *Tys[2] = { Ty, VTy }; 5142 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5143 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5144 return Builder.CreateTrunc(Ops[0], 5145 llvm::IntegerType::get(getLLVMContext(), 16)); 5146 } 5147 case NEON::BI__builtin_neon_vminv_u8: { 5148 Int = Intrinsic::aarch64_neon_uminv; 5149 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5150 VTy = 5151 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 5152 llvm::Type *Tys[2] = { Ty, VTy }; 5153 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5154 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5155 return Builder.CreateTrunc(Ops[0], 5156 llvm::IntegerType::get(getLLVMContext(), 8)); 5157 } 5158 case NEON::BI__builtin_neon_vminv_u16: { 5159 Int = Intrinsic::aarch64_neon_uminv; 5160 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5161 VTy = 5162 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 5163 llvm::Type *Tys[2] = { Ty, VTy }; 5164 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5165 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5166 return Builder.CreateTrunc(Ops[0], 5167 llvm::IntegerType::get(getLLVMContext(), 16)); 5168 } 5169 case NEON::BI__builtin_neon_vminvq_u8: { 5170 Int = Intrinsic::aarch64_neon_uminv; 5171 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5172 VTy = 5173 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 5174 llvm::Type *Tys[2] = { Ty, VTy }; 5175 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5176 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5177 return Builder.CreateTrunc(Ops[0], 5178 llvm::IntegerType::get(getLLVMContext(), 8)); 5179 } 5180 case NEON::BI__builtin_neon_vminvq_u16: { 5181 Int = Intrinsic::aarch64_neon_uminv; 5182 Ty = llvm::IntegerType::get(getLLVMContext(), 
32); 5183 VTy = 5184 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 5185 llvm::Type *Tys[2] = { Ty, VTy }; 5186 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5187 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5188 return Builder.CreateTrunc(Ops[0], 5189 llvm::IntegerType::get(getLLVMContext(), 16)); 5190 } 5191 case NEON::BI__builtin_neon_vminv_s8: { 5192 Int = Intrinsic::aarch64_neon_sminv; 5193 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5194 VTy = 5195 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 5196 llvm::Type *Tys[2] = { Ty, VTy }; 5197 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5198 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5199 return Builder.CreateTrunc(Ops[0], 5200 llvm::IntegerType::get(getLLVMContext(), 8)); 5201 } 5202 case NEON::BI__builtin_neon_vminv_s16: { 5203 Int = Intrinsic::aarch64_neon_sminv; 5204 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5205 VTy = 5206 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 5207 llvm::Type *Tys[2] = { Ty, VTy }; 5208 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5209 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5210 return Builder.CreateTrunc(Ops[0], 5211 llvm::IntegerType::get(getLLVMContext(), 16)); 5212 } 5213 case NEON::BI__builtin_neon_vminvq_s8: { 5214 Int = Intrinsic::aarch64_neon_sminv; 5215 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5216 VTy = 5217 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 5218 llvm::Type *Tys[2] = { Ty, VTy }; 5219 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5220 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5221 return Builder.CreateTrunc(Ops[0], 5222 llvm::IntegerType::get(getLLVMContext(), 8)); 5223 } 5224 case NEON::BI__builtin_neon_vminvq_s16: { 5225 Int = Intrinsic::aarch64_neon_sminv; 5226 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5227 VTy = 5228 
llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 5229 llvm::Type *Tys[2] = { Ty, VTy }; 5230 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5231 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5232 return Builder.CreateTrunc(Ops[0], 5233 llvm::IntegerType::get(getLLVMContext(), 16)); 5234 } 5235 case NEON::BI__builtin_neon_vmul_n_f64: { 5236 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 5237 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy); 5238 return Builder.CreateFMul(Ops[0], RHS); 5239 } 5240 case NEON::BI__builtin_neon_vaddlv_u8: { 5241 Int = Intrinsic::aarch64_neon_uaddlv; 5242 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5243 VTy = 5244 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 5245 llvm::Type *Tys[2] = { Ty, VTy }; 5246 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5247 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 5248 return Builder.CreateTrunc(Ops[0], 5249 llvm::IntegerType::get(getLLVMContext(), 16)); 5250 } 5251 case NEON::BI__builtin_neon_vaddlv_u16: { 5252 Int = Intrinsic::aarch64_neon_uaddlv; 5253 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5254 VTy = 5255 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 5256 llvm::Type *Tys[2] = { Ty, VTy }; 5257 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5258 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 5259 } 5260 case NEON::BI__builtin_neon_vaddlvq_u8: { 5261 Int = Intrinsic::aarch64_neon_uaddlv; 5262 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5263 VTy = 5264 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 5265 llvm::Type *Tys[2] = { Ty, VTy }; 5266 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5267 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 5268 return Builder.CreateTrunc(Ops[0], 5269 llvm::IntegerType::get(getLLVMContext(), 16)); 5270 } 5271 case NEON::BI__builtin_neon_vaddlvq_u16: { 
5272 Int = Intrinsic::aarch64_neon_uaddlv; 5273 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5274 VTy = 5275 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 5276 llvm::Type *Tys[2] = { Ty, VTy }; 5277 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5278 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 5279 } 5280 case NEON::BI__builtin_neon_vaddlv_s8: { 5281 Int = Intrinsic::aarch64_neon_saddlv; 5282 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5283 VTy = 5284 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 5285 llvm::Type *Tys[2] = { Ty, VTy }; 5286 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5287 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 5288 return Builder.CreateTrunc(Ops[0], 5289 llvm::IntegerType::get(getLLVMContext(), 16)); 5290 } 5291 case NEON::BI__builtin_neon_vaddlv_s16: { 5292 Int = Intrinsic::aarch64_neon_saddlv; 5293 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5294 VTy = 5295 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 5296 llvm::Type *Tys[2] = { Ty, VTy }; 5297 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5298 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 5299 } 5300 case NEON::BI__builtin_neon_vaddlvq_s8: { 5301 Int = Intrinsic::aarch64_neon_saddlv; 5302 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5303 VTy = 5304 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 5305 llvm::Type *Tys[2] = { Ty, VTy }; 5306 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5307 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 5308 return Builder.CreateTrunc(Ops[0], 5309 llvm::IntegerType::get(getLLVMContext(), 16)); 5310 } 5311 case NEON::BI__builtin_neon_vaddlvq_s16: { 5312 Int = Intrinsic::aarch64_neon_saddlv; 5313 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5314 VTy = 5315 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 5316 llvm::Type *Tys[2] = { 
Ty, VTy }; 5317 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5318 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 5319 } 5320 case NEON::BI__builtin_neon_vsri_n_v: 5321 case NEON::BI__builtin_neon_vsriq_n_v: { 5322 Int = Intrinsic::aarch64_neon_vsri; 5323 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 5324 return EmitNeonCall(Intrin, Ops, "vsri_n"); 5325 } 5326 case NEON::BI__builtin_neon_vsli_n_v: 5327 case NEON::BI__builtin_neon_vsliq_n_v: { 5328 Int = Intrinsic::aarch64_neon_vsli; 5329 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 5330 return EmitNeonCall(Intrin, Ops, "vsli_n"); 5331 } 5332 case NEON::BI__builtin_neon_vsra_n_v: 5333 case NEON::BI__builtin_neon_vsraq_n_v: 5334 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5335 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 5336 return Builder.CreateAdd(Ops[0], Ops[1]); 5337 case NEON::BI__builtin_neon_vrsra_n_v: 5338 case NEON::BI__builtin_neon_vrsraq_n_v: { 5339 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl; 5340 SmallVector<llvm::Value*,2> TmpOps; 5341 TmpOps.push_back(Ops[1]); 5342 TmpOps.push_back(Ops[2]); 5343 Function* F = CGM.getIntrinsic(Int, Ty); 5344 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true); 5345 Ops[0] = Builder.CreateBitCast(Ops[0], VTy); 5346 return Builder.CreateAdd(Ops[0], tmp); 5347 } 5348 // FIXME: Sharing loads & stores with 32-bit is complicated by the absence 5349 // of an Align parameter here. 
5350 case NEON::BI__builtin_neon_vld1_x2_v: 5351 case NEON::BI__builtin_neon_vld1q_x2_v: 5352 case NEON::BI__builtin_neon_vld1_x3_v: 5353 case NEON::BI__builtin_neon_vld1q_x3_v: 5354 case NEON::BI__builtin_neon_vld1_x4_v: 5355 case NEON::BI__builtin_neon_vld1q_x4_v: { 5356 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); 5357 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 5358 llvm::Type *Tys[2] = { VTy, PTy }; 5359 unsigned Int; 5360 switch (BuiltinID) { 5361 case NEON::BI__builtin_neon_vld1_x2_v: 5362 case NEON::BI__builtin_neon_vld1q_x2_v: 5363 Int = Intrinsic::aarch64_neon_ld1x2; 5364 break; 5365 case NEON::BI__builtin_neon_vld1_x3_v: 5366 case NEON::BI__builtin_neon_vld1q_x3_v: 5367 Int = Intrinsic::aarch64_neon_ld1x3; 5368 break; 5369 case NEON::BI__builtin_neon_vld1_x4_v: 5370 case NEON::BI__builtin_neon_vld1q_x4_v: 5371 Int = Intrinsic::aarch64_neon_ld1x4; 5372 break; 5373 } 5374 Function *F = CGM.getIntrinsic(Int, Tys); 5375 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN"); 5376 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5377 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5378 return Builder.CreateStore(Ops[1], Ops[0]); 5379 } 5380 case NEON::BI__builtin_neon_vst1_x2_v: 5381 case NEON::BI__builtin_neon_vst1q_x2_v: 5382 case NEON::BI__builtin_neon_vst1_x3_v: 5383 case NEON::BI__builtin_neon_vst1q_x3_v: 5384 case NEON::BI__builtin_neon_vst1_x4_v: 5385 case NEON::BI__builtin_neon_vst1q_x4_v: { 5386 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); 5387 llvm::Type *Tys[2] = { VTy, PTy }; 5388 unsigned Int; 5389 switch (BuiltinID) { 5390 case NEON::BI__builtin_neon_vst1_x2_v: 5391 case NEON::BI__builtin_neon_vst1q_x2_v: 5392 Int = Intrinsic::aarch64_neon_st1x2; 5393 break; 5394 case NEON::BI__builtin_neon_vst1_x3_v: 5395 case NEON::BI__builtin_neon_vst1q_x3_v: 5396 Int = Intrinsic::aarch64_neon_st1x3; 5397 break; 5398 case NEON::BI__builtin_neon_vst1_x4_v: 5399 case NEON::BI__builtin_neon_vst1q_x4_v: 
5400 Int = Intrinsic::aarch64_neon_st1x4; 5401 break; 5402 } 5403 SmallVector<Value *, 4> IntOps(Ops.begin()+1, Ops.end()); 5404 IntOps.push_back(Ops[0]); 5405 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), IntOps, ""); 5406 } 5407 case NEON::BI__builtin_neon_vld1_v: 5408 case NEON::BI__builtin_neon_vld1q_v: 5409 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 5410 return Builder.CreateLoad(Ops[0]); 5411 case NEON::BI__builtin_neon_vst1_v: 5412 case NEON::BI__builtin_neon_vst1q_v: 5413 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 5414 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 5415 return Builder.CreateStore(Ops[1], Ops[0]); 5416 case NEON::BI__builtin_neon_vld1_lane_v: 5417 case NEON::BI__builtin_neon_vld1q_lane_v: 5418 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5419 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 5420 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5421 Ops[0] = Builder.CreateLoad(Ops[0]); 5422 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane"); 5423 case NEON::BI__builtin_neon_vld1_dup_v: 5424 case NEON::BI__builtin_neon_vld1q_dup_v: { 5425 Value *V = UndefValue::get(Ty); 5426 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 5427 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5428 Ops[0] = Builder.CreateLoad(Ops[0]); 5429 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 5430 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI); 5431 return EmitNeonSplat(Ops[0], CI); 5432 } 5433 case NEON::BI__builtin_neon_vst1_lane_v: 5434 case NEON::BI__builtin_neon_vst1q_lane_v: 5435 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5436 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 5437 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5438 return Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty)); 5439 case NEON::BI__builtin_neon_vld2_v: 5440 case NEON::BI__builtin_neon_vld2q_v: { 5441 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 5442 
Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 5443 llvm::Type *Tys[2] = { VTy, PTy }; 5444 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys); 5445 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 5446 Ops[0] = Builder.CreateBitCast(Ops[0], 5447 llvm::PointerType::getUnqual(Ops[1]->getType())); 5448 return Builder.CreateStore(Ops[1], Ops[0]); 5449 } 5450 case NEON::BI__builtin_neon_vld3_v: 5451 case NEON::BI__builtin_neon_vld3q_v: { 5452 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 5453 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 5454 llvm::Type *Tys[2] = { VTy, PTy }; 5455 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys); 5456 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 5457 Ops[0] = Builder.CreateBitCast(Ops[0], 5458 llvm::PointerType::getUnqual(Ops[1]->getType())); 5459 return Builder.CreateStore(Ops[1], Ops[0]); 5460 } 5461 case NEON::BI__builtin_neon_vld4_v: 5462 case NEON::BI__builtin_neon_vld4q_v: { 5463 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 5464 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 5465 llvm::Type *Tys[2] = { VTy, PTy }; 5466 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys); 5467 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); 5468 Ops[0] = Builder.CreateBitCast(Ops[0], 5469 llvm::PointerType::getUnqual(Ops[1]->getType())); 5470 return Builder.CreateStore(Ops[1], Ops[0]); 5471 } 5472 case NEON::BI__builtin_neon_vld2_dup_v: 5473 case NEON::BI__builtin_neon_vld2q_dup_v: { 5474 llvm::Type *PTy = 5475 llvm::PointerType::getUnqual(VTy->getElementType()); 5476 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 5477 llvm::Type *Tys[2] = { VTy, PTy }; 5478 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys); 5479 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 5480 Ops[0] = Builder.CreateBitCast(Ops[0], 5481 llvm::PointerType::getUnqual(Ops[1]->getType())); 5482 return Builder.CreateStore(Ops[1], Ops[0]); 5483 } 5484 case NEON::BI__builtin_neon_vld3_dup_v: 5485 case 
NEON::BI__builtin_neon_vld3q_dup_v: { 5486 llvm::Type *PTy = 5487 llvm::PointerType::getUnqual(VTy->getElementType()); 5488 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 5489 llvm::Type *Tys[2] = { VTy, PTy }; 5490 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys); 5491 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 5492 Ops[0] = Builder.CreateBitCast(Ops[0], 5493 llvm::PointerType::getUnqual(Ops[1]->getType())); 5494 return Builder.CreateStore(Ops[1], Ops[0]); 5495 } 5496 case NEON::BI__builtin_neon_vld4_dup_v: 5497 case NEON::BI__builtin_neon_vld4q_dup_v: { 5498 llvm::Type *PTy = 5499 llvm::PointerType::getUnqual(VTy->getElementType()); 5500 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 5501 llvm::Type *Tys[2] = { VTy, PTy }; 5502 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys); 5503 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); 5504 Ops[0] = Builder.CreateBitCast(Ops[0], 5505 llvm::PointerType::getUnqual(Ops[1]->getType())); 5506 return Builder.CreateStore(Ops[1], Ops[0]); 5507 } 5508 case NEON::BI__builtin_neon_vld2_lane_v: 5509 case NEON::BI__builtin_neon_vld2q_lane_v: { 5510 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 5511 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys); 5512 Ops.push_back(Ops[1]); 5513 Ops.erase(Ops.begin()+1); 5514 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5515 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 5516 Ops[3] = Builder.CreateZExt(Ops[3], 5517 llvm::IntegerType::get(getLLVMContext(), 64)); 5518 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane"); 5519 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5520 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5521 return Builder.CreateStore(Ops[1], Ops[0]); 5522 } 5523 case NEON::BI__builtin_neon_vld3_lane_v: 5524 case NEON::BI__builtin_neon_vld3q_lane_v: { 5525 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 5526 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys); 5527 
Ops.push_back(Ops[1]); 5528 Ops.erase(Ops.begin()+1); 5529 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5530 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 5531 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 5532 Ops[4] = Builder.CreateZExt(Ops[4], 5533 llvm::IntegerType::get(getLLVMContext(), 64)); 5534 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane"); 5535 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5536 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5537 return Builder.CreateStore(Ops[1], Ops[0]); 5538 } 5539 case NEON::BI__builtin_neon_vld4_lane_v: 5540 case NEON::BI__builtin_neon_vld4q_lane_v: { 5541 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 5542 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys); 5543 Ops.push_back(Ops[1]); 5544 Ops.erase(Ops.begin()+1); 5545 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5546 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 5547 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 5548 Ops[4] = Builder.CreateBitCast(Ops[4], Ty); 5549 Ops[5] = Builder.CreateZExt(Ops[5], 5550 llvm::IntegerType::get(getLLVMContext(), 64)); 5551 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane"); 5552 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5553 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5554 return Builder.CreateStore(Ops[1], Ops[0]); 5555 } 5556 case NEON::BI__builtin_neon_vst2_v: 5557 case NEON::BI__builtin_neon_vst2q_v: { 5558 Ops.push_back(Ops[0]); 5559 Ops.erase(Ops.begin()); 5560 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() }; 5561 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys), 5562 Ops, ""); 5563 } 5564 case NEON::BI__builtin_neon_vst2_lane_v: 5565 case NEON::BI__builtin_neon_vst2q_lane_v: { 5566 Ops.push_back(Ops[0]); 5567 Ops.erase(Ops.begin()); 5568 Ops[2] = Builder.CreateZExt(Ops[2], 5569 llvm::IntegerType::get(getLLVMContext(), 64)); 5570 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; 5571 return 
EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys), 5572 Ops, ""); 5573 } 5574 case NEON::BI__builtin_neon_vst3_v: 5575 case NEON::BI__builtin_neon_vst3q_v: { 5576 Ops.push_back(Ops[0]); 5577 Ops.erase(Ops.begin()); 5578 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; 5579 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys), 5580 Ops, ""); 5581 } 5582 case NEON::BI__builtin_neon_vst3_lane_v: 5583 case NEON::BI__builtin_neon_vst3q_lane_v: { 5584 Ops.push_back(Ops[0]); 5585 Ops.erase(Ops.begin()); 5586 Ops[3] = Builder.CreateZExt(Ops[3], 5587 llvm::IntegerType::get(getLLVMContext(), 64)); 5588 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 5589 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys), 5590 Ops, ""); 5591 } 5592 case NEON::BI__builtin_neon_vst4_v: 5593 case NEON::BI__builtin_neon_vst4q_v: { 5594 Ops.push_back(Ops[0]); 5595 Ops.erase(Ops.begin()); 5596 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 5597 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys), 5598 Ops, ""); 5599 } 5600 case NEON::BI__builtin_neon_vst4_lane_v: 5601 case NEON::BI__builtin_neon_vst4q_lane_v: { 5602 Ops.push_back(Ops[0]); 5603 Ops.erase(Ops.begin()); 5604 Ops[4] = Builder.CreateZExt(Ops[4], 5605 llvm::IntegerType::get(getLLVMContext(), 64)); 5606 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() }; 5607 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys), 5608 Ops, ""); 5609 } 5610 case NEON::BI__builtin_neon_vtrn_v: 5611 case NEON::BI__builtin_neon_vtrnq_v: { 5612 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 5613 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5614 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 5615 Value *SV = nullptr; 5616 5617 for (unsigned vi = 0; vi != 2; ++vi) { 5618 SmallVector<Constant*, 16> Indices; 5619 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 5620 Indices.push_back(ConstantInt::get(Int32Ty, i+vi)); 5621 
Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi)); 5622 } 5623 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); 5624 SV = llvm::ConstantVector::get(Indices); 5625 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn"); 5626 SV = Builder.CreateStore(SV, Addr); 5627 } 5628 return SV; 5629 } 5630 case NEON::BI__builtin_neon_vuzp_v: 5631 case NEON::BI__builtin_neon_vuzpq_v: { 5632 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 5633 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5634 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 5635 Value *SV = nullptr; 5636 5637 for (unsigned vi = 0; vi != 2; ++vi) { 5638 SmallVector<Constant*, 16> Indices; 5639 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 5640 Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi)); 5641 5642 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); 5643 SV = llvm::ConstantVector::get(Indices); 5644 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp"); 5645 SV = Builder.CreateStore(SV, Addr); 5646 } 5647 return SV; 5648 } 5649 case NEON::BI__builtin_neon_vzip_v: 5650 case NEON::BI__builtin_neon_vzipq_v: { 5651 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 5652 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5653 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 5654 Value *SV = nullptr; 5655 5656 for (unsigned vi = 0; vi != 2; ++vi) { 5657 SmallVector<Constant*, 16> Indices; 5658 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 5659 Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1)); 5660 Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e)); 5661 } 5662 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); 5663 SV = llvm::ConstantVector::get(Indices); 5664 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip"); 5665 SV = Builder.CreateStore(SV, Addr); 5666 } 5667 return SV; 5668 } 5669 case NEON::BI__builtin_neon_vqtbl1q_v: { 5670 return 
EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty), 5671 Ops, "vtbl1"); 5672 } 5673 case NEON::BI__builtin_neon_vqtbl2q_v: { 5674 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty), 5675 Ops, "vtbl2"); 5676 } 5677 case NEON::BI__builtin_neon_vqtbl3q_v: { 5678 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty), 5679 Ops, "vtbl3"); 5680 } 5681 case NEON::BI__builtin_neon_vqtbl4q_v: { 5682 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty), 5683 Ops, "vtbl4"); 5684 } 5685 case NEON::BI__builtin_neon_vqtbx1q_v: { 5686 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty), 5687 Ops, "vtbx1"); 5688 } 5689 case NEON::BI__builtin_neon_vqtbx2q_v: { 5690 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty), 5691 Ops, "vtbx2"); 5692 } 5693 case NEON::BI__builtin_neon_vqtbx3q_v: { 5694 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty), 5695 Ops, "vtbx3"); 5696 } 5697 case NEON::BI__builtin_neon_vqtbx4q_v: { 5698 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty), 5699 Ops, "vtbx4"); 5700 } 5701 case NEON::BI__builtin_neon_vsqadd_v: 5702 case NEON::BI__builtin_neon_vsqaddq_v: { 5703 Int = Intrinsic::aarch64_neon_usqadd; 5704 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd"); 5705 } 5706 case NEON::BI__builtin_neon_vuqadd_v: 5707 case NEON::BI__builtin_neon_vuqaddq_v: { 5708 Int = Intrinsic::aarch64_neon_suqadd; 5709 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); 5710 } 5711 } 5712 } 5713 5714 llvm::Value *CodeGenFunction:: 5715 BuildVector(ArrayRef<llvm::Value*> Ops) { 5716 assert((Ops.size() & (Ops.size() - 1)) == 0 && 5717 "Not a power-of-two sized vector!"); 5718 bool AllConstants = true; 5719 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i) 5720 AllConstants &= isa<Constant>(Ops[i]); 5721 5722 // If this is a constant vector, create a ConstantVector. 
5723 if (AllConstants) { 5724 SmallVector<llvm::Constant*, 16> CstOps; 5725 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 5726 CstOps.push_back(cast<Constant>(Ops[i])); 5727 return llvm::ConstantVector::get(CstOps); 5728 } 5729 5730 // Otherwise, insertelement the values to build the vector. 5731 Value *Result = 5732 llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size())); 5733 5734 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 5735 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i)); 5736 5737 return Result; 5738 } 5739 5740 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, 5741 const CallExpr *E) { 5742 SmallVector<Value*, 4> Ops; 5743 5744 // Find out if any arguments are required to be integer constant expressions. 5745 unsigned ICEArguments = 0; 5746 ASTContext::GetBuiltinTypeError Error; 5747 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 5748 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 5749 5750 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { 5751 // If this is a normal argument, just emit it as a scalar. 5752 if ((ICEArguments & (1 << i)) == 0) { 5753 Ops.push_back(EmitScalarExpr(E->getArg(i))); 5754 continue; 5755 } 5756 5757 // If this is required to be a constant, constant fold it so that we know 5758 // that the generated intrinsic gets a ConstantInt. 
5759 llvm::APSInt Result; 5760 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 5761 assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; 5762 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 5763 } 5764 5765 switch (BuiltinID) { 5766 default: return nullptr; 5767 case X86::BI_mm_prefetch: { 5768 Value *Address = EmitScalarExpr(E->getArg(0)); 5769 Value *RW = ConstantInt::get(Int32Ty, 0); 5770 Value *Locality = EmitScalarExpr(E->getArg(1)); 5771 Value *Data = ConstantInt::get(Int32Ty, 1); 5772 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 5773 return Builder.CreateCall4(F, Address, RW, Locality, Data); 5774 } 5775 case X86::BI__builtin_ia32_vec_init_v8qi: 5776 case X86::BI__builtin_ia32_vec_init_v4hi: 5777 case X86::BI__builtin_ia32_vec_init_v2si: 5778 return Builder.CreateBitCast(BuildVector(Ops), 5779 llvm::Type::getX86_MMXTy(getLLVMContext())); 5780 case X86::BI__builtin_ia32_vec_ext_v2si: 5781 return Builder.CreateExtractElement(Ops[0], 5782 llvm::ConstantInt::get(Ops[1]->getType(), 0)); 5783 case X86::BI__builtin_ia32_ldmxcsr: { 5784 Value *Tmp = CreateMemTemp(E->getArg(0)->getType()); 5785 Builder.CreateStore(Ops[0], Tmp); 5786 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr), 5787 Builder.CreateBitCast(Tmp, Int8PtrTy)); 5788 } 5789 case X86::BI__builtin_ia32_stmxcsr: { 5790 Value *Tmp = CreateMemTemp(E->getType()); 5791 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr), 5792 Builder.CreateBitCast(Tmp, Int8PtrTy)); 5793 return Builder.CreateLoad(Tmp, "stmxcsr"); 5794 } 5795 case X86::BI__builtin_ia32_storehps: 5796 case X86::BI__builtin_ia32_storelps: { 5797 llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty); 5798 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2); 5799 5800 // cast val v2i64 5801 Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast"); 5802 5803 // extract (0, 1) 5804 unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 
0 : 1; 5805 llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index); 5806 Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract"); 5807 5808 // cast pointer to i64 & store 5809 Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy); 5810 return Builder.CreateStore(Ops[1], Ops[0]); 5811 } 5812 case X86::BI__builtin_ia32_palignr: { 5813 unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 5814 5815 // If palignr is shifting the pair of input vectors less than 9 bytes, 5816 // emit a shuffle instruction. 5817 if (shiftVal <= 8) { 5818 SmallVector<llvm::Constant*, 8> Indices; 5819 for (unsigned i = 0; i != 8; ++i) 5820 Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i)); 5821 5822 Value* SV = llvm::ConstantVector::get(Indices); 5823 return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr"); 5824 } 5825 5826 // If palignr is shifting the pair of input vectors more than 8 but less 5827 // than 16 bytes, emit a logical right shift of the destination. 5828 if (shiftVal < 16) { 5829 // MMX has these as 1 x i64 vectors for some odd optimization reasons. 5830 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 1); 5831 5832 Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); 5833 Ops[1] = llvm::ConstantInt::get(VecTy, (shiftVal-8) * 8); 5834 5835 // create i32 constant 5836 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_mmx_psrl_q); 5837 return Builder.CreateCall(F, makeArrayRef(Ops.data(), 2), "palignr"); 5838 } 5839 5840 // If palignr is shifting the pair of vectors more than 16 bytes, emit zero. 5841 return llvm::Constant::getNullValue(ConvertType(E->getType())); 5842 } 5843 case X86::BI__builtin_ia32_palignr128: { 5844 unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 5845 5846 // If palignr is shifting the pair of input vectors less than 17 bytes, 5847 // emit a shuffle instruction. 
5848 if (shiftVal <= 16) { 5849 SmallVector<llvm::Constant*, 16> Indices; 5850 for (unsigned i = 0; i != 16; ++i) 5851 Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i)); 5852 5853 Value* SV = llvm::ConstantVector::get(Indices); 5854 return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr"); 5855 } 5856 5857 // If palignr is shifting the pair of input vectors more than 16 but less 5858 // than 32 bytes, emit a logical right shift of the destination. 5859 if (shiftVal < 32) { 5860 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2); 5861 5862 Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); 5863 Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8); 5864 5865 // create i32 constant 5866 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq); 5867 return Builder.CreateCall(F, makeArrayRef(Ops.data(), 2), "palignr"); 5868 } 5869 5870 // If palignr is shifting the pair of vectors more than 32 bytes, emit zero. 5871 return llvm::Constant::getNullValue(ConvertType(E->getType())); 5872 } 5873 case X86::BI__builtin_ia32_palignr256: { 5874 unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 5875 5876 // If palignr is shifting the pair of input vectors less than 17 bytes, 5877 // emit a shuffle instruction. 
5878 if (shiftVal <= 16) { 5879 SmallVector<llvm::Constant*, 32> Indices; 5880 // 256-bit palignr operates on 128-bit lanes so we need to handle that 5881 for (unsigned l = 0; l != 2; ++l) { 5882 unsigned LaneStart = l * 16; 5883 unsigned LaneEnd = (l+1) * 16; 5884 for (unsigned i = 0; i != 16; ++i) { 5885 unsigned Idx = shiftVal + i + LaneStart; 5886 if (Idx >= LaneEnd) Idx += 16; // end of lane, switch operand 5887 Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx)); 5888 } 5889 } 5890 5891 Value* SV = llvm::ConstantVector::get(Indices); 5892 return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr"); 5893 } 5894 5895 // If palignr is shifting the pair of input vectors more than 16 but less 5896 // than 32 bytes, emit a logical right shift of the destination. 5897 if (shiftVal < 32) { 5898 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 4); 5899 5900 Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); 5901 Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8); 5902 5903 // create i32 constant 5904 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_avx2_psrl_dq); 5905 return Builder.CreateCall(F, makeArrayRef(Ops.data(), 2), "palignr"); 5906 } 5907 5908 // If palignr is shifting the pair of vectors more than 32 bytes, emit zero. 5909 return llvm::Constant::getNullValue(ConvertType(E->getType())); 5910 } 5911 case X86::BI__builtin_ia32_movntps: 5912 case X86::BI__builtin_ia32_movntps256: 5913 case X86::BI__builtin_ia32_movntpd: 5914 case X86::BI__builtin_ia32_movntpd256: 5915 case X86::BI__builtin_ia32_movntdq: 5916 case X86::BI__builtin_ia32_movntdq256: 5917 case X86::BI__builtin_ia32_movnti: 5918 case X86::BI__builtin_ia32_movnti64: { 5919 llvm::MDNode *Node = llvm::MDNode::get(getLLVMContext(), 5920 Builder.getInt32(1)); 5921 5922 // Convert the type of the pointer to a pointer to the stored type. 
5923 Value *BC = Builder.CreateBitCast(Ops[0], 5924 llvm::PointerType::getUnqual(Ops[1]->getType()), 5925 "cast"); 5926 StoreInst *SI = Builder.CreateStore(Ops[1], BC); 5927 SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); 5928 5929 // If the operand is an integer, we can't assume alignment. Otherwise, 5930 // assume natural alignment. 5931 QualType ArgTy = E->getArg(1)->getType(); 5932 unsigned Align; 5933 if (ArgTy->isIntegerType()) 5934 Align = 1; 5935 else 5936 Align = getContext().getTypeSizeInChars(ArgTy).getQuantity(); 5937 SI->setAlignment(Align); 5938 return SI; 5939 } 5940 // 3DNow! 5941 case X86::BI__builtin_ia32_pswapdsf: 5942 case X86::BI__builtin_ia32_pswapdsi: { 5943 const char *name = nullptr; 5944 Intrinsic::ID ID = Intrinsic::not_intrinsic; 5945 switch(BuiltinID) { 5946 default: llvm_unreachable("Unsupported intrinsic!"); 5947 case X86::BI__builtin_ia32_pswapdsf: 5948 case X86::BI__builtin_ia32_pswapdsi: 5949 name = "pswapd"; 5950 ID = Intrinsic::x86_3dnowa_pswapd; 5951 break; 5952 } 5953 llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext()); 5954 Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast"); 5955 llvm::Function *F = CGM.getIntrinsic(ID); 5956 return Builder.CreateCall(F, Ops, name); 5957 } 5958 case X86::BI__builtin_ia32_rdrand16_step: 5959 case X86::BI__builtin_ia32_rdrand32_step: 5960 case X86::BI__builtin_ia32_rdrand64_step: 5961 case X86::BI__builtin_ia32_rdseed16_step: 5962 case X86::BI__builtin_ia32_rdseed32_step: 5963 case X86::BI__builtin_ia32_rdseed64_step: { 5964 Intrinsic::ID ID; 5965 switch (BuiltinID) { 5966 default: llvm_unreachable("Unsupported intrinsic!"); 5967 case X86::BI__builtin_ia32_rdrand16_step: 5968 ID = Intrinsic::x86_rdrand_16; 5969 break; 5970 case X86::BI__builtin_ia32_rdrand32_step: 5971 ID = Intrinsic::x86_rdrand_32; 5972 break; 5973 case X86::BI__builtin_ia32_rdrand64_step: 5974 ID = Intrinsic::x86_rdrand_64; 5975 break; 5976 case X86::BI__builtin_ia32_rdseed16_step: 5977 ID = 
Intrinsic::x86_rdseed_16; 5978 break; 5979 case X86::BI__builtin_ia32_rdseed32_step: 5980 ID = Intrinsic::x86_rdseed_32; 5981 break; 5982 case X86::BI__builtin_ia32_rdseed64_step: 5983 ID = Intrinsic::x86_rdseed_64; 5984 break; 5985 } 5986 5987 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID)); 5988 Builder.CreateStore(Builder.CreateExtractValue(Call, 0), Ops[0]); 5989 return Builder.CreateExtractValue(Call, 1); 5990 } 5991 // AVX2 broadcast 5992 case X86::BI__builtin_ia32_vbroadcastsi256: { 5993 Value *VecTmp = CreateMemTemp(E->getArg(0)->getType()); 5994 Builder.CreateStore(Ops[0], VecTmp); 5995 Value *F = CGM.getIntrinsic(Intrinsic::x86_avx2_vbroadcasti128); 5996 return Builder.CreateCall(F, Builder.CreateBitCast(VecTmp, Int8PtrTy)); 5997 } 5998 } 5999 } 6000 6001 6002 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, 6003 const CallExpr *E) { 6004 SmallVector<Value*, 4> Ops; 6005 6006 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) 6007 Ops.push_back(EmitScalarExpr(E->getArg(i))); 6008 6009 Intrinsic::ID ID = Intrinsic::not_intrinsic; 6010 6011 switch (BuiltinID) { 6012 default: return nullptr; 6013 6014 // vec_ld, vec_lvsl, vec_lvsr 6015 case PPC::BI__builtin_altivec_lvx: 6016 case PPC::BI__builtin_altivec_lvxl: 6017 case PPC::BI__builtin_altivec_lvebx: 6018 case PPC::BI__builtin_altivec_lvehx: 6019 case PPC::BI__builtin_altivec_lvewx: 6020 case PPC::BI__builtin_altivec_lvsl: 6021 case PPC::BI__builtin_altivec_lvsr: 6022 { 6023 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); 6024 6025 Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]); 6026 Ops.pop_back(); 6027 6028 switch (BuiltinID) { 6029 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!"); 6030 case PPC::BI__builtin_altivec_lvx: 6031 ID = Intrinsic::ppc_altivec_lvx; 6032 break; 6033 case PPC::BI__builtin_altivec_lvxl: 6034 ID = Intrinsic::ppc_altivec_lvxl; 6035 break; 6036 case PPC::BI__builtin_altivec_lvebx: 6037 ID = Intrinsic::ppc_altivec_lvebx; 6038 break; 6039 case 
PPC::BI__builtin_altivec_lvehx: 6040 ID = Intrinsic::ppc_altivec_lvehx; 6041 break; 6042 case PPC::BI__builtin_altivec_lvewx: 6043 ID = Intrinsic::ppc_altivec_lvewx; 6044 break; 6045 case PPC::BI__builtin_altivec_lvsl: 6046 ID = Intrinsic::ppc_altivec_lvsl; 6047 break; 6048 case PPC::BI__builtin_altivec_lvsr: 6049 ID = Intrinsic::ppc_altivec_lvsr; 6050 break; 6051 } 6052 llvm::Function *F = CGM.getIntrinsic(ID); 6053 return Builder.CreateCall(F, Ops, ""); 6054 } 6055 6056 // vec_st 6057 case PPC::BI__builtin_altivec_stvx: 6058 case PPC::BI__builtin_altivec_stvxl: 6059 case PPC::BI__builtin_altivec_stvebx: 6060 case PPC::BI__builtin_altivec_stvehx: 6061 case PPC::BI__builtin_altivec_stvewx: 6062 { 6063 Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy); 6064 Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]); 6065 Ops.pop_back(); 6066 6067 switch (BuiltinID) { 6068 default: llvm_unreachable("Unsupported st intrinsic!"); 6069 case PPC::BI__builtin_altivec_stvx: 6070 ID = Intrinsic::ppc_altivec_stvx; 6071 break; 6072 case PPC::BI__builtin_altivec_stvxl: 6073 ID = Intrinsic::ppc_altivec_stvxl; 6074 break; 6075 case PPC::BI__builtin_altivec_stvebx: 6076 ID = Intrinsic::ppc_altivec_stvebx; 6077 break; 6078 case PPC::BI__builtin_altivec_stvehx: 6079 ID = Intrinsic::ppc_altivec_stvehx; 6080 break; 6081 case PPC::BI__builtin_altivec_stvewx: 6082 ID = Intrinsic::ppc_altivec_stvewx; 6083 break; 6084 } 6085 llvm::Function *F = CGM.getIntrinsic(ID); 6086 return Builder.CreateCall(F, Ops, ""); 6087 } 6088 } 6089 } 6090 6091 // Emit an intrinsic that has 1 float or double. 6092 static Value *emitUnaryFPBuiltin(CodeGenFunction &CGF, 6093 const CallExpr *E, 6094 unsigned IntrinsicID) { 6095 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 6096 6097 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 6098 return CGF.Builder.CreateCall(F, Src0); 6099 } 6100 6101 // Emit an intrinsic that has 3 float or double operands. 
6102 static Value *emitTernaryFPBuiltin(CodeGenFunction &CGF, 6103 const CallExpr *E, 6104 unsigned IntrinsicID) { 6105 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 6106 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 6107 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); 6108 6109 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 6110 return CGF.Builder.CreateCall3(F, Src0, Src1, Src2); 6111 } 6112 6113 // Emit an intrinsic that has 1 float or double operand, and 1 integer. 6114 static Value *emitFPIntBuiltin(CodeGenFunction &CGF, 6115 const CallExpr *E, 6116 unsigned IntrinsicID) { 6117 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 6118 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 6119 6120 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 6121 return CGF.Builder.CreateCall2(F, Src0, Src1); 6122 } 6123 6124 Value *CodeGenFunction::EmitR600BuiltinExpr(unsigned BuiltinID, 6125 const CallExpr *E) { 6126 switch (BuiltinID) { 6127 case R600::BI__builtin_amdgpu_div_scale: 6128 case R600::BI__builtin_amdgpu_div_scalef: { 6129 // Translate from the intrinsics's struct return to the builtin's out 6130 // argument. 
6131 6132 std::pair<llvm::Value *, unsigned> FlagOutPtr 6133 = EmitPointerWithAlignment(E->getArg(3)); 6134 6135 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 6136 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 6137 llvm::Value *Z = EmitScalarExpr(E->getArg(2)); 6138 6139 llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::AMDGPU_div_scale, 6140 X->getType()); 6141 6142 llvm::Value *Tmp = Builder.CreateCall3(Callee, X, Y, Z); 6143 6144 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0); 6145 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1); 6146 6147 llvm::Type *RealFlagType 6148 = FlagOutPtr.first->getType()->getPointerElementType(); 6149 6150 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType); 6151 llvm::StoreInst *FlagStore = Builder.CreateStore(FlagExt, FlagOutPtr.first); 6152 FlagStore->setAlignment(FlagOutPtr.second); 6153 return Result; 6154 } 6155 case R600::BI__builtin_amdgpu_div_fmas: 6156 case R600::BI__builtin_amdgpu_div_fmasf: 6157 return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fmas); 6158 case R600::BI__builtin_amdgpu_div_fixup: 6159 case R600::BI__builtin_amdgpu_div_fixupf: 6160 return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fixup); 6161 case R600::BI__builtin_amdgpu_trig_preop: 6162 case R600::BI__builtin_amdgpu_trig_preopf: 6163 return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_trig_preop); 6164 case R600::BI__builtin_amdgpu_rcp: 6165 case R600::BI__builtin_amdgpu_rcpf: 6166 return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rcp); 6167 case R600::BI__builtin_amdgpu_rsq: 6168 case R600::BI__builtin_amdgpu_rsqf: 6169 return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq); 6170 case R600::BI__builtin_amdgpu_rsq_clamped: 6171 case R600::BI__builtin_amdgpu_rsq_clampedf: 6172 return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq_clamped); 6173 case R600::BI__builtin_amdgpu_ldexp: 6174 case R600::BI__builtin_amdgpu_ldexpf: 6175 return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_ldexp); 6176 
default: 6177 return nullptr; 6178 } 6179 } 6180