//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCXXABI.h"
#include "CGObjCRuntime.h"
#include "CGOpenCLRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/Analysis/Analyses/OSLog.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
#include <sstream>

using namespace clang;
using namespace CodeGen;
using namespace llvm;

static int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
  return std::min(High, std::max(Low, Value));
}

/// getBuiltinLibFunction - Given a builtin id for a function like
/// "__builtin_fabsf", return a Function* for "fabsf".
llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
                                                     unsigned BuiltinID) {
  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));

  // Get the name, skip over the __builtin_ prefix (if necessary).
  StringRef Name;
  GlobalDecl D(FD);

  // If the builtin has been declared explicitly with an assembler label,
  // use the mangled name. This differs from the plain label on platforms
  // that prefix labels.
  if (FD->hasAttr<AsmLabelAttr>())
    Name = getMangledName(D);
  else
    Name = Context.BuiltinInfo.getName(BuiltinID) + 10;

  llvm::FunctionType *Ty =
      cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));

  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
}

/// Emit the conversions required to turn the given value into an
/// integer of the given size.
static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
                        QualType T, llvm::IntegerType *IntType) {
  V = CGF.EmitToMemory(V, T);

  if (V->getType()->isPointerTy())
    return CGF.Builder.CreatePtrToInt(V, IntType);

  assert(V->getType() == IntType);
  return V;
}

static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
                          QualType T, llvm::Type *ResultType) {
  V = CGF.EmitFromMemory(V, T);

  if (ResultType->isPointerTy())
    return CGF.Builder.CreateIntToPtr(V, ResultType);

  assert(V->getType() == ResultType);
  return V;
}
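// Illustrative note: for a pointer-typed atomic operand (say a 'void *' on a
// 64-bit target), EmitToInt wraps the value in 'ptrtoint i8* %v to i64' so
// the atomicrmw/cmpxchg helpers below can operate on an integer, and
// EmitFromInt restores the pointer with 'inttoptr' afterwards. Integer
// operands of the right width pass through both helpers unchanged.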
/// Utility to insert an atomic instruction based on Intrinsic::ID
/// and the expression node.
static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
                                    llvm::AtomicRMWInst::BinOp Kind,
                                    const CallExpr *E) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType =
      llvm::IntegerType::get(CGF.getLLVMContext(),
                             CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);

  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
      Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
  return EmitFromInt(CGF, Result, T, ValueType);
}

static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Val = CGF.EmitScalarExpr(E->getArg(0));
  Value *Address = CGF.EmitScalarExpr(E->getArg(1));

  // Convert the type of the pointer to a pointer to the stored type.
  Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
  Value *BC = CGF.Builder.CreateBitCast(
      Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
  LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
  LV.setNontemporal(true);
  CGF.EmitStoreOfScalar(Val, LV, false);
  return nullptr;
}

static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Address = CGF.EmitScalarExpr(E->getArg(0));

  LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
  LV.setNontemporal(true);
  return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
}

static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
                               llvm::AtomicRMWInst::BinOp Kind,
                               const CallExpr *E) {
  return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
}
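// Example: for a call such as __sync_fetch_and_add_4(p, 5), the helpers
// above emit roughly
//   %old = atomicrmw add i32* %p, i32 5 seq_cst
// and return the old value; only the ordering is fixed here (always
// sequentially consistent), while the operation itself comes from 'Kind'.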
/// Utility to insert an atomic instruction based on Intrinsic::ID and
/// the expression node, where the return value is the result of the
/// operation.
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
                                   llvm::AtomicRMWInst::BinOp Kind,
                                   const CallExpr *E,
                                   Instruction::BinaryOps Op,
                                   bool Invert = false) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType =
      llvm::IntegerType::get(CGF.getLLVMContext(),
                             CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);

  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
      Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
  if (Invert)
    Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
                                     llvm::ConstantInt::get(IntType, -1));
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}

/// @brief Utility to insert an atomic cmpxchg instruction.
///
/// @param CGF The current codegen function.
/// @param E   Builtin call expression to convert to cmpxchg.
///            arg0 - address to operate on
///            arg1 - value to compare with
///            arg2 - new value
/// @param ReturnBool Specifies whether to return success flag of
///                   cmpxchg result or the old value.
///
/// @returns result of cmpxchg, according to ReturnBool
static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
                                     bool ReturnBool) {
  QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType = llvm::IntegerType::get(
      CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  Value *Args[3];
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);

  Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
      Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
      llvm::AtomicOrdering::SequentiallyConsistent);
  if (ReturnBool)
    // Extract boolean success flag and zext it to int.
    return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
                                  CGF.ConvertType(E->getType()));
  else
    // Extract old value and emit it using the same type as compare value.
    return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
                       ValueType);
}
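// Example: __sync_bool_compare_and_swap_4(p, old, new) becomes roughly
//   %pair = cmpxchg i32* %p, i32 %old, i32 %new seq_cst seq_cst
//   %ok   = extractvalue { i32, i1 } %pair, 1
//   %res  = zext i1 %ok to i32
// while the _val_ flavor extracts element 0 (the previous value) instead.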
// Emit a simple mangled intrinsic that has 1 argument and a return type
// matching the argument type.
static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
                               const CallExpr *E,
                               unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));

  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, Src0);
}

// Emit an intrinsic that has 2 operands of the same type as its result.
static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
                                const CallExpr *E,
                                unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, { Src0, Src1 });
}

// Emit an intrinsic that has 3 operands of the same type as its result.
static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
                                 const CallExpr *E,
                                 unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));

  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
}

// Emit an intrinsic that has 1 float or double operand, and 1 integer.
static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
                               const CallExpr *E,
                               unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, {Src0, Src1});
}

/// EmitFAbs - Emit a call to @llvm.fabs().
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
  Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
  llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
  Call->setDoesNotAccessMemory();
  return Call;
}

/// Emit the computation of the sign bit for a floating point value. Returns
/// the i1 sign bit value.
static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
  LLVMContext &C = CGF.CGM.getLLVMContext();

  llvm::Type *Ty = V->getType();
  int Width = Ty->getPrimitiveSizeInBits();
  llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
  V = CGF.Builder.CreateBitCast(V, IntTy);
  if (Ty->isPPC_FP128Ty()) {
    // We want the sign bit of the higher-order double. The bitcast we just
    // did works as if the double-double was stored to memory and then
    // read as an i128. The "store" will put the higher-order double in the
    // lower address in both little- and big-Endian modes, but the "load"
    // will treat those bits as a different part of the i128: the low bits in
    // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
    // we need to shift the high bits down to the low before truncating.
    Width >>= 1;
    if (CGF.getTarget().isBigEndian()) {
      Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
      V = CGF.Builder.CreateLShr(V, ShiftCst);
    }
    // We are truncating value in order to extract the higher-order
    // double, which we will be using to extract the sign from.
    IntTy = llvm::IntegerType::get(C, Width);
    V = CGF.Builder.CreateTrunc(V, IntTy);
  }
  Value *Zero = llvm::Constant::getNullValue(IntTy);
  return CGF.Builder.CreateICmpSLT(V, Zero);
}
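// Worked example: for a ppc_fp128 (an i128 after the bitcast) on a
// big-endian target, the higher-order double occupies the top 64 bits, so
// the code shifts right by 64 and truncates to i64 before the signed
// compare; on little-endian the truncation alone keeps the right half.
// For plain float/double the sign-bit test is simply 'icmp slt iN %v, 0'.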
static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
                              const CallExpr *E, llvm::Constant *calleeValue) {
  CGCallee callee = CGCallee::forDirect(calleeValue, FD);
  return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
}

/// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
/// depending on IntrinsicID.
///
/// \arg CGF The current codegen function.
/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
/// \arg X The first argument to the llvm.*.with.overflow.*.
/// \arg Y The second argument to the llvm.*.with.overflow.*.
/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
/// \returns The result (i.e. sum/product) returned by the intrinsic.
static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
                                          const llvm::Intrinsic::ID IntrinsicID,
                                          llvm::Value *X, llvm::Value *Y,
                                          llvm::Value *&Carry) {
  // Make sure we have integers of the same width.
  assert(X->getType() == Y->getType() &&
         "Arguments must be the same type. (Did you forget to make sure both "
         "arguments have the same integer width?)");

  llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
  llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
  Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
  return CGF.Builder.CreateExtractValue(Tmp, 0);
}

static Value *emitRangedBuiltin(CodeGenFunction &CGF,
                                unsigned IntrinsicID,
                                int low, int high) {
  llvm::MDBuilder MDHelper(CGF.getLLVMContext());
  llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
  llvm::Instruction *Call = CGF.Builder.CreateCall(F);
  Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
  return Call;
}

namespace {
struct WidthAndSignedness {
  unsigned Width;
  bool Signed;
};
}

static WidthAndSignedness
getIntegerWidthAndSignedness(const clang::ASTContext &context,
                             const clang::QualType Type) {
  assert(Type->isIntegerType() && "Given type is not an integer.");
  unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
  bool Signed = Type->isSignedIntegerType();
  return {Width, Signed};
}

// Given one or more integer types, this function produces an integer type that
// encompasses them: any value in one of the given types could be expressed in
// the encompassing type.
static struct WidthAndSignedness
EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
  assert(Types.size() > 0 && "Empty list of types.");

  // If any of the given types is signed, we must return a signed type.
  bool Signed = false;
  for (const auto &Type : Types) {
    Signed |= Type.Signed;
  }

  // The encompassing type must have a width greater than or equal to the width
  // of the specified types. Additionally, if the encompassing type is signed,
  // its width must be strictly greater than the width of any unsigned types
  // given.
  unsigned Width = 0;
  for (const auto &Type : Types) {
    unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
    if (Width < MinWidth) {
      Width = MinWidth;
    }
  }

  return {Width, Signed};
}
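// Worked example: for {unsigned 32-bit, signed 8-bit} the result is signed,
// and the unsigned 32-bit member forces MinWidth = 32 + 1, so the
// encompassing type is a signed 33-bit integer; every value of either input
// type is representable in it.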
Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
  llvm::Type *DestType = Int8PtrTy;
  if (ArgValue->getType() != DestType)
    ArgValue =
        Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());

  Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
  return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
}

/// Checks if using the result of __builtin_object_size(p, @p From) in place of
/// __builtin_object_size(p, @p To) is correct.
static bool areBOSTypesCompatible(int From, int To) {
  // Note: Our __builtin_object_size implementation currently treats Type=0 and
  // Type=2 identically. Encoding this implementation detail here may make
  // improving __builtin_object_size difficult in the future, so it's omitted.
  return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
}

static llvm::Value *
getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
  return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
}

llvm::Value *
CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
                                                 llvm::IntegerType *ResType) {
  uint64_t ObjectSize;
  if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
    return emitBuiltinObjectSize(E, Type, ResType);
  return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
}

/// Returns a Value corresponding to the size of the given expression.
/// This Value may be either of the following:
///   - A llvm::Argument (if E is a param with the pass_object_size attribute
///     on it)
///   - A call to the @llvm.objectsize intrinsic
llvm::Value *
CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
                                       llvm::IntegerType *ResType) {
  // We need to reference an argument if the pointer is a parameter with the
  // pass_object_size attribute.
  if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
    auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
    auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
    if (Param != nullptr && PS != nullptr &&
        areBOSTypesCompatible(PS->getType(), Type)) {
      auto Iter = SizeArguments.find(Param);
      assert(Iter != SizeArguments.end());

      const ImplicitParamDecl *D = Iter->second;
      auto DIter = LocalDeclMap.find(D);
      assert(DIter != LocalDeclMap.end());

      return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
                              getContext().getSizeType(), E->getLocStart());
    }
  }

  // LLVM can't handle Type=3 appropriately, and __builtin_object_size
  // shouldn't evaluate E for side-effects. In either case, we shouldn't lower
  // to @llvm.objectsize.
  if (Type == 3 || E->HasSideEffects(getContext()))
    return getDefaultBuiltinObjectSizeResult(Type, ResType);

  // LLVM only supports 0 and 2; make sure that we pass that along as a
  // boolean.
  auto *CI = ConstantInt::get(Builder.getInt1Ty(), (Type & 2) >> 1);
  // FIXME: Get right address space.
  llvm::Type *Tys[] = {ResType, Builder.getInt8PtrTy(0)};
  Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys);
  return Builder.CreateCall(F, {EmitScalarExpr(E), CI});
}
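// Example: when the size is not a compile-time constant,
// __builtin_object_size(p, 0) lowers to roughly
//   %size = call i64 @llvm.objectsize.i64.p0i8(i8* %p, i1 false)
// with the i1 'min' flag set to true for Type=2, leaving the optimizer to
// resolve it later; Type=3 and side-effecting pointer expressions get the
// conservative default constant instead.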
// Many of the MSVC builtins exist on both x64 and ARM; to avoid repeating
// code, we handle them here.
enum class CodeGenFunction::MSVCIntrin {
  _BitScanForward,
  _BitScanReverse,
  _InterlockedAnd,
  _InterlockedDecrement,
  _InterlockedExchange,
  _InterlockedExchangeAdd,
  _InterlockedExchangeSub,
  _InterlockedIncrement,
  _InterlockedOr,
  _InterlockedXor,
};

Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
                                            const CallExpr *E) {
  switch (BuiltinID) {
  case MSVCIntrin::_BitScanForward:
  case MSVCIntrin::_BitScanReverse: {
    Value *ArgValue = EmitScalarExpr(E->getArg(1));

    llvm::Type *ArgType = ArgValue->getType();
    llvm::Type *IndexType =
        EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType();
    llvm::Type *ResultType = ConvertType(E->getType());

    Value *ArgZero = llvm::Constant::getNullValue(ArgType);
    Value *ResZero = llvm::Constant::getNullValue(ResultType);
    Value *ResOne = llvm::ConstantInt::get(ResultType, 1);

    BasicBlock *Begin = Builder.GetInsertBlock();
    BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
    Builder.SetInsertPoint(End);
    PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");

    Builder.SetInsertPoint(Begin);
    Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
    BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
    Builder.CreateCondBr(IsZero, End, NotZero);
    Result->addIncoming(ResZero, Begin);

    Builder.SetInsertPoint(NotZero);
    Address IndexAddress = EmitPointerWithAlignment(E->getArg(0));

    if (BuiltinID == MSVCIntrin::_BitScanForward) {
      Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
      Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
      ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
      Builder.CreateStore(ZeroCount, IndexAddress, false);
    } else {
      unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
      Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);

      Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
      Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
      ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
      Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
      Builder.CreateStore(Index, IndexAddress, false);
    }
    Builder.CreateBr(End);
    Result->addIncoming(ResOne, NotZero);

    Builder.SetInsertPoint(End);
    return Result;
  }
  case MSVCIntrin::_InterlockedAnd:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
  case MSVCIntrin::_InterlockedExchange:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
  case MSVCIntrin::_InterlockedExchangeAdd:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
  case MSVCIntrin::_InterlockedExchangeSub:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
  case MSVCIntrin::_InterlockedOr:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
  case MSVCIntrin::_InterlockedXor:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);

  case MSVCIntrin::_InterlockedDecrement: {
    llvm::Type *IntTy = ConvertType(E->getType());
    AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
        AtomicRMWInst::Sub,
        EmitScalarExpr(E->getArg(0)),
        ConstantInt::get(IntTy, 1),
        llvm::AtomicOrdering::SequentiallyConsistent);
    return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1));
  }
  case MSVCIntrin::_InterlockedIncrement: {
    llvm::Type *IntTy = ConvertType(E->getType());
    AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
        AtomicRMWInst::Add,
        EmitScalarExpr(E->getArg(0)),
        ConstantInt::get(IntTy, 1),
        llvm::AtomicOrdering::SequentiallyConsistent);
    return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1));
  }
  }
  llvm_unreachable("Incorrect MSVC intrinsic!");
}
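// Example: _BitScanForward(&Index, 0x10) stores 4 (the trailing-zero count)
// through Index and yields 1; a zero mask branches straight to bitscan_end
// and yields 0 without touching Index. _BitScanReverse instead stores
// (width - 1) - ctlz, i.e. the position of the highest set bit.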
namespace {
// ARC cleanup for __builtin_os_log_format
struct CallObjCArcUse final : EHScopeStack::Cleanup {
  CallObjCArcUse(llvm::Value *object) : object(object) {}
  llvm::Value *object;

  void Emit(CodeGenFunction &CGF, Flags flags) override {
    CGF.EmitARCIntrinsicUse(object);
  }
};
}

RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
                                        unsigned BuiltinID, const CallExpr *E,
                                        ReturnValueSlot ReturnValue) {
  // See if we can constant fold this builtin. If so, don't emit it at all.
  Expr::EvalResult Result;
  if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
      !Result.hasSideEffects()) {
    if (Result.Val.isInt())
      return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
                                                Result.Val.getInt()));
    if (Result.Val.isFloat())
      return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
                                               Result.Val.getFloat()));
  }
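  // Example: a side-effect-free call like __builtin_clz(16) is fully
  // evaluated here and becomes the constant i32 27; no llvm.ctlz call is
  // ever emitted for it.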
  switch (BuiltinID) {
  default: break;  // Handle intrinsics and libm functions below.
  case Builtin::BI__builtin___CFStringMakeConstantString:
  case Builtin::BI__builtin___NSStringMakeConstantString:
    return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr));
  case Builtin::BI__builtin_stdarg_start:
  case Builtin::BI__builtin_va_start:
  case Builtin::BI__va_start:
  case Builtin::BI__builtin_va_end:
    return RValue::get(
        EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
                           ? EmitScalarExpr(E->getArg(0))
                           : EmitVAListRef(E->getArg(0)).getPointer(),
                       BuiltinID != Builtin::BI__builtin_va_end));
  case Builtin::BI__builtin_va_copy: {
    Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
    Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();

    llvm::Type *Type = Int8PtrTy;

    DstPtr = Builder.CreateBitCast(DstPtr, Type);
    SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
    return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
                                          {DstPtr, SrcPtr}));
  }
  case Builtin::BI__builtin_abs:
  case Builtin::BI__builtin_labs:
  case Builtin::BI__builtin_llabs: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
    Value *CmpResult =
        Builder.CreateICmpSGE(ArgValue,
                              llvm::Constant::getNullValue(ArgValue->getType()),
                              "abscond");
    Value *Result =
        Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");

    return RValue::get(Result);
  }
  case Builtin::BI__builtin_fabs:
  case Builtin::BI__builtin_fabsf:
  case Builtin::BI__builtin_fabsl: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
  }
  case Builtin::BI__builtin_fmod:
  case Builtin::BI__builtin_fmodf:
  case Builtin::BI__builtin_fmodl: {
    Value *Arg1 = EmitScalarExpr(E->getArg(0));
    Value *Arg2 = EmitScalarExpr(E->getArg(1));
    Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_copysign:
  case Builtin::BI__builtin_copysignf:
  case Builtin::BI__builtin_copysignl: {
    return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
  }
  case Builtin::BI__builtin_ceil:
  case Builtin::BI__builtin_ceilf:
  case Builtin::BI__builtin_ceill: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
  }
  case Builtin::BI__builtin_floor:
  case Builtin::BI__builtin_floorf:
  case Builtin::BI__builtin_floorl: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
  }
  case Builtin::BI__builtin_trunc:
  case Builtin::BI__builtin_truncf:
  case Builtin::BI__builtin_truncl: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
  }
  case Builtin::BI__builtin_rint:
  case Builtin::BI__builtin_rintf:
  case Builtin::BI__builtin_rintl: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
  }
  case Builtin::BI__builtin_nearbyint:
  case Builtin::BI__builtin_nearbyintf:
  case Builtin::BI__builtin_nearbyintl: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
  }
  case Builtin::BI__builtin_round:
  case Builtin::BI__builtin_roundf:
  case Builtin::BI__builtin_roundl: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
  }
  case Builtin::BI__builtin_fmin:
  case Builtin::BI__builtin_fminf:
  case Builtin::BI__builtin_fminl: {
    return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
  }
  case Builtin::BI__builtin_fmax:
  case Builtin::BI__builtin_fmaxf:
  case Builtin::BI__builtin_fmaxl: {
    return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
  }
  case Builtin::BI__builtin_conj:
  case Builtin::BI__builtin_conjf:
  case Builtin::BI__builtin_conjl: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    Value *Real = ComplexVal.first;
    Value *Imag = ComplexVal.second;
    Value *Zero =
        Imag->getType()->isFPOrFPVectorTy()
            ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
            : llvm::Constant::getNullValue(Imag->getType());

    Imag = Builder.CreateFSub(Zero, Imag, "sub");
    return RValue::getComplex(std::make_pair(Real, Imag));
  }
  case Builtin::BI__builtin_creal:
  case Builtin::BI__builtin_crealf:
  case Builtin::BI__builtin_creall:
  case Builtin::BIcreal:
  case Builtin::BIcrealf:
  case Builtin::BIcreall: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    return RValue::get(ComplexVal.first);
  }

  case Builtin::BI__builtin_cimag:
  case Builtin::BI__builtin_cimagf:
  case Builtin::BI__builtin_cimagl:
  case Builtin::BIcimag:
  case Builtin::BIcimagf:
  case Builtin::BIcimagl: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    return RValue::get(ComplexVal.second);
  }

  case Builtin::BI__builtin_ctzs:
  case Builtin::BI__builtin_ctz:
  case Builtin::BI__builtin_ctzl:
  case Builtin::BI__builtin_ctzll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
    Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_clzs:
  case Builtin::BI__builtin_clz:
  case Builtin::BI__builtin_clzl:
  case Builtin::BI__builtin_clzll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
    Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
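  // Example: __builtin_ctz(8) becomes 'call i32 @llvm.cttz.i32(i32 8, i1 %zu)'
  // and evaluates to 3. The i1 flag mirrors the target's isCLZForZeroUndef():
  // when true, a zero input is undefined, which matches the C-level contract
  // of __builtin_ctz/__builtin_clz.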
  case Builtin::BI__builtin_ffs:
  case Builtin::BI__builtin_ffsl:
  case Builtin::BI__builtin_ffsll: {
    // ffs(x) -> x ? cttz(x) + 1 : 0
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp =
        Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
                          llvm::ConstantInt::get(ArgType, 1));
    Value *Zero = llvm::Constant::getNullValue(ArgType);
    Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
    Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_parity:
  case Builtin::BI__builtin_parityl:
  case Builtin::BI__builtin_parityll: {
    // parity(x) -> ctpop(x) & 1
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp = Builder.CreateCall(F, ArgValue);
    Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__popcnt16:
  case Builtin::BI__popcnt:
  case Builtin::BI__popcnt64:
  case Builtin::BI__builtin_popcount:
  case Builtin::BI__builtin_popcountl:
  case Builtin::BI__builtin_popcountll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateCall(F, ArgValue);
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI_rotr8:
  case Builtin::BI_rotr16:
  case Builtin::BI_rotr:
  case Builtin::BI_lrotr:
  case Builtin::BI_rotr64: {
    Value *Val = EmitScalarExpr(E->getArg(0));
    Value *Shift = EmitScalarExpr(E->getArg(1));

    llvm::Type *ArgType = Val->getType();
    Shift = Builder.CreateIntCast(Shift, ArgType, false);
    unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
    Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
    Value *ArgZero = llvm::Constant::getNullValue(ArgType);

    Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
    Shift = Builder.CreateAnd(Shift, Mask);
    Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift);

    Value *RightShifted = Builder.CreateLShr(Val, Shift);
    Value *LeftShifted = Builder.CreateShl(Val, LeftShift);
    Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);

    Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
    Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
    return RValue::get(Result);
  }
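  // Worked example: _rotr8(0xB1, 4) computes
  // (0xB1 >> 4) | (0xB1 << (8 - 4)) = 0x0B | 0x10 = 0x1B. The final select
  // exists because a masked shift amount of zero would make the complementary
  // shift equal the bit width, which is poison for shl/lshr in LLVM IR.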
  case Builtin::BI_rotl8:
  case Builtin::BI_rotl16:
  case Builtin::BI_rotl:
  case Builtin::BI_lrotl:
  case Builtin::BI_rotl64: {
    Value *Val = EmitScalarExpr(E->getArg(0));
    Value *Shift = EmitScalarExpr(E->getArg(1));

    llvm::Type *ArgType = Val->getType();
    Shift = Builder.CreateIntCast(Shift, ArgType, false);
    unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
    Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
    Value *ArgZero = llvm::Constant::getNullValue(ArgType);

    Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
    Shift = Builder.CreateAnd(Shift, Mask);
    Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift);

    Value *LeftShifted = Builder.CreateShl(Val, Shift);
    Value *RightShifted = Builder.CreateLShr(Val, RightShift);
    Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);

    Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
    Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_unpredictable: {
    // Always return the argument of __builtin_unpredictable. LLVM does not
    // handle this builtin. Metadata for this builtin should be added directly
    // to instructions such as branches or switches that use it.
    return RValue::get(EmitScalarExpr(E->getArg(0)));
  }
  case Builtin::BI__builtin_expect: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = ArgValue->getType();

    Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
    // Don't generate llvm.expect on -O0 as the backend won't use it for
    // anything.
    // Note, we still IRGen ExpectedValue because it could have side-effects.
    if (CGM.getCodeGenOpts().OptimizationLevel == 0)
      return RValue::get(ArgValue);

    Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
    Value *Result =
        Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_assume_aligned: {
    Value *PtrValue = EmitScalarExpr(E->getArg(0));
    Value *OffsetValue =
        (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;

    Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
    ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
    unsigned Alignment = (unsigned)AlignmentCI->getZExtValue();

    EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
    return RValue::get(PtrValue);
  }
  case Builtin::BI__assume:
  case Builtin::BI__builtin_assume: {
    if (E->getArg(0)->HasSideEffects(getContext()))
      return RValue::get(nullptr);

    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
    return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
  }
  case Builtin::BI__builtin_bswap16:
  case Builtin::BI__builtin_bswap32:
  case Builtin::BI__builtin_bswap64: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
  }
  case Builtin::BI__builtin_bitreverse8:
  case Builtin::BI__builtin_bitreverse16:
  case Builtin::BI__builtin_bitreverse32:
  case Builtin::BI__builtin_bitreverse64: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
  }
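  // Example: at -O1 and above, 'if (__builtin_expect(err, 0))' on a 64-bit
  // target yields roughly
  //   %expval = call i64 @llvm.expect.i64(i64 %err, i64 0)
  // which later passes turn into branch weights; at -O0 the intrinsic is
  // skipped entirely and the raw argument is returned, as above.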
  case Builtin::BI__builtin_object_size: {
    unsigned Type =
        E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
    auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));

    // We pass this builtin onto the optimizer so that it can figure out the
    // object size in more complex cases.
    return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType));
  }
  case Builtin::BI__builtin_prefetch: {
    Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
    // FIXME: Technically these constants should be of type 'int', yes?
    RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
      llvm::ConstantInt::get(Int32Ty, 0);
    Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
      llvm::ConstantInt::get(Int32Ty, 3);
    Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
    Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
    return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
  }
  case Builtin::BI__builtin_readcyclecounter: {
    Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin___clear_cache: {
    Value *Begin = EmitScalarExpr(E->getArg(0));
    Value *End = EmitScalarExpr(E->getArg(1));
    Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
    return RValue::get(Builder.CreateCall(F, {Begin, End}));
  }
  case Builtin::BI__builtin_trap:
    return RValue::get(EmitTrapCall(Intrinsic::trap));
  case Builtin::BI__debugbreak:
    return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
  case Builtin::BI__builtin_unreachable: {
    if (SanOpts.has(SanitizerKind::Unreachable)) {
      SanitizerScope SanScope(this);
      EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
                               SanitizerKind::Unreachable),
                SanitizerHandler::BuiltinUnreachable,
                EmitCheckSourceLocation(E->getExprLoc()), None);
    } else
      Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("unreachable.cont"));

    return RValue::get(nullptr);
  }

  case Builtin::BI__builtin_powi:
  case Builtin::BI__builtin_powif:
  case Builtin::BI__builtin_powil: {
    Value *Base = EmitScalarExpr(E->getArg(0));
    Value *Exponent = EmitScalarExpr(E->getArg(1));
    llvm::Type *ArgType = Base->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
    return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
  }

  case Builtin::BI__builtin_isgreater:
  case Builtin::BI__builtin_isgreaterequal:
  case Builtin::BI__builtin_isless:
  case Builtin::BI__builtin_islessequal:
  case Builtin::BI__builtin_islessgreater:
  case Builtin::BI__builtin_isunordered: {
    // Ordered comparisons: we know the arguments to these are matching scalar
    // floating point values.
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));

    switch (BuiltinID) {
    default: llvm_unreachable("Unknown ordered comparison");
    case Builtin::BI__builtin_isgreater:
      LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isgreaterequal:
      LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isless:
      LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessequal:
      LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessgreater:
      LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isunordered:
      LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
      break;
    }
    // ZExt bool to int type.
    return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
  }
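  // Example: __builtin_isless(x, y) maps to 'fcmp olt', an *ordered*
  // comparison that is simply false when either operand is NaN, so it never
  // raises the "invalid" exception the plain '<' operator may raise on NaNs.
  // Only __builtin_isunordered uses an unordered predicate (uno), which is
  // true iff at least one operand is NaN.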
  case Builtin::BI__builtin_isnan: {
    Value *V = EmitScalarExpr(E->getArg(0));
    V = Builder.CreateFCmpUNO(V, V, "cmp");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BIfinite:
  case Builtin::BI__finite:
  case Builtin::BIfinitef:
  case Builtin::BI__finitef:
  case Builtin::BIfinitel:
  case Builtin::BI__finitel:
  case Builtin::BI__builtin_isinf:
  case Builtin::BI__builtin_isfinite: {
    // isinf(x)    --> fabs(x) == infinity
    // isfinite(x) --> fabs(x) != infinity
    // x != NaN via the ordered compare in either case.
    Value *V = EmitScalarExpr(E->getArg(0));
    Value *Fabs = EmitFAbs(*this, V);
    Constant *Infinity = ConstantFP::getInfinity(V->getType());
    CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
                                  ? CmpInst::FCMP_OEQ
                                  : CmpInst::FCMP_ONE;
    Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
    return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_isinf_sign: {
    // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
    Value *Arg = EmitScalarExpr(E->getArg(0));
    Value *AbsArg = EmitFAbs(*this, Arg);
    Value *IsInf = Builder.CreateFCmpOEQ(
        AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
    Value *IsNeg = EmitSignBit(*this, Arg);

    llvm::Type *IntTy = ConvertType(E->getType());
    Value *Zero = Constant::getNullValue(IntTy);
    Value *One = ConstantInt::get(IntTy, 1);
    Value *NegativeOne = ConstantInt::get(IntTy, -1);
    Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
    Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
    return RValue::get(Result);
  }

  case Builtin::BI__builtin_isnormal: {
    // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
    Value *V = EmitScalarExpr(E->getArg(0));
    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");

    Value *Abs = EmitFAbs(*this, V);
    Value *IsLessThanInf =
        Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),
                              "isinf");
    APFloat Smallest = APFloat::getSmallestNormalized(
        getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
    Value *IsNormal =
        Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
                              "isnormal");
    V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
    V = Builder.CreateAnd(V, IsNormal, "and");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }
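  // Worked example: __builtin_isinf_sign(-INFINITY) finds |x| == inf with the
  // sign bit set, so the nested selects produce -1; +INFINITY gives 1, and
  // any finite or NaN input gives 0 because the outer select takes the Zero
  // branch.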
  case Builtin::BI__builtin_fpclassify: {
    Value *V = EmitScalarExpr(E->getArg(5));
    llvm::Type *Ty = ConvertType(E->getArg(5)->getType());

    // Create Result
    BasicBlock *Begin = Builder.GetInsertBlock();
    BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
    Builder.SetInsertPoint(End);
    PHINode *Result =
        Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
                          "fpclassify_result");

    // if (V==0) return FP_ZERO
    Builder.SetInsertPoint(Begin);
    Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
                                          "iszero");
    Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
    BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
    Builder.CreateCondBr(IsZero, End, NotZero);
    Result->addIncoming(ZeroLiteral, Begin);

    // if (V != V) return FP_NAN
    Builder.SetInsertPoint(NotZero);
    Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
    Value *NanLiteral = EmitScalarExpr(E->getArg(0));
    BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
    Builder.CreateCondBr(IsNan, End, NotNan);
    Result->addIncoming(NanLiteral, NotZero);

    // if (fabs(V) == infinity) return FP_INFINITY
    Builder.SetInsertPoint(NotNan);
    Value *VAbs = EmitFAbs(*this, V);
    Value *IsInf =
        Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
                              "isinf");
    Value *InfLiteral = EmitScalarExpr(E->getArg(1));
    BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
    Builder.CreateCondBr(IsInf, End, NotInf);
    Result->addIncoming(InfLiteral, NotNan);

    // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
    Builder.SetInsertPoint(NotInf);
    APFloat Smallest = APFloat::getSmallestNormalized(
        getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
    Value *IsNormal =
        Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
                              "isnormal");
    Value *NormalResult =
        Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
                             EmitScalarExpr(E->getArg(3)));
    Builder.CreateBr(End);
    Result->addIncoming(NormalResult, NotInf);

    // return Result
    Builder.SetInsertPoint(End);
    return RValue::get(Result);
  }

  case Builtin::BIalloca:
  case Builtin::BI_alloca:
  case Builtin::BI__builtin_alloca: {
    Value *Size = EmitScalarExpr(E->getArg(0));
    const TargetInfo &TI = getContext().getTargetInfo();
    // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
    unsigned SuitableAlignmentInBytes =
        CGM.getContext()
            .toCharUnitsFromBits(TI.getSuitableAlign())
            .getQuantity();
    AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
    AI->setAlignment(SuitableAlignmentInBytes);
    return RValue::get(AI);
  }

  case Builtin::BI__builtin_alloca_with_align: {
    Value *Size = EmitScalarExpr(E->getArg(0));
    Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
    auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
    unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
    unsigned AlignmentInBytes =
        CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity();
    AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
    AI->setAlignment(AlignmentInBytes);
    return RValue::get(AI);
  }

  case Builtin::BIbzero:
  case Builtin::BI__builtin_bzero: {
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Value *SizeVal = EmitScalarExpr(E->getArg(1));
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
                        E->getArg(0)->getExprLoc(), FD, 0);
    Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
    return RValue::get(Dest.getPointer());
  }
  case Builtin::BImemcpy:
  case Builtin::BI__builtin_memcpy: {
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Address Src = EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
                        E->getArg(0)->getExprLoc(), FD, 0);
    EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
                        E->getArg(1)->getExprLoc(), FD, 1);
    Builder.CreateMemCpy(Dest, Src, SizeVal, false);
    return RValue::get(Dest.getPointer());
  }
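  // Example: __builtin_memcpy(d, s, n) becomes a non-volatile call to the
  // @llvm.memcpy intrinsic, carrying the alignments recovered by
  // EmitPointerWithAlignment; the builtin itself evaluates to the destination
  // pointer, matching memcpy's return value.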
  case Builtin::BI__builtin___memcpy_chk: {
    // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
    llvm::APSInt Size, DstSize;
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
      break;
    if (Size.ugt(DstSize))
      break;
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Address Src = EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    Builder.CreateMemCpy(Dest, Src, SizeVal, false);
    return RValue::get(Dest.getPointer());
  }

  case Builtin::BI__builtin_objc_memmove_collectable: {
    Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
    Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
                                                  DestAddr, SrcAddr, SizeVal);
    return RValue::get(DestAddr.getPointer());
  }

  case Builtin::BI__builtin___memmove_chk: {
    // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
    llvm::APSInt Size, DstSize;
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
      break;
    if (Size.ugt(DstSize))
      break;
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Address Src = EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    Builder.CreateMemMove(Dest, Src, SizeVal, false);
    return RValue::get(Dest.getPointer());
  }

  case Builtin::BImemmove:
  case Builtin::BI__builtin_memmove: {
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Address Src = EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
                        E->getArg(0)->getExprLoc(), FD, 0);
    EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
                        E->getArg(1)->getExprLoc(), FD, 1);
    Builder.CreateMemMove(Dest, Src, SizeVal, false);
    return RValue::get(Dest.getPointer());
  }
  case Builtin::BImemset:
  case Builtin::BI__builtin_memset: {
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
                                         Builder.getInt8Ty());
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
                        E->getArg(0)->getExprLoc(), FD, 0);
    Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
    return RValue::get(Dest.getPointer());
  }
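  // Example: __builtin___memcpy_chk(d, s, 10, 16) folds to a plain memcpy
  // because 10 <= 16 is provable at compile time; if either size is
  // non-constant or the copy might overflow, the 'break' falls through to
  // the checked library call instead. The memset_chk case below follows the
  // same pattern.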
  case Builtin::BI__builtin___memset_chk: {
    // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
    llvm::APSInt Size, DstSize;
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
      break;
    if (Size.ugt(DstSize))
      break;
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
                                         Builder.getInt8Ty());
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
    return RValue::get(Dest.getPointer());
  }
  case Builtin::BI__builtin_dwarf_cfa: {
    // The offset in bytes from the first argument to the CFA.
    //
    // Why on earth is this in the frontend? Is there any reason at
    // all that the backend can't reasonably determine this while
    // lowering llvm.eh.dwarf.cfa()?
    //
    // TODO: If there's a satisfactory reason, add a target hook for
    // this instead of hard-coding 0, which is correct for most targets.
    int32_t Offset = 0;

    Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
    return RValue::get(Builder.CreateCall(F,
                                      llvm::ConstantInt::get(Int32Ty, Offset)));
  }
  case Builtin::BI__builtin_return_address: {
    Value *Depth =
        CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
    Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
    return RValue::get(Builder.CreateCall(F, Depth));
  }
  case Builtin::BI_ReturnAddress: {
    Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
    return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
  }
  case Builtin::BI__builtin_frame_address: {
    Value *Depth =
        CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
    Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
    return RValue::get(Builder.CreateCall(F, Depth));
  }
  case Builtin::BI__builtin_extract_return_addr: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_frob_return_addr: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_dwarf_sp_column: {
    llvm::IntegerType *Ty
        = cast<llvm::IntegerType>(ConvertType(E->getType()));
    int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
    if (Column == -1) {
      CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
      return RValue::get(llvm::UndefValue::get(Ty));
    }
    return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
  }
  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
      CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
    return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_eh_return: {
    Value *Int = EmitScalarExpr(E->getArg(0));
    Value *Ptr = EmitScalarExpr(E->getArg(1));

    llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
    assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
           "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
    Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
                                    ? Intrinsic::eh_return_i32
                                    : Intrinsic::eh_return_i64);
    Builder.CreateCall(F, {Int, Ptr});
    Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("builtin_eh_return.cont"));

    return RValue::get(nullptr);
  }
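  // Example: __builtin_return_address(0) becomes
  //   %ra = call i8* @llvm.returnaddress(i32 0)
  // and __builtin_eh_return(off, handler) becomes a call to
  // @llvm.eh.return.i32 or @llvm.eh.return.i64 (chosen by the offset's width)
  // followed by 'unreachable', since control never returns to the caller.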
  case Builtin::BI__builtin_unwind_init: {
    Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin_extend_pointer: {
    // Extends a pointer to the size of an _Unwind_Word, which is
    // uint64_t on all platforms.  Generally this gets poked into a
    // register and eventually used as an address, so if the
    // addressing registers are wider than pointers and the platform
    // doesn't implicitly ignore high-order bits when doing
    // addressing, we need to make sure we zext / sext based on
    // the platform's expectations.
    //
    // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html

    // Cast the pointer to intptr_t.
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");

    // If that's 64 bits, we're done.
    if (IntPtrTy->getBitWidth() == 64)
      return RValue::get(Result);

    // Otherwise, ask the codegen data what to do.
    if (getTargetHooks().extendPointerWithSExt())
      return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
    else
      return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
  }
  case Builtin::BI__builtin_setjmp: {
    // Buffer is a void**.
    Address Buf = EmitPointerWithAlignment(E->getArg(0));

    // Store the frame pointer to the setjmp buffer.
    Value *FrameAddr =
        Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
                           ConstantInt::get(Int32Ty, 0));
    Builder.CreateStore(FrameAddr, Buf);

    // Store the stack pointer to the setjmp buffer.
    Value *StackAddr =
        Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
    Address StackSaveSlot =
        Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
    Builder.CreateStore(StackAddr, StackSaveSlot);

    // Call LLVM's EH setjmp, which is lightweight.
    Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
    return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
  }
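  // Buffer layout sketch for the setjmp case above: slot 0 holds the frame
  // pointer and slot 2 the result of llvm.stacksave, while slot 1 is left
  // for llvm.eh.sjlj.setjmp's own lowering; __builtin_longjmp below only
  // works with buffers filled in this exact shape.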
1406 EmitBlock(createBasicBlock("longjmp.cont")); 1407 1408 return RValue::get(nullptr); 1409 } 1410 case Builtin::BI__sync_fetch_and_add: 1411 case Builtin::BI__sync_fetch_and_sub: 1412 case Builtin::BI__sync_fetch_and_or: 1413 case Builtin::BI__sync_fetch_and_and: 1414 case Builtin::BI__sync_fetch_and_xor: 1415 case Builtin::BI__sync_fetch_and_nand: 1416 case Builtin::BI__sync_add_and_fetch: 1417 case Builtin::BI__sync_sub_and_fetch: 1418 case Builtin::BI__sync_and_and_fetch: 1419 case Builtin::BI__sync_or_and_fetch: 1420 case Builtin::BI__sync_xor_and_fetch: 1421 case Builtin::BI__sync_nand_and_fetch: 1422 case Builtin::BI__sync_val_compare_and_swap: 1423 case Builtin::BI__sync_bool_compare_and_swap: 1424 case Builtin::BI__sync_lock_test_and_set: 1425 case Builtin::BI__sync_lock_release: 1426 case Builtin::BI__sync_swap: 1427 llvm_unreachable("Shouldn't make it through sema"); 1428 case Builtin::BI__sync_fetch_and_add_1: 1429 case Builtin::BI__sync_fetch_and_add_2: 1430 case Builtin::BI__sync_fetch_and_add_4: 1431 case Builtin::BI__sync_fetch_and_add_8: 1432 case Builtin::BI__sync_fetch_and_add_16: 1433 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E); 1434 case Builtin::BI__sync_fetch_and_sub_1: 1435 case Builtin::BI__sync_fetch_and_sub_2: 1436 case Builtin::BI__sync_fetch_and_sub_4: 1437 case Builtin::BI__sync_fetch_and_sub_8: 1438 case Builtin::BI__sync_fetch_and_sub_16: 1439 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E); 1440 case Builtin::BI__sync_fetch_and_or_1: 1441 case Builtin::BI__sync_fetch_and_or_2: 1442 case Builtin::BI__sync_fetch_and_or_4: 1443 case Builtin::BI__sync_fetch_and_or_8: 1444 case Builtin::BI__sync_fetch_and_or_16: 1445 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E); 1446 case Builtin::BI__sync_fetch_and_and_1: 1447 case Builtin::BI__sync_fetch_and_and_2: 1448 case Builtin::BI__sync_fetch_and_and_4: 1449 case Builtin::BI__sync_fetch_and_and_8: 1450 case Builtin::BI__sync_fetch_and_and_16: 1451 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E); 1452 case Builtin::BI__sync_fetch_and_xor_1: 1453 case Builtin::BI__sync_fetch_and_xor_2: 1454 case Builtin::BI__sync_fetch_and_xor_4: 1455 case Builtin::BI__sync_fetch_and_xor_8: 1456 case Builtin::BI__sync_fetch_and_xor_16: 1457 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E); 1458 case Builtin::BI__sync_fetch_and_nand_1: 1459 case Builtin::BI__sync_fetch_and_nand_2: 1460 case Builtin::BI__sync_fetch_and_nand_4: 1461 case Builtin::BI__sync_fetch_and_nand_8: 1462 case Builtin::BI__sync_fetch_and_nand_16: 1463 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E); 1464 1465 // Clang extensions: not overloaded yet. 
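  // As an illustrative sketch (commentary only, not part of the lowering
  // code itself), a call such as:
  //   int old = __sync_fetch_and_min(&x, y);
  // becomes a single read-modify-write instruction:
  //   %old = atomicrmw min i32* %x, i32 %y seq_cst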
1466 case Builtin::BI__sync_fetch_and_min: 1467 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E); 1468 case Builtin::BI__sync_fetch_and_max: 1469 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E); 1470 case Builtin::BI__sync_fetch_and_umin: 1471 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E); 1472 case Builtin::BI__sync_fetch_and_umax: 1473 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E); 1474 1475 case Builtin::BI__sync_add_and_fetch_1: 1476 case Builtin::BI__sync_add_and_fetch_2: 1477 case Builtin::BI__sync_add_and_fetch_4: 1478 case Builtin::BI__sync_add_and_fetch_8: 1479 case Builtin::BI__sync_add_and_fetch_16: 1480 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E, 1481 llvm::Instruction::Add); 1482 case Builtin::BI__sync_sub_and_fetch_1: 1483 case Builtin::BI__sync_sub_and_fetch_2: 1484 case Builtin::BI__sync_sub_and_fetch_4: 1485 case Builtin::BI__sync_sub_and_fetch_8: 1486 case Builtin::BI__sync_sub_and_fetch_16: 1487 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E, 1488 llvm::Instruction::Sub); 1489 case Builtin::BI__sync_and_and_fetch_1: 1490 case Builtin::BI__sync_and_and_fetch_2: 1491 case Builtin::BI__sync_and_and_fetch_4: 1492 case Builtin::BI__sync_and_and_fetch_8: 1493 case Builtin::BI__sync_and_and_fetch_16: 1494 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E, 1495 llvm::Instruction::And); 1496 case Builtin::BI__sync_or_and_fetch_1: 1497 case Builtin::BI__sync_or_and_fetch_2: 1498 case Builtin::BI__sync_or_and_fetch_4: 1499 case Builtin::BI__sync_or_and_fetch_8: 1500 case Builtin::BI__sync_or_and_fetch_16: 1501 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E, 1502 llvm::Instruction::Or); 1503 case Builtin::BI__sync_xor_and_fetch_1: 1504 case Builtin::BI__sync_xor_and_fetch_2: 1505 case Builtin::BI__sync_xor_and_fetch_4: 1506 case Builtin::BI__sync_xor_and_fetch_8: 1507 case Builtin::BI__sync_xor_and_fetch_16: 1508 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E, 1509 llvm::Instruction::Xor); 1510 case Builtin::BI__sync_nand_and_fetch_1: 1511 case Builtin::BI__sync_nand_and_fetch_2: 1512 case Builtin::BI__sync_nand_and_fetch_4: 1513 case Builtin::BI__sync_nand_and_fetch_8: 1514 case Builtin::BI__sync_nand_and_fetch_16: 1515 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E, 1516 llvm::Instruction::And, true); 1517 1518 case Builtin::BI__sync_val_compare_and_swap_1: 1519 case Builtin::BI__sync_val_compare_and_swap_2: 1520 case Builtin::BI__sync_val_compare_and_swap_4: 1521 case Builtin::BI__sync_val_compare_and_swap_8: 1522 case Builtin::BI__sync_val_compare_and_swap_16: 1523 return RValue::get(MakeAtomicCmpXchgValue(*this, E, false)); 1524 1525 case Builtin::BI__sync_bool_compare_and_swap_1: 1526 case Builtin::BI__sync_bool_compare_and_swap_2: 1527 case Builtin::BI__sync_bool_compare_and_swap_4: 1528 case Builtin::BI__sync_bool_compare_and_swap_8: 1529 case Builtin::BI__sync_bool_compare_and_swap_16: 1530 return RValue::get(MakeAtomicCmpXchgValue(*this, E, true)); 1531 1532 case Builtin::BI__sync_swap_1: 1533 case Builtin::BI__sync_swap_2: 1534 case Builtin::BI__sync_swap_4: 1535 case Builtin::BI__sync_swap_8: 1536 case Builtin::BI__sync_swap_16: 1537 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1538 1539 case Builtin::BI__sync_lock_test_and_set_1: 1540 case Builtin::BI__sync_lock_test_and_set_2: 1541 case Builtin::BI__sync_lock_test_and_set_4: 1542 case Builtin::BI__sync_lock_test_and_set_8: 1543 case 
Builtin::BI__sync_lock_test_and_set_16: 1544 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1545 1546 case Builtin::BI__sync_lock_release_1: 1547 case Builtin::BI__sync_lock_release_2: 1548 case Builtin::BI__sync_lock_release_4: 1549 case Builtin::BI__sync_lock_release_8: 1550 case Builtin::BI__sync_lock_release_16: { 1551 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1552 QualType ElTy = E->getArg(0)->getType()->getPointeeType(); 1553 CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); 1554 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 1555 StoreSize.getQuantity() * 8); 1556 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 1557 llvm::StoreInst *Store = 1558 Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr, 1559 StoreSize); 1560 Store->setAtomic(llvm::AtomicOrdering::Release); 1561 return RValue::get(nullptr); 1562 } 1563 1564 case Builtin::BI__sync_synchronize: { 1565 // We assume this is supposed to correspond to a C++0x-style 1566 // sequentially-consistent fence (i.e. this is only usable for 1567 // synchronization, not device I/O or anything like that). This intrinsic 1568 // is really badly designed in the sense that in theory, there isn't 1569 // any way to safely use it... but in practice, it mostly works 1570 // to use it with non-atomic loads and stores to get acquire/release 1571 // semantics. 1572 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent); 1573 return RValue::get(nullptr); 1574 } 1575 1576 case Builtin::BI__builtin_nontemporal_load: 1577 return RValue::get(EmitNontemporalLoad(*this, E)); 1578 case Builtin::BI__builtin_nontemporal_store: 1579 return RValue::get(EmitNontemporalStore(*this, E)); 1580 case Builtin::BI__c11_atomic_is_lock_free: 1581 case Builtin::BI__atomic_is_lock_free: { 1582 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the 1583 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since 1584 // _Atomic(T) is always properly-aligned. 1585 const char *LibCallName = "__atomic_is_lock_free"; 1586 CallArgList Args; 1587 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))), 1588 getContext().getSizeType()); 1589 if (BuiltinID == Builtin::BI__atomic_is_lock_free) 1590 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))), 1591 getContext().VoidPtrTy); 1592 else 1593 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)), 1594 getContext().VoidPtrTy); 1595 const CGFunctionInfo &FuncInfo = 1596 CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args); 1597 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); 1598 llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName); 1599 return EmitCall(FuncInfo, CGCallee::forDirect(Func), 1600 ReturnValueSlot(), Args); 1601 } 1602 1603 case Builtin::BI__atomic_test_and_set: { 1604 // Look at the argument type to determine whether this is a volatile 1605 // operation. The parameter type is always volatile. 
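  // Illustrative lowering when the ordering argument is a constant (a
  // sketch, assuming an ordering of __ATOMIC_ACQUIRE):
  //   bool b = __atomic_test_and_set(p, __ATOMIC_ACQUIRE);
  // becomes roughly:
  //   %old = atomicrmw xchg i8* %p, i8 1 acquire
  //   %tobool = icmp ne i8 %old, 0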
1606 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 1607 bool Volatile = 1608 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 1609 1610 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1611 unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); 1612 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 1613 Value *NewVal = Builder.getInt8(1); 1614 Value *Order = EmitScalarExpr(E->getArg(1)); 1615 if (isa<llvm::ConstantInt>(Order)) { 1616 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1617 AtomicRMWInst *Result = nullptr; 1618 switch (ord) { 1619 case 0: // memory_order_relaxed 1620 default: // invalid order 1621 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1622 llvm::AtomicOrdering::Monotonic); 1623 break; 1624 case 1: // memory_order_consume 1625 case 2: // memory_order_acquire 1626 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1627 llvm::AtomicOrdering::Acquire); 1628 break; 1629 case 3: // memory_order_release 1630 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1631 llvm::AtomicOrdering::Release); 1632 break; 1633 case 4: // memory_order_acq_rel 1634 1635 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1636 llvm::AtomicOrdering::AcquireRelease); 1637 break; 1638 case 5: // memory_order_seq_cst 1639 Result = Builder.CreateAtomicRMW( 1640 llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1641 llvm::AtomicOrdering::SequentiallyConsistent); 1642 break; 1643 } 1644 Result->setVolatile(Volatile); 1645 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 1646 } 1647 1648 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1649 1650 llvm::BasicBlock *BBs[5] = { 1651 createBasicBlock("monotonic", CurFn), 1652 createBasicBlock("acquire", CurFn), 1653 createBasicBlock("release", CurFn), 1654 createBasicBlock("acqrel", CurFn), 1655 createBasicBlock("seqcst", CurFn) 1656 }; 1657 llvm::AtomicOrdering Orders[5] = { 1658 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire, 1659 llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease, 1660 llvm::AtomicOrdering::SequentiallyConsistent}; 1661 1662 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1663 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 1664 1665 Builder.SetInsertPoint(ContBB); 1666 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set"); 1667 1668 for (unsigned i = 0; i < 5; ++i) { 1669 Builder.SetInsertPoint(BBs[i]); 1670 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1671 Ptr, NewVal, Orders[i]); 1672 RMW->setVolatile(Volatile); 1673 Result->addIncoming(RMW, BBs[i]); 1674 Builder.CreateBr(ContBB); 1675 } 1676 1677 SI->addCase(Builder.getInt32(0), BBs[0]); 1678 SI->addCase(Builder.getInt32(1), BBs[1]); 1679 SI->addCase(Builder.getInt32(2), BBs[1]); 1680 SI->addCase(Builder.getInt32(3), BBs[2]); 1681 SI->addCase(Builder.getInt32(4), BBs[3]); 1682 SI->addCase(Builder.getInt32(5), BBs[4]); 1683 1684 Builder.SetInsertPoint(ContBB); 1685 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 1686 } 1687 1688 case Builtin::BI__atomic_clear: { 1689 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 1690 bool Volatile = 1691 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 1692 1693 Address Ptr = EmitPointerWithAlignment(E->getArg(0)); 1694 unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace(); 1695 Ptr = Builder.CreateBitCast(Ptr, 
Int8Ty->getPointerTo(AddrSpace)); 1696 Value *NewVal = Builder.getInt8(0); 1697 Value *Order = EmitScalarExpr(E->getArg(1)); 1698 if (isa<llvm::ConstantInt>(Order)) { 1699 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1700 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 1701 switch (ord) { 1702 case 0: // memory_order_relaxed 1703 default: // invalid order 1704 Store->setOrdering(llvm::AtomicOrdering::Monotonic); 1705 break; 1706 case 3: // memory_order_release 1707 Store->setOrdering(llvm::AtomicOrdering::Release); 1708 break; 1709 case 5: // memory_order_seq_cst 1710 Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent); 1711 break; 1712 } 1713 return RValue::get(nullptr); 1714 } 1715 1716 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1717 1718 llvm::BasicBlock *BBs[3] = { 1719 createBasicBlock("monotonic", CurFn), 1720 createBasicBlock("release", CurFn), 1721 createBasicBlock("seqcst", CurFn) 1722 }; 1723 llvm::AtomicOrdering Orders[3] = { 1724 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release, 1725 llvm::AtomicOrdering::SequentiallyConsistent}; 1726 1727 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1728 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 1729 1730 for (unsigned i = 0; i < 3; ++i) { 1731 Builder.SetInsertPoint(BBs[i]); 1732 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 1733 Store->setOrdering(Orders[i]); 1734 Builder.CreateBr(ContBB); 1735 } 1736 1737 SI->addCase(Builder.getInt32(0), BBs[0]); 1738 SI->addCase(Builder.getInt32(3), BBs[1]); 1739 SI->addCase(Builder.getInt32(5), BBs[2]); 1740 1741 Builder.SetInsertPoint(ContBB); 1742 return RValue::get(nullptr); 1743 } 1744 1745 case Builtin::BI__atomic_thread_fence: 1746 case Builtin::BI__atomic_signal_fence: 1747 case Builtin::BI__c11_atomic_thread_fence: 1748 case Builtin::BI__c11_atomic_signal_fence: { 1749 llvm::SynchronizationScope Scope; 1750 if (BuiltinID == Builtin::BI__atomic_signal_fence || 1751 BuiltinID == Builtin::BI__c11_atomic_signal_fence) 1752 Scope = llvm::SingleThread; 1753 else 1754 Scope = llvm::CrossThread; 1755 Value *Order = EmitScalarExpr(E->getArg(0)); 1756 if (isa<llvm::ConstantInt>(Order)) { 1757 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1758 switch (ord) { 1759 case 0: // memory_order_relaxed 1760 default: // invalid order 1761 break; 1762 case 1: // memory_order_consume 1763 case 2: // memory_order_acquire 1764 Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope); 1765 break; 1766 case 3: // memory_order_release 1767 Builder.CreateFence(llvm::AtomicOrdering::Release, Scope); 1768 break; 1769 case 4: // memory_order_acq_rel 1770 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope); 1771 break; 1772 case 5: // memory_order_seq_cst 1773 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, 1774 Scope); 1775 break; 1776 } 1777 return RValue::get(nullptr); 1778 } 1779 1780 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB; 1781 AcquireBB = createBasicBlock("acquire", CurFn); 1782 ReleaseBB = createBasicBlock("release", CurFn); 1783 AcqRelBB = createBasicBlock("acqrel", CurFn); 1784 SeqCstBB = createBasicBlock("seqcst", CurFn); 1785 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1786 1787 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1788 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB); 1789 1790 Builder.SetInsertPoint(AcquireBB); 1791 
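  // Each basic block below emits the fence for one ordering and branches to
  // the continuation block; e.g. a runtime order of 1 or 2 (consume/acquire)
  // dispatches to AcquireBB and emits an acquire fence.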
Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope); 1792 Builder.CreateBr(ContBB); 1793 SI->addCase(Builder.getInt32(1), AcquireBB); 1794 SI->addCase(Builder.getInt32(2), AcquireBB); 1795 1796 Builder.SetInsertPoint(ReleaseBB); 1797 Builder.CreateFence(llvm::AtomicOrdering::Release, Scope); 1798 Builder.CreateBr(ContBB); 1799 SI->addCase(Builder.getInt32(3), ReleaseBB); 1800 1801 Builder.SetInsertPoint(AcqRelBB); 1802 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope); 1803 Builder.CreateBr(ContBB); 1804 SI->addCase(Builder.getInt32(4), AcqRelBB); 1805 1806 Builder.SetInsertPoint(SeqCstBB); 1807 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Scope); 1808 Builder.CreateBr(ContBB); 1809 SI->addCase(Builder.getInt32(5), SeqCstBB); 1810 1811 Builder.SetInsertPoint(ContBB); 1812 return RValue::get(nullptr); 1813 } 1814 1815 // Library functions with special handling. 1816 case Builtin::BIsqrt: 1817 case Builtin::BIsqrtf: 1818 case Builtin::BIsqrtl: { 1819 // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only 1820 // in finite- or unsafe-math mode (the intrinsic has different semantics 1821 // for handling negative numbers compared to the library function, so 1822 // -fmath-errno=0 is not enough). 1823 if (!FD->hasAttr<ConstAttr>()) 1824 break; 1825 if (!(CGM.getCodeGenOpts().UnsafeFPMath || 1826 CGM.getCodeGenOpts().NoNaNsFPMath)) 1827 break; 1828 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 1829 llvm::Type *ArgType = Arg0->getType(); 1830 Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType); 1831 return RValue::get(Builder.CreateCall(F, Arg0)); 1832 } 1833 1834 case Builtin::BI__builtin_pow: 1835 case Builtin::BI__builtin_powf: 1836 case Builtin::BI__builtin_powl: 1837 case Builtin::BIpow: 1838 case Builtin::BIpowf: 1839 case Builtin::BIpowl: { 1840 // Transform a call to pow* into a @llvm.pow.* intrinsic call. 1841 if (!FD->hasAttr<ConstAttr>()) 1842 break; 1843 Value *Base = EmitScalarExpr(E->getArg(0)); 1844 Value *Exponent = EmitScalarExpr(E->getArg(1)); 1845 llvm::Type *ArgType = Base->getType(); 1846 Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType); 1847 return RValue::get(Builder.CreateCall(F, {Base, Exponent})); 1848 } 1849 1850 case Builtin::BIfma: 1851 case Builtin::BIfmaf: 1852 case Builtin::BIfmal: 1853 case Builtin::BI__builtin_fma: 1854 case Builtin::BI__builtin_fmaf: 1855 case Builtin::BI__builtin_fmal: { 1856 // Rewrite fma to intrinsic. 1857 Value *FirstArg = EmitScalarExpr(E->getArg(0)); 1858 llvm::Type *ArgType = FirstArg->getType(); 1859 Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType); 1860 return RValue::get( 1861 Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)), 1862 EmitScalarExpr(E->getArg(2))})); 1863 } 1864 1865 case Builtin::BI__builtin_signbit: 1866 case Builtin::BI__builtin_signbitf: 1867 case Builtin::BI__builtin_signbitl: { 1868 return RValue::get( 1869 Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))), 1870 ConvertType(E->getType()))); 1871 } 1872 case Builtin::BI__builtin_annotation: { 1873 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0)); 1874 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation, 1875 AnnVal->getType()); 1876 1877 // Get the annotation string, go through casts. Sema requires this to be a 1878 // non-wide string literal, potentially casted, so the cast<> is safe. 
1879 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts(); 1880 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString(); 1881 return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc())); 1882 } 1883 case Builtin::BI__builtin_addcb: 1884 case Builtin::BI__builtin_addcs: 1885 case Builtin::BI__builtin_addc: 1886 case Builtin::BI__builtin_addcl: 1887 case Builtin::BI__builtin_addcll: 1888 case Builtin::BI__builtin_subcb: 1889 case Builtin::BI__builtin_subcs: 1890 case Builtin::BI__builtin_subc: 1891 case Builtin::BI__builtin_subcl: 1892 case Builtin::BI__builtin_subcll: { 1893 1894 // We translate all of these builtins from expressions of the form: 1895 // int x = ..., y = ..., carryin = ..., carryout, result; 1896 // result = __builtin_addc(x, y, carryin, &carryout); 1897 // 1898 // to LLVM IR of the form: 1899 // 1900 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) 1901 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0 1902 // %carry1 = extractvalue {i32, i1} %tmp1, 1 1903 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1, 1904 // i32 %carryin) 1905 // %result = extractvalue {i32, i1} %tmp2, 0 1906 // %carry2 = extractvalue {i32, i1} %tmp2, 1 1907 // %tmp3 = or i1 %carry1, %carry2 1908 // %tmp4 = zext i1 %tmp3 to i32 1909 // store i32 %tmp4, i32* %carryout 1910 1911 // Scalarize our inputs. 1912 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 1913 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 1914 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2)); 1915 Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3)); 1916 1917 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow. 1918 llvm::Intrinsic::ID IntrinsicId; 1919 switch (BuiltinID) { 1920 default: llvm_unreachable("Unknown multiprecision builtin id."); 1921 case Builtin::BI__builtin_addcb: 1922 case Builtin::BI__builtin_addcs: 1923 case Builtin::BI__builtin_addc: 1924 case Builtin::BI__builtin_addcl: 1925 case Builtin::BI__builtin_addcll: 1926 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 1927 break; 1928 case Builtin::BI__builtin_subcb: 1929 case Builtin::BI__builtin_subcs: 1930 case Builtin::BI__builtin_subc: 1931 case Builtin::BI__builtin_subcl: 1932 case Builtin::BI__builtin_subcll: 1933 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 1934 break; 1935 } 1936 1937 // Construct our resulting LLVM IR expression. 
1938 llvm::Value *Carry1; 1939 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId, 1940 X, Y, Carry1); 1941 llvm::Value *Carry2; 1942 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId, 1943 Sum1, Carryin, Carry2); 1944 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2), 1945 X->getType()); 1946 Builder.CreateStore(CarryOut, CarryOutPtr); 1947 return RValue::get(Sum2); 1948 } 1949 1950 case Builtin::BI__builtin_add_overflow: 1951 case Builtin::BI__builtin_sub_overflow: 1952 case Builtin::BI__builtin_mul_overflow: { 1953 const clang::Expr *LeftArg = E->getArg(0); 1954 const clang::Expr *RightArg = E->getArg(1); 1955 const clang::Expr *ResultArg = E->getArg(2); 1956 1957 clang::QualType ResultQTy = 1958 ResultArg->getType()->castAs<PointerType>()->getPointeeType(); 1959 1960 WidthAndSignedness LeftInfo = 1961 getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType()); 1962 WidthAndSignedness RightInfo = 1963 getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType()); 1964 WidthAndSignedness ResultInfo = 1965 getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy); 1966 WidthAndSignedness EncompassingInfo = 1967 EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo}); 1968 1969 llvm::Type *EncompassingLLVMTy = 1970 llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width); 1971 1972 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy); 1973 1974 llvm::Intrinsic::ID IntrinsicId; 1975 switch (BuiltinID) { 1976 default: 1977 llvm_unreachable("Unknown overflow builtin id."); 1978 case Builtin::BI__builtin_add_overflow: 1979 IntrinsicId = EncompassingInfo.Signed 1980 ? llvm::Intrinsic::sadd_with_overflow 1981 : llvm::Intrinsic::uadd_with_overflow; 1982 break; 1983 case Builtin::BI__builtin_sub_overflow: 1984 IntrinsicId = EncompassingInfo.Signed 1985 ? llvm::Intrinsic::ssub_with_overflow 1986 : llvm::Intrinsic::usub_with_overflow; 1987 break; 1988 case Builtin::BI__builtin_mul_overflow: 1989 IntrinsicId = EncompassingInfo.Signed 1990 ? llvm::Intrinsic::smul_with_overflow 1991 : llvm::Intrinsic::umul_with_overflow; 1992 break; 1993 } 1994 1995 llvm::Value *Left = EmitScalarExpr(LeftArg); 1996 llvm::Value *Right = EmitScalarExpr(RightArg); 1997 Address ResultPtr = EmitPointerWithAlignment(ResultArg); 1998 1999 // Extend each operand to the encompassing type. 2000 Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed); 2001 Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed); 2002 2003 // Perform the operation on the extended values. 2004 llvm::Value *Overflow, *Result; 2005 Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow); 2006 2007 if (EncompassingInfo.Width > ResultInfo.Width) { 2008 // The encompassing type is wider than the result type, so we need to 2009 // truncate it. 2010 llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy); 2011 2012 // To see if the truncation caused an overflow, we will extend 2013 // the result and then compare it to the original result. 2014 llvm::Value *ResultTruncExt = Builder.CreateIntCast( 2015 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed); 2016 llvm::Value *TruncationOverflow = 2017 Builder.CreateICmpNE(Result, ResultTruncExt); 2018 2019 Overflow = Builder.CreateOr(Overflow, TruncationOverflow); 2020 Result = ResultTrunc; 2021 } 2022 2023 // Finally, store the result using the pointer. 
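  // Worked example (illustrative only): for
  //   long long r; bool o = __builtin_add_overflow((int)a, (unsigned)b, &r);
  // the encompassing type is a signed 64-bit integer (wide enough for int,
  // unsigned and long long), so both operands are extended to i64 and
  // llvm.sadd.with.overflow.i64 is used; no truncation check is emitted,
  // since the result width already matches the encompassing width.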
2024 bool isVolatile = 2025 ResultArg->getType()->getPointeeType().isVolatileQualified(); 2026 Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile); 2027 2028 return RValue::get(Overflow); 2029 } 2030 2031 case Builtin::BI__builtin_uadd_overflow: 2032 case Builtin::BI__builtin_uaddl_overflow: 2033 case Builtin::BI__builtin_uaddll_overflow: 2034 case Builtin::BI__builtin_usub_overflow: 2035 case Builtin::BI__builtin_usubl_overflow: 2036 case Builtin::BI__builtin_usubll_overflow: 2037 case Builtin::BI__builtin_umul_overflow: 2038 case Builtin::BI__builtin_umull_overflow: 2039 case Builtin::BI__builtin_umulll_overflow: 2040 case Builtin::BI__builtin_sadd_overflow: 2041 case Builtin::BI__builtin_saddl_overflow: 2042 case Builtin::BI__builtin_saddll_overflow: 2043 case Builtin::BI__builtin_ssub_overflow: 2044 case Builtin::BI__builtin_ssubl_overflow: 2045 case Builtin::BI__builtin_ssubll_overflow: 2046 case Builtin::BI__builtin_smul_overflow: 2047 case Builtin::BI__builtin_smull_overflow: 2048 case Builtin::BI__builtin_smulll_overflow: { 2049 2050 // We translate all of these builtins directly to the relevant llvm IR node. 2051 2052 // Scalarize our inputs. 2053 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 2054 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 2055 Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2)); 2056 2057 // Decide which of the overflow intrinsics we are lowering to: 2058 llvm::Intrinsic::ID IntrinsicId; 2059 switch (BuiltinID) { 2060 default: llvm_unreachable("Unknown overflow builtin id."); 2061 case Builtin::BI__builtin_uadd_overflow: 2062 case Builtin::BI__builtin_uaddl_overflow: 2063 case Builtin::BI__builtin_uaddll_overflow: 2064 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 2065 break; 2066 case Builtin::BI__builtin_usub_overflow: 2067 case Builtin::BI__builtin_usubl_overflow: 2068 case Builtin::BI__builtin_usubll_overflow: 2069 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 2070 break; 2071 case Builtin::BI__builtin_umul_overflow: 2072 case Builtin::BI__builtin_umull_overflow: 2073 case Builtin::BI__builtin_umulll_overflow: 2074 IntrinsicId = llvm::Intrinsic::umul_with_overflow; 2075 break; 2076 case Builtin::BI__builtin_sadd_overflow: 2077 case Builtin::BI__builtin_saddl_overflow: 2078 case Builtin::BI__builtin_saddll_overflow: 2079 IntrinsicId = llvm::Intrinsic::sadd_with_overflow; 2080 break; 2081 case Builtin::BI__builtin_ssub_overflow: 2082 case Builtin::BI__builtin_ssubl_overflow: 2083 case Builtin::BI__builtin_ssubll_overflow: 2084 IntrinsicId = llvm::Intrinsic::ssub_with_overflow; 2085 break; 2086 case Builtin::BI__builtin_smul_overflow: 2087 case Builtin::BI__builtin_smull_overflow: 2088 case Builtin::BI__builtin_smulll_overflow: 2089 IntrinsicId = llvm::Intrinsic::smul_with_overflow; 2090 break; 2091 } 2092 2093 2094 llvm::Value *Carry; 2095 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry); 2096 Builder.CreateStore(Sum, SumOutPtr); 2097 2098 return RValue::get(Carry); 2099 } 2100 case Builtin::BI__builtin_addressof: 2101 return RValue::get(EmitLValue(E->getArg(0)).getPointer()); 2102 case Builtin::BI__builtin_operator_new: 2103 return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 2104 E->getArg(0), false); 2105 case Builtin::BI__builtin_operator_delete: 2106 return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 2107 E->getArg(0), true); 2108 case Builtin::BI__noop: 2109 // __noop always evaluates to an integer literal zero. 
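  // Note that the argument expressions are not emitted at all on this path;
  // e.g. '__noop(f())' yields just the constant 0 and no call to f.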
2110 return RValue::get(ConstantInt::get(IntTy, 0)); 2111 case Builtin::BI__builtin_call_with_static_chain: { 2112 const CallExpr *Call = cast<CallExpr>(E->getArg(0)); 2113 const Expr *Chain = E->getArg(1); 2114 return EmitCall(Call->getCallee()->getType(), 2115 EmitCallee(Call->getCallee()), Call, ReturnValue, 2116 EmitScalarExpr(Chain)); 2117 } 2118 case Builtin::BI_InterlockedExchange8: 2119 case Builtin::BI_InterlockedExchange16: 2120 case Builtin::BI_InterlockedExchange: 2121 case Builtin::BI_InterlockedExchangePointer: 2122 return RValue::get( 2123 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E)); 2124 case Builtin::BI_InterlockedCompareExchangePointer: { 2125 llvm::Type *RTy; 2126 llvm::IntegerType *IntType = 2127 IntegerType::get(getLLVMContext(), 2128 getContext().getTypeSize(E->getType())); 2129 llvm::Type *IntPtrType = IntType->getPointerTo(); 2130 2131 llvm::Value *Destination = 2132 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType); 2133 2134 llvm::Value *Exchange = EmitScalarExpr(E->getArg(1)); 2135 RTy = Exchange->getType(); 2136 Exchange = Builder.CreatePtrToInt(Exchange, IntType); 2137 2138 llvm::Value *Comparand = 2139 Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType); 2140 2141 auto Result = 2142 Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, 2143 AtomicOrdering::SequentiallyConsistent, 2144 AtomicOrdering::SequentiallyConsistent); 2145 Result->setVolatile(true); 2146 2147 return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result, 2148 0), 2149 RTy)); 2150 } 2151 case Builtin::BI_InterlockedCompareExchange8: 2152 case Builtin::BI_InterlockedCompareExchange16: 2153 case Builtin::BI_InterlockedCompareExchange: 2154 case Builtin::BI_InterlockedCompareExchange64: { 2155 AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg( 2156 EmitScalarExpr(E->getArg(0)), 2157 EmitScalarExpr(E->getArg(2)), 2158 EmitScalarExpr(E->getArg(1)), 2159 AtomicOrdering::SequentiallyConsistent, 2160 AtomicOrdering::SequentiallyConsistent); 2161 CXI->setVolatile(true); 2162 return RValue::get(Builder.CreateExtractValue(CXI, 0)); 2163 } 2164 case Builtin::BI_InterlockedIncrement16: 2165 case Builtin::BI_InterlockedIncrement: 2166 return RValue::get( 2167 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E)); 2168 case Builtin::BI_InterlockedDecrement16: 2169 case Builtin::BI_InterlockedDecrement: 2170 return RValue::get( 2171 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E)); 2172 case Builtin::BI_InterlockedAnd8: 2173 case Builtin::BI_InterlockedAnd16: 2174 case Builtin::BI_InterlockedAnd: 2175 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E)); 2176 case Builtin::BI_InterlockedExchangeAdd8: 2177 case Builtin::BI_InterlockedExchangeAdd16: 2178 case Builtin::BI_InterlockedExchangeAdd: 2179 return RValue::get( 2180 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E)); 2181 case Builtin::BI_InterlockedExchangeSub8: 2182 case Builtin::BI_InterlockedExchangeSub16: 2183 case Builtin::BI_InterlockedExchangeSub: 2184 return RValue::get( 2185 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E)); 2186 case Builtin::BI_InterlockedOr8: 2187 case Builtin::BI_InterlockedOr16: 2188 case Builtin::BI_InterlockedOr: 2189 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E)); 2190 case Builtin::BI_InterlockedXor8: 2191 case Builtin::BI_InterlockedXor16: 2192 case Builtin::BI_InterlockedXor: 2193 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E)); 2194 case 
Builtin::BI__readfsdword: { 2195 llvm::Type *IntTy = ConvertType(E->getType()); 2196 Value *IntToPtr = 2197 Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), 2198 llvm::PointerType::get(IntTy, 257)); 2199 LoadInst *Load = Builder.CreateAlignedLoad( 2200 IntTy, IntToPtr, getContext().getTypeAlignInChars(E->getType())); 2201 Load->setVolatile(true); 2202 return RValue::get(Load); 2203 } 2204 2205 case Builtin::BI__exception_code: 2206 case Builtin::BI_exception_code: 2207 return RValue::get(EmitSEHExceptionCode()); 2208 case Builtin::BI__exception_info: 2209 case Builtin::BI_exception_info: 2210 return RValue::get(EmitSEHExceptionInfo()); 2211 case Builtin::BI__abnormal_termination: 2212 case Builtin::BI_abnormal_termination: 2213 return RValue::get(EmitSEHAbnormalTermination()); 2214 case Builtin::BI_setjmpex: { 2215 if (getTarget().getTriple().isOSMSVCRT()) { 2216 llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; 2217 llvm::AttributeSet ReturnsTwiceAttr = 2218 AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex, 2219 llvm::Attribute::ReturnsTwice); 2220 llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction( 2221 llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), 2222 "_setjmpex", ReturnsTwiceAttr, /*Local=*/true); 2223 llvm::Value *Buf = Builder.CreateBitOrPointerCast( 2224 EmitScalarExpr(E->getArg(0)), Int8PtrTy); 2225 llvm::Value *FrameAddr = 2226 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 2227 ConstantInt::get(Int32Ty, 0)); 2228 llvm::Value *Args[] = {Buf, FrameAddr}; 2229 llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args); 2230 CS.setAttributes(ReturnsTwiceAttr); 2231 return RValue::get(CS.getInstruction()); 2232 } 2233 break; 2234 } 2235 case Builtin::BI_setjmp: { 2236 if (getTarget().getTriple().isOSMSVCRT()) { 2237 llvm::AttributeSet ReturnsTwiceAttr = 2238 AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex, 2239 llvm::Attribute::ReturnsTwice); 2240 llvm::Value *Buf = Builder.CreateBitOrPointerCast( 2241 EmitScalarExpr(E->getArg(0)), Int8PtrTy); 2242 llvm::CallSite CS; 2243 if (getTarget().getTriple().getArch() == llvm::Triple::x86) { 2244 llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy}; 2245 llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction( 2246 llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true), 2247 "_setjmp3", ReturnsTwiceAttr, /*Local=*/true); 2248 llvm::Value *Count = ConstantInt::get(IntTy, 0); 2249 llvm::Value *Args[] = {Buf, Count}; 2250 CS = EmitRuntimeCallOrInvoke(SetJmp3, Args); 2251 } else { 2252 llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; 2253 llvm::Constant *SetJmp = CGM.CreateRuntimeFunction( 2254 llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), 2255 "_setjmp", ReturnsTwiceAttr, /*Local=*/true); 2256 llvm::Value *FrameAddr = 2257 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 2258 ConstantInt::get(Int32Ty, 0)); 2259 llvm::Value *Args[] = {Buf, FrameAddr}; 2260 CS = EmitRuntimeCallOrInvoke(SetJmp, Args); 2261 } 2262 CS.setAttributes(ReturnsTwiceAttr); 2263 return RValue::get(CS.getInstruction()); 2264 } 2265 break; 2266 } 2267 2268 case Builtin::BI__GetExceptionInfo: { 2269 if (llvm::GlobalVariable *GV = 2270 CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType())) 2271 return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy)); 2272 break; 2273 } 2274 2275 case Builtin::BI__builtin_coro_size: { 2276 auto & Context = getContext(); 2277 auto SizeTy = Context.getSizeType(); 2278 auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy)); 
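  // For example, on a target where size_t is 64 bits wide, T is i64 and the
  // call below becomes @llvm.coro.size.i64().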
2279 Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T); 2280 return RValue::get(Builder.CreateCall(F)); 2281 } 2282 2283 case Builtin::BI__builtin_coro_id: 2284 return EmitCoroutineIntrinsic(E, Intrinsic::coro_id); 2285 case Builtin::BI__builtin_coro_promise: 2286 return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise); 2287 case Builtin::BI__builtin_coro_resume: 2288 return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume); 2289 case Builtin::BI__builtin_coro_frame: 2290 return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame); 2291 case Builtin::BI__builtin_coro_free: 2292 return EmitCoroutineIntrinsic(E, Intrinsic::coro_free); 2293 case Builtin::BI__builtin_coro_destroy: 2294 return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy); 2295 case Builtin::BI__builtin_coro_done: 2296 return EmitCoroutineIntrinsic(E, Intrinsic::coro_done); 2297 case Builtin::BI__builtin_coro_alloc: 2298 return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc); 2299 case Builtin::BI__builtin_coro_begin: 2300 return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin); 2301 case Builtin::BI__builtin_coro_end: 2302 return EmitCoroutineIntrinsic(E, Intrinsic::coro_end); 2303 case Builtin::BI__builtin_coro_suspend: 2304 return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend); 2305 case Builtin::BI__builtin_coro_param: 2306 return EmitCoroutineIntrinsic(E, Intrinsic::coro_param); 2307 2308 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions 2309 case Builtin::BIread_pipe: 2310 case Builtin::BIwrite_pipe: { 2311 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2312 *Arg1 = EmitScalarExpr(E->getArg(1)); 2313 CGOpenCLRuntime OpenCLRT(CGM); 2314 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2315 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2316 2317 // Type of the generic packet parameter. 2318 unsigned GenericAS = 2319 getContext().getTargetAddressSpace(LangAS::opencl_generic); 2320 llvm::Type *I8PTy = llvm::PointerType::get( 2321 llvm::Type::getInt8Ty(getLLVMContext()), GenericAS); 2322 2323 // Testing which overloaded version we should generate the call for. 2324 if (2U == E->getNumArgs()) { 2325 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2" 2326 : "__write_pipe_2"; 2327 // Creating a generic function type to be able to call with any builtin or 2328 // user defined type. 2329 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty}; 2330 llvm::FunctionType *FTy = llvm::FunctionType::get( 2331 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2332 Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy); 2333 return RValue::get( 2334 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2335 {Arg0, BCast, PacketSize, PacketAlign})); 2336 } else { 2337 assert(4 == E->getNumArgs() && 2338 "Illegal number of parameters to pipe function"); 2339 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4" 2340 : "__write_pipe_4"; 2341 2342 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy, 2343 Int32Ty, Int32Ty}; 2344 Value *Arg2 = EmitScalarExpr(E->getArg(2)), 2345 *Arg3 = EmitScalarExpr(E->getArg(3)); 2346 llvm::FunctionType *FTy = llvm::FunctionType::get( 2347 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2348 Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy); 2349 // We know the third argument is an integer type, but we may need to cast 2350 // it to i32. 
2351 if (Arg2->getType() != Int32Ty) 2352 Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty); 2353 return RValue::get(Builder.CreateCall( 2354 CGM.CreateRuntimeFunction(FTy, Name), 2355 {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign})); 2356 } 2357 } 2358 // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write 2359 // functions 2360 case Builtin::BIreserve_read_pipe: 2361 case Builtin::BIreserve_write_pipe: 2362 case Builtin::BIwork_group_reserve_read_pipe: 2363 case Builtin::BIwork_group_reserve_write_pipe: 2364 case Builtin::BIsub_group_reserve_read_pipe: 2365 case Builtin::BIsub_group_reserve_write_pipe: { 2366 // Composing the mangled name for the function. 2367 const char *Name; 2368 if (BuiltinID == Builtin::BIreserve_read_pipe) 2369 Name = "__reserve_read_pipe"; 2370 else if (BuiltinID == Builtin::BIreserve_write_pipe) 2371 Name = "__reserve_write_pipe"; 2372 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe) 2373 Name = "__work_group_reserve_read_pipe"; 2374 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe) 2375 Name = "__work_group_reserve_write_pipe"; 2376 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe) 2377 Name = "__sub_group_reserve_read_pipe"; 2378 else 2379 Name = "__sub_group_reserve_write_pipe"; 2380 2381 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2382 *Arg1 = EmitScalarExpr(E->getArg(1)); 2383 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy); 2384 CGOpenCLRuntime OpenCLRT(CGM); 2385 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2386 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2387 2388 // Building the generic function prototype. 2389 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty}; 2390 llvm::FunctionType *FTy = llvm::FunctionType::get( 2391 ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2392 // We know the second argument is an integer type, but we may need to cast 2393 // it to i32. 2394 if (Arg1->getType() != Int32Ty) 2395 Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty); 2396 return RValue::get( 2397 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2398 {Arg0, Arg1, PacketSize, PacketAlign})); 2399 } 2400 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write 2401 // functions 2402 case Builtin::BIcommit_read_pipe: 2403 case Builtin::BIcommit_write_pipe: 2404 case Builtin::BIwork_group_commit_read_pipe: 2405 case Builtin::BIwork_group_commit_write_pipe: 2406 case Builtin::BIsub_group_commit_read_pipe: 2407 case Builtin::BIsub_group_commit_write_pipe: { 2408 const char *Name; 2409 if (BuiltinID == Builtin::BIcommit_read_pipe) 2410 Name = "__commit_read_pipe"; 2411 else if (BuiltinID == Builtin::BIcommit_write_pipe) 2412 Name = "__commit_write_pipe"; 2413 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe) 2414 Name = "__work_group_commit_read_pipe"; 2415 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe) 2416 Name = "__work_group_commit_write_pipe"; 2417 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe) 2418 Name = "__sub_group_commit_read_pipe"; 2419 else 2420 Name = "__sub_group_commit_write_pipe"; 2421 2422 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2423 *Arg1 = EmitScalarExpr(E->getArg(1)); 2424 CGOpenCLRuntime OpenCLRT(CGM); 2425 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2426 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2427 2428 // Building the generic function prototype. 
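  // Illustrative shape of the runtime entry point built here (a sketch; the
  // actual parameter types come from the emitted arguments above), e.g.:
  //   void __commit_read_pipe(pipe p, reserve_id_t id, i32 size, i32 align);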
2429 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty}; 2430 llvm::FunctionType *FTy = 2431 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()), 2432 llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2433 2434 return RValue::get( 2435 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2436 {Arg0, Arg1, PacketSize, PacketAlign})); 2437 } 2438 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions 2439 case Builtin::BIget_pipe_num_packets: 2440 case Builtin::BIget_pipe_max_packets: { 2441 const char *Name; 2442 if (BuiltinID == Builtin::BIget_pipe_num_packets) 2443 Name = "__get_pipe_num_packets"; 2444 else 2445 Name = "__get_pipe_max_packets"; 2446 2447 // Building the generic function prototype. 2448 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 2449 CGOpenCLRuntime OpenCLRT(CGM); 2450 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2451 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2452 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty}; 2453 llvm::FunctionType *FTy = llvm::FunctionType::get( 2454 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2455 2456 return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2457 {Arg0, PacketSize, PacketAlign})); 2458 } 2459 2460 // OpenCL v2.0 s6.13.9 - Address space qualifier functions. 2461 case Builtin::BIto_global: 2462 case Builtin::BIto_local: 2463 case Builtin::BIto_private: { 2464 auto Arg0 = EmitScalarExpr(E->getArg(0)); 2465 auto NewArgT = llvm::PointerType::get(Int8Ty, 2466 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2467 auto NewRetT = llvm::PointerType::get(Int8Ty, 2468 CGM.getContext().getTargetAddressSpace( 2469 E->getType()->getPointeeType().getAddressSpace())); 2470 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false); 2471 llvm::Value *NewArg; 2472 if (Arg0->getType()->getPointerAddressSpace() != 2473 NewArgT->getPointerAddressSpace()) 2474 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT); 2475 else 2476 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT); 2477 auto NewName = std::string("__") + E->getDirectCallee()->getName().str(); 2478 auto NewCall = 2479 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg}); 2480 return RValue::get(Builder.CreateBitOrPointerCast(NewCall, 2481 ConvertType(E->getType()))); 2482 } 2483 2484 // OpenCL v2.0, s6.13.17 - Enqueue kernel function. 2485 // It contains four different overload formats specified in Table 6.13.17.1. 
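// As an illustrative OpenCL-level example (not from this file), the basic
// form
//   enqueue_kernel(q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndr, ^{ ... });
// has exactly four arguments and lowers to the __enqueue_kernel_basic call
// emitted below.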
2486 case Builtin::BIenqueue_kernel: { 2487 StringRef Name; // Generated function call name 2488 unsigned NumArgs = E->getNumArgs(); 2489 2490 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy); 2491 llvm::Type *RangeTy = ConvertType(getContext().OCLNDRangeTy); 2492 2493 llvm::Value *Queue = EmitScalarExpr(E->getArg(0)); 2494 llvm::Value *Flags = EmitScalarExpr(E->getArg(1)); 2495 llvm::Value *Range = EmitScalarExpr(E->getArg(2)); 2496 2497 if (NumArgs == 4) { 2498 // The most basic form of the call with parameters: 2499 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void) 2500 Name = "__enqueue_kernel_basic"; 2501 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, Int8PtrTy}; 2502 llvm::FunctionType *FTy = llvm::FunctionType::get( 2503 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false); 2504 2505 llvm::Value *Block = 2506 Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy); 2507 2508 return RValue::get(Builder.CreateCall( 2509 CGM.CreateRuntimeFunction(FTy, Name), {Queue, Flags, Range, Block})); 2510 } 2511 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature"); 2512 2513 // Could have events and/or vaargs. 2514 if (E->getArg(3)->getType()->isBlockPointerType()) { 2515 // No events passed, but has variadic arguments. 2516 Name = "__enqueue_kernel_vaargs"; 2517 llvm::Value *Block = 2518 Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy); 2519 // Create a vector of the arguments, as well as a constant value to 2520 // express to the runtime the number of variadic arguments. 2521 std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block, 2522 ConstantInt::get(IntTy, NumArgs - 4)}; 2523 std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, Int8PtrTy, 2524 IntTy}; 2525 2526 // Each of the following arguments specifies the size of the corresponding 2527 // argument passed to the enqueued block. 2528 for (unsigned I = 4/*Position of the first size arg*/; I < NumArgs; ++I) 2529 Args.push_back( 2530 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy)); 2531 2532 llvm::FunctionType *FTy = llvm::FunctionType::get( 2533 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true); 2534 return RValue::get( 2535 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2536 llvm::ArrayRef<llvm::Value *>(Args))); 2537 } 2538 // Any calls now have event arguments passed. 2539 if (NumArgs >= 7) { 2540 llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy); 2541 llvm::Type *EventPtrTy = EventTy->getPointerTo( 2542 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2543 2544 llvm::Value *NumEvents = 2545 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty); 2546 llvm::Value *EventList = 2547 E->getArg(4)->getType()->isArrayType() 2548 ? EmitArrayToPointerDecay(E->getArg(4)).getPointer() 2549 : EmitScalarExpr(E->getArg(4)); 2550 llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5)); 2551 // Convert to generic address space. 2552 EventList = Builder.CreatePointerCast(EventList, EventPtrTy); 2553 ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy); 2554 llvm::Value *Block = 2555 Builder.CreateBitCast(EmitScalarExpr(E->getArg(6)), Int8PtrTy); 2556 2557 std::vector<llvm::Type *> ArgTys = {QueueTy, Int32Ty, RangeTy, 2558 Int32Ty, EventPtrTy, EventPtrTy, 2559 Int8PtrTy}; 2560 2561 std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents, 2562 EventList, ClkEvent, Block}; 2563 2564 if (NumArgs == 7) { 2565 // Has events but no variadics. 
2566 Name = "__enqueue_kernel_basic_events"; 2567 llvm::FunctionType *FTy = llvm::FunctionType::get( 2568 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2569 return RValue::get( 2570 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2571 llvm::ArrayRef<llvm::Value *>(Args))); 2572 } 2573 // Has event info and variadics 2574 // Pass the number of variadics to the runtime function too. 2575 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7)); 2576 ArgTys.push_back(Int32Ty); 2577 Name = "__enqueue_kernel_events_vaargs"; 2578 2579 // Each of the following arguments specifies the size of the corresponding 2580 // argument passed to the enqueued block. 2581 for (unsigned I = 7/*Position of the first size arg*/; I < NumArgs; ++I) 2582 Args.push_back( 2583 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy)); 2584 2585 llvm::FunctionType *FTy = llvm::FunctionType::get( 2586 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true); 2587 return RValue::get( 2588 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2589 llvm::ArrayRef<llvm::Value *>(Args))); 2590 } 2591 } 2592 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block 2593 // parameter. 2594 case Builtin::BIget_kernel_work_group_size: { 2595 Value *Arg = EmitScalarExpr(E->getArg(0)); 2596 Arg = Builder.CreateBitCast(Arg, Int8PtrTy); 2597 return RValue::get( 2598 Builder.CreateCall(CGM.CreateRuntimeFunction( 2599 llvm::FunctionType::get(IntTy, Int8PtrTy, false), 2600 "__get_kernel_work_group_size_impl"), 2601 Arg)); 2602 } 2603 case Builtin::BIget_kernel_preferred_work_group_size_multiple: { 2604 Value *Arg = EmitScalarExpr(E->getArg(0)); 2605 Arg = Builder.CreateBitCast(Arg, Int8PtrTy); 2606 return RValue::get(Builder.CreateCall( 2607 CGM.CreateRuntimeFunction( 2608 llvm::FunctionType::get(IntTy, Int8PtrTy, false), 2609 "__get_kernel_preferred_work_group_multiple_impl"), 2610 Arg)); 2611 } 2612 case Builtin::BIprintf: 2613 if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) 2614 return EmitCUDADevicePrintfCallExpr(E, ReturnValue); 2615 break; 2616 case Builtin::BI__builtin_canonicalize: 2617 case Builtin::BI__builtin_canonicalizef: 2618 case Builtin::BI__builtin_canonicalizel: 2619 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize)); 2620 2621 case Builtin::BI__builtin_thread_pointer: { 2622 if (!getContext().getTargetInfo().isTLSSupported()) 2623 CGM.ErrorUnsupported(E, "__builtin_thread_pointer"); 2624 // Fall through - it's already mapped to the intrinsic by GCCBuiltin. 2625 break; 2626 } 2627 case Builtin::BI__builtin_os_log_format: { 2628 assert(E->getNumArgs() >= 2 && 2629 "__builtin_os_log_format takes at least 2 arguments"); 2630 analyze_os_log::OSLogBufferLayout Layout; 2631 analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout); 2632 Address BufAddr = EmitPointerWithAlignment(E->getArg(0)); 2633 // Ignore argument 1, the format string. It is not currently used. 
2634 CharUnits Offset; 2635 Builder.CreateStore( 2636 Builder.getInt8(Layout.getSummaryByte()), 2637 Builder.CreateConstByteGEP(BufAddr, Offset++, "summary")); 2638 Builder.CreateStore( 2639 Builder.getInt8(Layout.getNumArgsByte()), 2640 Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs")); 2641 2642 llvm::SmallVector<llvm::Value *, 4> RetainableOperands; 2643 for (const auto &Item : Layout.Items) { 2644 Builder.CreateStore( 2645 Builder.getInt8(Item.getDescriptorByte()), 2646 Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor")); 2647 Builder.CreateStore( 2648 Builder.getInt8(Item.getSizeByte()), 2649 Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize")); 2650 Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset); 2651 if (const Expr *TheExpr = Item.getExpr()) { 2652 Addr = Builder.CreateElementBitCast( 2653 Addr, ConvertTypeForMem(TheExpr->getType())); 2654 // Check if this is a retainable type. 2655 if (TheExpr->getType()->isObjCRetainableType()) { 2656 assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar && 2657 "Only scalar can be a ObjC retainable type"); 2658 llvm::Value *SV = EmitScalarExpr(TheExpr, /*Ignore*/ false); 2659 RValue RV = RValue::get(SV); 2660 LValue LV = MakeAddrLValue(Addr, TheExpr->getType()); 2661 EmitStoreThroughLValue(RV, LV); 2662 // Check if the object is constant, if not, save it in 2663 // RetainableOperands. 2664 if (!isa<Constant>(SV)) 2665 RetainableOperands.push_back(SV); 2666 } else { 2667 EmitAnyExprToMem(TheExpr, Addr, Qualifiers(), /*isInit*/ true); 2668 } 2669 } else { 2670 Addr = Builder.CreateElementBitCast(Addr, Int32Ty); 2671 Builder.CreateStore( 2672 Builder.getInt32(Item.getConstValue().getQuantity()), Addr); 2673 } 2674 Offset += Item.size(); 2675 } 2676 2677 // Push a clang.arc.use cleanup for each object in RetainableOperands. The 2678 // cleanup will cause the use to appear after the final log call, keeping 2679 // the object valid while it’s held in the log buffer. Note that if there’s 2680 // a release cleanup on the object, it will already be active; since 2681 // cleanups are emitted in reverse order, the use will occur before the 2682 // object is released. 2683 if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount && 2684 CGM.getCodeGenOpts().OptimizationLevel != 0) 2685 for (llvm::Value *object : RetainableOperands) 2686 pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), object); 2687 2688 return RValue::get(BufAddr.getPointer()); 2689 } 2690 2691 case Builtin::BI__builtin_os_log_format_buffer_size: { 2692 analyze_os_log::OSLogBufferLayout Layout; 2693 analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout); 2694 return RValue::get(ConstantInt::get(ConvertType(E->getType()), 2695 Layout.size().getQuantity())); 2696 } 2697 } 2698 2699 // If this is an alias for a lib function (e.g. __builtin_sin), emit 2700 // the call using the normal call path, but using the unmangled 2701 // version of the function name. 2702 if (getContext().BuiltinInfo.isLibFunction(BuiltinID)) 2703 return emitLibraryCall(*this, FD, E, 2704 CGM.getBuiltinLibFunction(FD, BuiltinID)); 2705 2706 // If this is a predefined lib function (e.g. malloc), emit the call 2707 // using exactly the normal call path. 2708 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID)) 2709 return emitLibraryCall(*this, FD, E, 2710 cast<llvm::Constant>(EmitScalarExpr(E->getCallee()))); 2711 2712 // Check that a call to a target specific builtin has the correct target 2713 // features. 
2714 // This check is done down here so that it is avoided for non-target-specific 2715 // builtins; however, if generic builtins start to require generic target 2716 // features then we can move it up to the beginning of the function. 2717 checkTargetFeatures(E, FD); 2718 2719 // See if we have a target specific intrinsic. 2720 const char *Name = getContext().BuiltinInfo.getName(BuiltinID); 2721 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic; 2722 StringRef Prefix = 2723 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch()); 2724 if (!Prefix.empty()) { 2725 IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name); 2726 // NOTE: we don't need to perform a compatibility flag check here since the 2727 // intrinsics are declared in Builtins*.def via LANGBUILTIN, which filters 2728 // the MS builtins via ALL_MS_LANGUAGES, so they are rejected earlier. 2729 if (IntrinsicID == Intrinsic::not_intrinsic) 2730 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name); 2731 } 2732 2733 if (IntrinsicID != Intrinsic::not_intrinsic) { 2734 SmallVector<Value*, 16> Args; 2735 2736 // Find out if any arguments are required to be integer constant 2737 // expressions. 2738 unsigned ICEArguments = 0; 2739 ASTContext::GetBuiltinTypeError Error; 2740 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 2741 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 2742 2743 Function *F = CGM.getIntrinsic(IntrinsicID); 2744 llvm::FunctionType *FTy = F->getFunctionType(); 2745 2746 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { 2747 Value *ArgValue; 2748 // If this is a normal argument, just emit it as a scalar. 2749 if ((ICEArguments & (1 << i)) == 0) { 2750 ArgValue = EmitScalarExpr(E->getArg(i)); 2751 } else { 2752 // If this is required to be a constant, constant fold it so that we 2753 // know that the generated intrinsic gets a ConstantInt. 2754 llvm::APSInt Result; 2755 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 2756 assert(IsConst && "Constant arg isn't actually constant?"); 2757 (void)IsConst; 2758 ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result); 2759 } 2760 2761 // If the intrinsic arg type is different from the builtin arg type, 2762 // we need to do a bit cast. 2763 llvm::Type *PTy = FTy->getParamType(i); 2764 if (PTy != ArgValue->getType()) { 2765 assert(ArgValue->getType()->canLosslesslyBitCastTo(PTy) && 2766 "Must be able to losslessly bit cast to param"); 2767 ArgValue = Builder.CreateBitCast(ArgValue, PTy); 2768 } 2769 2770 Args.push_back(ArgValue); 2771 } 2772 2773 Value *V = Builder.CreateCall(F, Args); 2774 QualType BuiltinRetType = E->getType(); 2775 2776 llvm::Type *RetTy = VoidTy; 2777 if (!BuiltinRetType->isVoidType()) 2778 RetTy = ConvertType(BuiltinRetType); 2779 2780 if (RetTy != V->getType()) { 2781 assert(V->getType()->canLosslesslyBitCastTo(RetTy) && 2782 "Must be able to losslessly bit cast result type"); 2783 V = Builder.CreateBitCast(V, RetTy); 2784 } 2785 2786 return RValue::get(V); 2787 } 2788 2789 // See if we have a target specific builtin that needs to be lowered. 2790 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E)) 2791 return RValue::get(V); 2792 2793 ErrorUnsupported(E, "builtin function"); 2794 2795 // Unknown builtin, for now just dump it out and return undef. 
  return GetUndefRValue(E->getType());
}

static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
                                        unsigned BuiltinID, const CallExpr *E,
                                        llvm::Triple::ArchType Arch) {
  switch (Arch) {
  case llvm::Triple::arm:
  case llvm::Triple::armeb:
  case llvm::Triple::thumb:
  case llvm::Triple::thumbeb:
    return CGF->EmitARMBuiltinExpr(BuiltinID, E);
  case llvm::Triple::aarch64:
  case llvm::Triple::aarch64_be:
    return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
  case llvm::Triple::x86:
  case llvm::Triple::x86_64:
    return CGF->EmitX86BuiltinExpr(BuiltinID, E);
  case llvm::Triple::ppc:
  case llvm::Triple::ppc64:
  case llvm::Triple::ppc64le:
    return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
  case llvm::Triple::r600:
  case llvm::Triple::amdgcn:
    return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
  case llvm::Triple::systemz:
    return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
  case llvm::Triple::nvptx:
  case llvm::Triple::nvptx64:
    return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
  case llvm::Triple::wasm32:
  case llvm::Triple::wasm64:
    return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
  default:
    return nullptr;
  }
}

Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
                                              const CallExpr *E) {
  if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
    assert(getContext().getAuxTargetInfo() && "Missing aux target info");
    return EmitTargetArchBuiltinExpr(
        this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
        getContext().getAuxTargetInfo()->getTriple().getArch());
  }

  return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
                                   getTarget().getTriple().getArch());
}

static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
                                     NeonTypeFlags TypeFlags,
                                     bool V1Ty=false) {
  int IsQuad = TypeFlags.isQuad();
  switch (TypeFlags.getEltType()) {
  case NeonTypeFlags::Int8:
  case NeonTypeFlags::Poly8:
    return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
  case NeonTypeFlags::Int16:
  case NeonTypeFlags::Poly16:
  case NeonTypeFlags::Float16:
    return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::Int32:
    return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
  case NeonTypeFlags::Int64:
  case NeonTypeFlags::Poly64:
    return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
  case NeonTypeFlags::Poly128:
    // FIXME: i128 and f128 are not yet fully supported in Clang and LLVM;
    // much of the i128/f128 API is missing. Use v16i8 to represent poly128
    // and rely on pattern matching in the backend.
    return llvm::VectorType::get(CGF->Int8Ty, 16);
  case NeonTypeFlags::Float32:
    return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
  case NeonTypeFlags::Float64:
    return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
  }
  llvm_unreachable("Unknown vector element type!");
}
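// A quick reference for the mapping above (it follows directly from the
// shift arithmetic): Int32 yields <2 x i32> in the 64-bit (non-quad) case
// and <4 x i32> in the 128-bit (quad) case; Float32 likewise yields
// <2 x float> or <4 x float>. Passing V1Ty forces a one-element vector,
// which the AArch64 scalar (SISD) paths rely on.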
static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
                                          NeonTypeFlags IntTypeFlags) {
  int IsQuad = IntTypeFlags.isQuad();
  switch (IntTypeFlags.getEltType()) {
  case NeonTypeFlags::Int32:
    return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
  case NeonTypeFlags::Int64:
    return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
  default:
    llvm_unreachable("Type can't be converted to floating-point!");
  }
}

Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
  unsigned nElts = V->getType()->getVectorNumElements();
  Value* SV = llvm::ConstantVector::getSplat(nElts, C);
  return Builder.CreateShuffleVector(V, V, SV, "lane");
}

Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
                                     const char *name,
                                     unsigned shift, bool rightshift) {
  unsigned j = 0;
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j)
    if (shift > 0 && shift == j)
      Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
    else
      Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);

  return Builder.CreateCall(F, Ops, name);
}

Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
                                            bool neg) {
  int SV = cast<ConstantInt>(V)->getSExtValue();
  return ConstantInt::get(Ty, neg ? -SV : SV);
}

/// \brief Right-shift a vector by a constant.
Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
                                          llvm::Type *Ty, bool usgn,
                                          const char *name) {
  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);

  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
  int EltSize = VTy->getScalarSizeInBits();

  Vec = Builder.CreateBitCast(Vec, Ty);

  // lshr/ashr are undefined when the shift amount is equal to the vector
  // element size.
  if (ShiftAmt == EltSize) {
    if (usgn) {
      // Right-shifting an unsigned value by its size yields 0.
      return llvm::ConstantAggregateZero::get(VTy);
    } else {
      // Right-shifting a signed value by its size is equivalent
      // to a shift of size-1.
      --ShiftAmt;
      Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
    }
  }

  Shift = EmitNeonShiftVector(Shift, Ty, false);
  if (usgn)
    return Builder.CreateLShr(Vec, Shift, name);
  else
    return Builder.CreateAShr(Vec, Shift, name);
}
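// Example of the clamping above: a signed vshr_n by 32 on <2 x i32> cannot
// be emitted as "ashr by 32" (undefined in IR), so it becomes "ashr by 31",
// which produces the same all-sign-bits result; the unsigned form folds
// straight to zero.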
enum {
  AddRetType = (1 << 0),
  Add1ArgType = (1 << 1),
  Add2ArgTypes = (1 << 2),

  VectorizeRetType = (1 << 3),
  VectorizeArgTypes = (1 << 4),

  InventFloatType = (1 << 5),
  UnsignedAlts = (1 << 6),

  Use64BitVectors = (1 << 7),
  Use128BitVectors = (1 << 8),

  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
  VectorRet = AddRetType | VectorizeRetType,
  VectorRetGetArgs01 =
      AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
  FpCmpzModifiers =
      AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
};

namespace {
struct NeonIntrinsicInfo {
  const char *NameHint;
  unsigned BuiltinID;
  unsigned LLVMIntrinsic;
  unsigned AltLLVMIntrinsic;
  unsigned TypeModifier;

  bool operator<(unsigned RHSBuiltinID) const {
    return BuiltinID < RHSBuiltinID;
  }
  bool operator<(const NeonIntrinsicInfo &TE) const {
    return BuiltinID < TE.BuiltinID;
  }
};
} // end anonymous namespace

// NEONMAP0/1/2 describe a builtin backed by zero, one, or two LLVM
// intrinsics respectively; the second intrinsic of NEONMAP2 is the
// alternative (e.g. signed) form selected via the TypeModifier flags.
#define NEONMAP0(NameBase) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }

#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, \
    Intrinsic::LLVMIntrinsic, 0, TypeModifier }

#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, \
    Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
    TypeModifier }

static const NeonIntrinsicInfo ARMSIMDIntrinsicMap[] = {
  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vabs_v, arm_neon_vabs, 0),
  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
  NEONMAP0(vaddhn_v),
  NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
  NEONMAP1(vaeseq_v, arm_neon_aese, 0),
  NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
  NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vcage_v, arm_neon_vacge, 0),
  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcale_v, arm_neon_vacge, 0),
  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvt_s32_v),
  NEONMAP0(vcvt_s64_v),
  NEONMAP0(vcvt_u32_v),
  NEONMAP0(vcvt_u64_v),
  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_s32_v),
  NEONMAP0(vcvtq_s64_v),
  NEONMAP0(vcvtq_u32_v),
  NEONMAP0(vcvtq_u64_v),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  NEONMAP0(vld1_dup_v),
  NEONMAP1(vld1_v, arm_neon_vld1, 0),
  NEONMAP0(vld1q_dup_v),
  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2_v, arm_neon_vld2, 0),
  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3_v, arm_neon_vld3, 0),
  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4_v, arm_neon_vld4, 0),
  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP0(vmull_v),
  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
  NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
  NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
  NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
  NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
  NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
  NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
  NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
  NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
  NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
  NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
  NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
  NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
  NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
  NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
  NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
  NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
  NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP1(vst1_v, arm_neon_vst1, 0),
  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2_v, arm_neon_vst2, 0),
  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3_v, arm_neon_vst3, 0),
  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4_v, arm_neon_vst4, 0),
  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtrn_v),
  NEONMAP0(vtrnq_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
  NEONMAP0(vuzp_v),
  NEONMAP0(vuzpq_v),
  NEONMAP0(vzip_v),
  NEONMAP0(vzipq_v)
};
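// How a NEONMAP2 entry is consumed (see the UnsignedAlts handling in
// EmitCommonNeonBuiltinExpr): for
//   NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds,
//            Add1ArgType | UnsignedAlts)
// the first intrinsic (the unsigned form) is the default, and the signed
// alternative is substituted when the NeonTypeFlags argument marks the
// element type as signed.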
static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
  NEONMAP0(vaddhn_v),
  NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
  NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
  NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
};
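// The scalar (SISD) map below leans heavily on Vectorize1ArgType and
// Use64BitVectors/Use128BitVectors: AArch64 has no i8/i16 scalar SIMD
// operations, so a builtin like vqaddb_s8 is emitted by inserting the
// scalar into lane 0 of a vector, calling the vector intrinsic, and
// extracting lane 0 of the result (see EmitCommonNeonSISDBuiltinExpr).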
static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
  NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
};

#undef NEONMAP0
#undef NEONMAP1
#undef NEONMAP2

static bool NEONSIMDIntrinsicsProvenSorted = false;

static bool AArch64SIMDIntrinsicsProvenSorted = false;
static bool AArch64SISDIntrinsicsProvenSorted = false;

static const NeonIntrinsicInfo *
findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
                       unsigned BuiltinID, bool &MapProvenSorted) {

#ifndef NDEBUG
  if (!MapProvenSorted) {
    assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
    MapProvenSorted = true;
  }
#endif

  const NeonIntrinsicInfo *Builtin =
      std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);

  if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
    return Builtin;

  return nullptr;
}
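// Worked example of the modifier flags (a sketch, following the logic
// below): for
//   NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors)
// the builtin returns i32, so LookupNeonLLVMIntrinsic vectorizes the return
// type to 128 / 32 = 4 elements and overloads the intrinsic on <4 x i32>,
// even though the builtin itself is scalar.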
Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
                                                   unsigned Modifier,
                                                   llvm::Type *ArgType,
                                                   const CallExpr *E) {
  int VectorSize = 0;
  if (Modifier & Use64BitVectors)
    VectorSize = 64;
  else if (Modifier & Use128BitVectors)
    VectorSize = 128;

  // Return type.
  SmallVector<llvm::Type *, 3> Tys;
  if (Modifier & AddRetType) {
    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
    if (Modifier & VectorizeRetType)
      Ty = llvm::VectorType::get(
          Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);

    Tys.push_back(Ty);
  }

  // Arguments.
  if (Modifier & VectorizeArgTypes) {
    int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
    ArgType = llvm::VectorType::get(ArgType, Elts);
  }

  if (Modifier & (Add1ArgType | Add2ArgTypes))
    Tys.push_back(ArgType);

  if (Modifier & Add2ArgTypes)
    Tys.push_back(ArgType);

  if (Modifier & InventFloatType)
    Tys.push_back(FloatTy);

  return CGM.getIntrinsic(IntrinsicID, Tys);
}

static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
                                            const NeonIntrinsicInfo &SISDInfo,
                                            SmallVectorImpl<Value *> &Ops,
                                            const CallExpr *E) {
  unsigned BuiltinID = SISDInfo.BuiltinID;
  unsigned int Int = SISDInfo.LLVMIntrinsic;
  unsigned Modifier = SISDInfo.TypeModifier;
  const char *s = SISDInfo.NameHint;

  switch (BuiltinID) {
  case NEON::BI__builtin_neon_vcled_s64:
  case NEON::BI__builtin_neon_vcled_u64:
  case NEON::BI__builtin_neon_vcles_f32:
  case NEON::BI__builtin_neon_vcled_f64:
  case NEON::BI__builtin_neon_vcltd_s64:
  case NEON::BI__builtin_neon_vcltd_u64:
  case NEON::BI__builtin_neon_vclts_f32:
  case NEON::BI__builtin_neon_vcltd_f64:
  case NEON::BI__builtin_neon_vcales_f32:
  case NEON::BI__builtin_neon_vcaled_f64:
  case NEON::BI__builtin_neon_vcalts_f32:
  case NEON::BI__builtin_neon_vcaltd_f64:
    // Only one direction of comparisons actually exists: cmle is really a
    // cmge with swapped operands. The table gives us the right intrinsic,
    // but we still need to do the swap.
    std::swap(Ops[0], Ops[1]);
    break;
  }

  assert(Int && "Generic code assumes a valid intrinsic");

  // Determine the type(s) of this overloaded AArch64 intrinsic.
  const Expr *Arg = E->getArg(0);
  llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
  Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);

  int j = 0;
  ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j) {
    llvm::Type *ArgTy = ai->getType();
    if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
        ArgTy->getPrimitiveSizeInBits())
      continue;

    assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
    // The constant argument to an _n_ intrinsic always has Int32Ty, so
    // truncate it before inserting.
    Ops[j] = CGF.Builder.CreateTruncOrBitCast(Ops[j],
                                              ArgTy->getVectorElementType());
    Ops[j] =
        CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
  }

  Value *Result = CGF.EmitNeonCall(F, Ops, s);
  llvm::Type *ResultType = CGF.ConvertType(E->getType());
  if (ResultType->getPrimitiveSizeInBits() <
      Result->getType()->getPrimitiveSizeInBits())
    return CGF.Builder.CreateExtractElement(Result, C0);

  return CGF.Builder.CreateBitCast(Result, ResultType, s);
}
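// Note on the common path below: arm_neon.h appends a constant "type code"
// as the last argument of every overloaded NEON builtin, so a call written
// as, say, vhadd_s32(a, b) reaches codegen as __builtin_neon_vhadd_v(a, b,
// code), and NeonTypeFlags unpacks that code into element type, signedness,
// and quadness. (The exact literal is produced by the generated header.)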
Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
    unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
    const char *NameHint, unsigned Modifier, const CallExpr *E,
    SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
  // Get the last argument, which specifies the vector type.
  llvm::APSInt NeonTypeConst;
  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
    return nullptr;

  // Determine the type of this overloaded NEON intrinsic.
  NeonTypeFlags Type(NeonTypeConst.getZExtValue());
  bool Usgn = Type.isUnsigned();
  bool Quad = Type.isQuad();

  llvm::VectorType *VTy = GetNeonType(this, Type);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  auto getAlignmentValue32 = [&](Address addr) -> Value* {
    return Builder.getInt32(addr.getAlignment().getQuantity());
  };

  unsigned Int = LLVMIntrinsic;
  if ((Modifier & UnsignedAlts) && !Usgn)
    Int = AltLLVMIntrinsic;

  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_vabs_v:
  case NEON::BI__builtin_neon_vabsq_v:
    if (VTy->getElementType()->isFloatingPointTy())
      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
  case NEON::BI__builtin_neon_vaddhn_v: {
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %sum = add <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");

    // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
  }
  case NEON::BI__builtin_neon_vcale_v:
  case NEON::BI__builtin_neon_vcaleq_v:
  case NEON::BI__builtin_neon_vcalt_v:
  case NEON::BI__builtin_neon_vcaltq_v:
    // Only the GT/GE intrinsics exist; swap the operands and fall through
    // to emit LE/LT in terms of GE/GT.
    std::swap(Ops[0], Ops[1]);
  case NEON::BI__builtin_neon_vcage_v:
  case NEON::BI__builtin_neon_vcageq_v:
  case NEON::BI__builtin_neon_vcagt_v:
  case NEON::BI__builtin_neon_vcagtq_v: {
    llvm::Type *VecFlt = llvm::VectorType::get(
        VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
        VTy->getNumElements());
    llvm::Type *Tys[] = { VTy, VecFlt };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vclz_v:
  case NEON::BI__builtin_neon_vclzq_v:
    // We generate a target-independent intrinsic, which needs a second
    // argument for whether or not clz of zero is undefined; on ARM it isn't.
    Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
    break;
  case NEON::BI__builtin_neon_vcvt_f32_v:
  case NEON::BI__builtin_neon_vcvtq_f32_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
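  // For the plain conversions above (e.g. vcvt_f32_s32 on <2 x i32>) a
  // single sitofp/uitofp suffices; only the fixed-point _n_ variants below
  // need target intrinsics, since IR has no fixed-point conversion.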
  case NEON::BI__builtin_neon_vcvt_n_f32_v:
  case NEON::BI__builtin_neon_vcvt_n_f64_v:
  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
    Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_n_s32_v:
  case NEON::BI__builtin_neon_vcvt_n_u32_v:
  case NEON::BI__builtin_neon_vcvt_n_s64_v:
  case NEON::BI__builtin_neon_vcvt_n_u64_v:
  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_s32_v:
  case NEON::BI__builtin_neon_vcvt_u32_v:
  case NEON::BI__builtin_neon_vcvt_s64_v:
  case NEON::BI__builtin_neon_vcvt_u64_v:
  case NEON::BI__builtin_neon_vcvtq_s32_v:
  case NEON::BI__builtin_neon_vcvtq_u32_v:
  case NEON::BI__builtin_neon_vcvtq_s64_v:
  case NEON::BI__builtin_neon_vcvtq_u64_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
    return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v:
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v:
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v:
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vext_v:
  case NEON::BI__builtin_neon_vextq_v: {
    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
    SmallVector<uint32_t, 16> Indices;
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
      Indices.push_back(i+CV);

    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
  }
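  // vext concatenates its two inputs and extracts NumElements consecutive
  // lanes starting at the immediate: on <8 x i8> with CV == 3, the mask
  // built above is <3,4,5,6,7,8,9,10>.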
  case NEON::BI__builtin_neon_vfma_v:
  case NEON::BI__builtin_neon_vfmaq_v: {
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);

    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
  }
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
  }
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v:
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v:
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    Value *Align = getAlignmentValue32(PtrOp1);
    Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    Value *V = UndefValue::get(Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
    LoadInst *Ld = Builder.CreateLoad(PtrOp0);
    llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v:
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v:
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    for (unsigned I = 2; I < Ops.size() - 1; ++I)
      Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
    Ops.push_back(getAlignmentValue32(PtrOp1));
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vmovl_v: {
    llvm::Type *DTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
    if (Usgn)
      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
  }
  case NEON::BI__builtin_neon_vmovn_v: {
    llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
  }
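  // vmovl/vmovn above need no target intrinsic: vmovl_s16 on <4 x i16> is
  // a plain sext to <4 x i32> (zext for the _u variants), and vmovn is the
  // corresponding trunc in the opposite direction.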
  case NEON::BI__builtin_neon_vmull_v:
    // FIXME: the integer vmull operations could be emitted in terms of pure
    // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
    // hoisting the exts outside loops. Until global ISel comes along that can
    // see through such movement this leads to bad CodeGen. So we need an
    // intrinsic for now.
    Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
    Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  case NEON::BI__builtin_neon_vpadal_v:
  case NEON::BI__builtin_neon_vpadalq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy =
        llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
        llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vpaddl_v:
  case NEON::BI__builtin_neon_vpaddlq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
        llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
  }
  case NEON::BI__builtin_neon_vqdmlal_v:
  case NEON::BI__builtin_neon_vqdmlsl_v: {
    SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
    Ops.resize(2);
    return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
  }
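  // The vqdmlal/vqdmlsl cases above decompose into two intrinsic calls,
  // mirroring their NEONMAP2 entries: a saturating doubling multiply
  // (e.g. sqdmull) of operands 1 and 2, followed by a saturating add/sub
  // that folds the product into operand 0.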
  case NEON::BI__builtin_neon_vqshl_n_v:
  case NEON::BI__builtin_neon_vqshlq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
                        1, false);
  case NEON::BI__builtin_neon_vqshlu_n_v:
  case NEON::BI__builtin_neon_vqshluq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
                        1, false);
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:
  case NEON::BI__builtin_neon_vrsqrte_v:
  case NEON::BI__builtin_neon_vrsqrteq_v:
    Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);

  case NEON::BI__builtin_neon_vrshr_n_v:
  case NEON::BI__builtin_neon_vrshrq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
                        1, true);
  case NEON::BI__builtin_neon_vshl_n_v:
  case NEON::BI__builtin_neon_vshlq_n_v:
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
    return Builder.CreateShl(Builder.CreateBitCast(Ops[0], Ty), Ops[1],
                             "vshl_n");
  case NEON::BI__builtin_neon_vshll_n_v: {
    llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    if (Usgn)
      Ops[0] = Builder.CreateZExt(Ops[0], VTy);
    else
      Ops[0] = Builder.CreateSExt(Ops[0], VTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
    return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
  }
  case NEON::BI__builtin_neon_vshrn_n_v: {
    llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
    if (Usgn)
      Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
    return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
  }
  case NEON::BI__builtin_neon_vshr_n_v:
  case NEON::BI__builtin_neon_vshrq_n_v:
    return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v:
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v:
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v:
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v:
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v:
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v: {
    llvm::Type *Tys[] = {Int8PtrTy, Ty};
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
  }
  case NEON::BI__builtin_neon_vsubhn_v: {
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %diff = sub <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");

    // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
  }
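  // The vtrn/vuzp/vzip cases below return two result vectors through the
  // pointer in Ops[0]: each iteration of the vi loop builds one half as a
  // shufflevector and stores it. For vtrn on <4 x i16> the two masks are
  // <0,4,2,6> and <1,5,3,7>.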
SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn"); 4020 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 4021 } 4022 return SV; 4023 } 4024 case NEON::BI__builtin_neon_vtst_v: 4025 case NEON::BI__builtin_neon_vtstq_v: { 4026 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4027 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4028 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); 4029 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], 4030 ConstantAggregateZero::get(Ty)); 4031 return Builder.CreateSExt(Ops[0], Ty, "vtst"); 4032 } 4033 case NEON::BI__builtin_neon_vuzp_v: 4034 case NEON::BI__builtin_neon_vuzpq_v: { 4035 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 4036 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4037 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 4038 Value *SV = nullptr; 4039 4040 for (unsigned vi = 0; vi != 2; ++vi) { 4041 SmallVector<uint32_t, 16> Indices; 4042 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 4043 Indices.push_back(2*i+vi); 4044 4045 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 4046 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp"); 4047 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 4048 } 4049 return SV; 4050 } 4051 case NEON::BI__builtin_neon_vzip_v: 4052 case NEON::BI__builtin_neon_vzipq_v: { 4053 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 4054 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4055 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 4056 Value *SV = nullptr; 4057 4058 for (unsigned vi = 0; vi != 2; ++vi) { 4059 SmallVector<uint32_t, 16> Indices; 4060 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 4061 Indices.push_back((i + vi*e) >> 1); 4062 Indices.push_back(((i + vi*e) >> 1)+e); 4063 } 4064 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 4065 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip"); 4066 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 4067 } 4068 return SV; 4069 } 4070 } 4071 4072 assert(Int && "Expected valid intrinsic number"); 4073 4074 // Determine the type(s) of this overloaded AArch64 intrinsic. 4075 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E); 4076 4077 Value *Result = EmitNeonCall(F, Ops, NameHint); 4078 llvm::Type *ResultType = ConvertType(E->getType()); 4079 // AArch64 intrinsic one-element vector type cast to 4080 // scalar type expected by the builtin 4081 return Builder.CreateBitCast(Result, ResultType, NameHint); 4082 } 4083 4084 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr( 4085 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp, 4086 const CmpInst::Predicate Ip, const Twine &Name) { 4087 llvm::Type *OTy = Op->getType(); 4088 4089 // FIXME: this is utterly horrific. We should not be looking at previous 4090 // codegen context to find out what needs doing. Unfortunately TableGen 4091 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32 4092 // (etc). 
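  // For example (illustrative), vceqz_f32 and vceqz_s32 reach this point
  // through identical TableGen-generated calls; peeking through an existing
  // bitcast below recovers the original element type, so the f32 variant
  // still emits fcmp rather than icmp.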
4093   if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
4094     OTy = BI->getOperand(0)->getType();
4095
4096   Op = Builder.CreateBitCast(Op, OTy);
4097   if (OTy->getScalarType()->isFloatingPointTy()) {
4098     Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
4099   } else {
4100     Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
4101   }
4102   return Builder.CreateSExt(Op, Ty, Name);
4103 }
4104
4105 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
4106                                  Value *ExtOp, Value *IndexOp,
4107                                  llvm::Type *ResTy, unsigned IntID,
4108                                  const char *Name) {
4109   SmallVector<Value *, 2> TblOps;
4110   if (ExtOp)
4111     TblOps.push_back(ExtOp);
4112
4113   // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
4114   SmallVector<uint32_t, 16> Indices;
4115   llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
4116   for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
4117     Indices.push_back(2*i);
4118     Indices.push_back(2*i+1);
4119   }
4120
4121   int PairPos = 0, End = Ops.size() - 1;
4122   while (PairPos < End) {
4123     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4124                                                      Ops[PairPos+1], Indices,
4125                                                      Name));
4126     PairPos += 2;
4127   }
4128
4129   // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
4130   // of the final 128-bit lookup table with zero.
4131   if (PairPos == End) {
4132     Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
4133     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4134                                                      ZeroTbl, Indices, Name));
4135   }
4136
4137   Function *TblF;
4138   TblOps.push_back(IndexOp);
4139   TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
4140
4141   return CGF.EmitNeonCall(TblF, TblOps, Name);
4142 }
4143
4144 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
4145   unsigned Value;
4146   switch (BuiltinID) {
4147   default:
4148     return nullptr;
4149   case ARM::BI__builtin_arm_nop:
4150     Value = 0;
4151     break;
4152   case ARM::BI__builtin_arm_yield:
4153   case ARM::BI__yield:
4154     Value = 1;
4155     break;
4156   case ARM::BI__builtin_arm_wfe:
4157   case ARM::BI__wfe:
4158     Value = 2;
4159     break;
4160   case ARM::BI__builtin_arm_wfi:
4161   case ARM::BI__wfi:
4162     Value = 3;
4163     break;
4164   case ARM::BI__builtin_arm_sev:
4165   case ARM::BI__sev:
4166     Value = 4;
4167     break;
4168   case ARM::BI__builtin_arm_sevl:
4169   case ARM::BI__sevl:
4170     Value = 5;
4171     break;
4172   }
4173
4174   return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
4175                             llvm::ConstantInt::get(Int32Ty, Value));
4176 }
4177
4178 // Generates the IR for the read/write special register builtin.
4179 // ValueType is the type of the value that is to be written or read;
4180 // RegisterType is the type of the register being written to or read from.
4181 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
4182                                          const CallExpr *E,
4183                                          llvm::Type *RegisterType,
4184                                          llvm::Type *ValueType,
4185                                          bool IsRead,
4186                                          StringRef SysReg = "") {
4187   // The read and write register intrinsics only support 32- and 64-bit
4188   // operations.
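  // For example (illustrative), a 64-bit read such as
  // __builtin_arm_rsr64("tpidr_el0") lowers to roughly:
  //   %0 = call i64 @llvm.read_register.i64(metadata !0)
  // where !0 = !{!"tpidr_el0"} names the register.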
4188 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64)) 4189 && "Unsupported size for register."); 4190 4191 CodeGen::CGBuilderTy &Builder = CGF.Builder; 4192 CodeGen::CodeGenModule &CGM = CGF.CGM; 4193 LLVMContext &Context = CGM.getLLVMContext(); 4194 4195 if (SysReg.empty()) { 4196 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts(); 4197 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString(); 4198 } 4199 4200 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) }; 4201 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); 4202 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); 4203 4204 llvm::Type *Types[] = { RegisterType }; 4205 4206 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32); 4207 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64)) 4208 && "Can't fit 64-bit value in 32-bit register"); 4209 4210 if (IsRead) { 4211 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); 4212 llvm::Value *Call = Builder.CreateCall(F, Metadata); 4213 4214 if (MixedTypes) 4215 // Read into 64 bit register and then truncate result to 32 bit. 4216 return Builder.CreateTrunc(Call, ValueType); 4217 4218 if (ValueType->isPointerTy()) 4219 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*). 4220 return Builder.CreateIntToPtr(Call, ValueType); 4221 4222 return Call; 4223 } 4224 4225 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); 4226 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1)); 4227 if (MixedTypes) { 4228 // Extend 32 bit write value to 64 bit to pass to write. 4229 ArgValue = Builder.CreateZExt(ArgValue, RegisterType); 4230 return Builder.CreateCall(F, { Metadata, ArgValue }); 4231 } 4232 4233 if (ValueType->isPointerTy()) { 4234 // Have VoidPtrTy ArgValue but want to return an i32/i64. 4235 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType); 4236 return Builder.CreateCall(F, { Metadata, ArgValue }); 4237 } 4238 4239 return Builder.CreateCall(F, { Metadata, ArgValue }); 4240 } 4241 4242 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra 4243 /// argument that specifies the vector type. 
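/// For example (illustrative), vaddq_u32 reaches codegen as
/// __builtin_neon_vaddq_v(a, b, <NeonTypeFlags constant>) with a trailing
/// type discriminator, while vget_lane_i32 is emitted from
/// __builtin_neon_vget_lane_i32(v, lane) with no such argument.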
4244 static bool HasExtraNeonArgument(unsigned BuiltinID) { 4245 switch (BuiltinID) { 4246 default: break; 4247 case NEON::BI__builtin_neon_vget_lane_i8: 4248 case NEON::BI__builtin_neon_vget_lane_i16: 4249 case NEON::BI__builtin_neon_vget_lane_i32: 4250 case NEON::BI__builtin_neon_vget_lane_i64: 4251 case NEON::BI__builtin_neon_vget_lane_f32: 4252 case NEON::BI__builtin_neon_vgetq_lane_i8: 4253 case NEON::BI__builtin_neon_vgetq_lane_i16: 4254 case NEON::BI__builtin_neon_vgetq_lane_i32: 4255 case NEON::BI__builtin_neon_vgetq_lane_i64: 4256 case NEON::BI__builtin_neon_vgetq_lane_f32: 4257 case NEON::BI__builtin_neon_vset_lane_i8: 4258 case NEON::BI__builtin_neon_vset_lane_i16: 4259 case NEON::BI__builtin_neon_vset_lane_i32: 4260 case NEON::BI__builtin_neon_vset_lane_i64: 4261 case NEON::BI__builtin_neon_vset_lane_f32: 4262 case NEON::BI__builtin_neon_vsetq_lane_i8: 4263 case NEON::BI__builtin_neon_vsetq_lane_i16: 4264 case NEON::BI__builtin_neon_vsetq_lane_i32: 4265 case NEON::BI__builtin_neon_vsetq_lane_i64: 4266 case NEON::BI__builtin_neon_vsetq_lane_f32: 4267 case NEON::BI__builtin_neon_vsha1h_u32: 4268 case NEON::BI__builtin_neon_vsha1cq_u32: 4269 case NEON::BI__builtin_neon_vsha1pq_u32: 4270 case NEON::BI__builtin_neon_vsha1mq_u32: 4271 case ARM::BI_MoveToCoprocessor: 4272 case ARM::BI_MoveToCoprocessor2: 4273 return false; 4274 } 4275 return true; 4276 } 4277 4278 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, 4279 const CallExpr *E) { 4280 if (auto Hint = GetValueForARMHint(BuiltinID)) 4281 return Hint; 4282 4283 if (BuiltinID == ARM::BI__emit) { 4284 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb; 4285 llvm::FunctionType *FTy = 4286 llvm::FunctionType::get(VoidTy, /*Variadic=*/false); 4287 4288 APSInt Value; 4289 if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext())) 4290 llvm_unreachable("Sema will ensure that the parameter is constant"); 4291 4292 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue(); 4293 4294 llvm::InlineAsm *Emit = 4295 IsThumb ? 
InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
4296                        /*SideEffects=*/true)
4297               : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
4298                                /*SideEffects=*/true);
4299
4300     return Builder.CreateCall(Emit);
4301   }
4302
4303   if (BuiltinID == ARM::BI__builtin_arm_dbg) {
4304     Value *Option = EmitScalarExpr(E->getArg(0));
4305     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
4306   }
4307
4308   if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
4309     Value *Address = EmitScalarExpr(E->getArg(0));
4310     Value *RW = EmitScalarExpr(E->getArg(1));
4311     Value *IsData = EmitScalarExpr(E->getArg(2));
4312
4313     // Locality is not supported on the ARM target.
4314     Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
4315
4316     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
4317     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
4318   }
4319
4320   if (BuiltinID == ARM::BI__builtin_arm_rbit) {
4321     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4322     return Builder.CreateCall(
4323         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
4324   }
4325
4326   if (BuiltinID == ARM::BI__clear_cache) {
4327     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4328     const FunctionDecl *FD = E->getDirectCallee();
4329     Value *Ops[2];
4330     for (unsigned i = 0; i < 2; i++)
4331       Ops[i] = EmitScalarExpr(E->getArg(i));
4332     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4333     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4334     StringRef Name = FD->getName();
4335     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4336   }
4337
4338   if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
4339       BuiltinID == ARM::BI__builtin_arm_mcrr2) {
4340     Function *F;
4341
4342     switch (BuiltinID) {
4343     default: llvm_unreachable("unexpected builtin");
4344     case ARM::BI__builtin_arm_mcrr:
4345       F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
4346       break;
4347     case ARM::BI__builtin_arm_mcrr2:
4348       F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
4349       break;
4350     }
4351
4352     // The MCRR{2} instruction has 5 operands, but the intrinsic has only 4:
4353     // Rt and Rt2 are represented as a single unsigned 64-bit integer in the
4354     // intrinsic definition, even though the instruction takes them as two
4355     // separate 32-bit registers.
4358
4359     Value *Coproc = EmitScalarExpr(E->getArg(0));
4360     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4361     Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
4362     Value *CRm = EmitScalarExpr(E->getArg(3));
4363
4364     Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4365     Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
4366     Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
4367     Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
4368
4369     return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
4370   }
4371
4372   if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
4373       BuiltinID == ARM::BI__builtin_arm_mrrc2) {
4374     Function *F;
4375
4376     switch (BuiltinID) {
4377     default: llvm_unreachable("unexpected builtin");
4378     case ARM::BI__builtin_arm_mrrc:
4379       F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
4380       break;
4381     case ARM::BI__builtin_arm_mrrc2:
4382       F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
4383       break;
4384     }
4385
4386     Value *Coproc = EmitScalarExpr(E->getArg(0));
4387     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4388     Value *CRm = EmitScalarExpr(E->getArg(2));
4389     Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
4390
4391     // The intrinsic returns an unsigned 64-bit integer represented as two
4392     // 32-bit integers; recombine them below.
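    // A rough sketch of the recombination (illustrative; value names are
    // made up):
    //   %hi = zext i32 %rt to i64
    //   %lo = zext i32 %rt1 to i64
    //   %hi.shl = shl nuw i64 %hi, 32
    //   %res = or i64 %hi.shl, %lo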
4393 4394 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1); 4395 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0); 4396 Rt = Builder.CreateZExt(Rt, Int64Ty); 4397 Rt1 = Builder.CreateZExt(Rt1, Int64Ty); 4398 4399 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32); 4400 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true); 4401 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1); 4402 4403 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType())); 4404 } 4405 4406 if (BuiltinID == ARM::BI__builtin_arm_ldrexd || 4407 ((BuiltinID == ARM::BI__builtin_arm_ldrex || 4408 BuiltinID == ARM::BI__builtin_arm_ldaex) && 4409 getContext().getTypeSize(E->getType()) == 64) || 4410 BuiltinID == ARM::BI__ldrexd) { 4411 Function *F; 4412 4413 switch (BuiltinID) { 4414 default: llvm_unreachable("unexpected builtin"); 4415 case ARM::BI__builtin_arm_ldaex: 4416 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd); 4417 break; 4418 case ARM::BI__builtin_arm_ldrexd: 4419 case ARM::BI__builtin_arm_ldrex: 4420 case ARM::BI__ldrexd: 4421 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd); 4422 break; 4423 } 4424 4425 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 4426 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 4427 "ldrexd"); 4428 4429 Value *Val0 = Builder.CreateExtractValue(Val, 1); 4430 Value *Val1 = Builder.CreateExtractValue(Val, 0); 4431 Val0 = Builder.CreateZExt(Val0, Int64Ty); 4432 Val1 = Builder.CreateZExt(Val1, Int64Ty); 4433 4434 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32); 4435 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 4436 Val = Builder.CreateOr(Val, Val1); 4437 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 4438 } 4439 4440 if (BuiltinID == ARM::BI__builtin_arm_ldrex || 4441 BuiltinID == ARM::BI__builtin_arm_ldaex) { 4442 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 4443 4444 QualType Ty = E->getType(); 4445 llvm::Type *RealResTy = ConvertType(Ty); 4446 llvm::Type *PtrTy = llvm::IntegerType::get( 4447 getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo(); 4448 LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy); 4449 4450 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex 4451 ? Intrinsic::arm_ldaex 4452 : Intrinsic::arm_ldrex, 4453 PtrTy); 4454 Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex"); 4455 4456 if (RealResTy->isPointerTy()) 4457 return Builder.CreateIntToPtr(Val, RealResTy); 4458 else { 4459 llvm::Type *IntResTy = llvm::IntegerType::get( 4460 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy)); 4461 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 4462 return Builder.CreateBitCast(Val, RealResTy); 4463 } 4464 } 4465 4466 if (BuiltinID == ARM::BI__builtin_arm_strexd || 4467 ((BuiltinID == ARM::BI__builtin_arm_stlex || 4468 BuiltinID == ARM::BI__builtin_arm_strex) && 4469 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) { 4470 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex 4471 ? 
Intrinsic::arm_stlexd 4472 : Intrinsic::arm_strexd); 4473 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, nullptr); 4474 4475 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 4476 Value *Val = EmitScalarExpr(E->getArg(0)); 4477 Builder.CreateStore(Val, Tmp); 4478 4479 Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy)); 4480 Val = Builder.CreateLoad(LdPtr); 4481 4482 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 4483 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 4484 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy); 4485 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd"); 4486 } 4487 4488 if (BuiltinID == ARM::BI__builtin_arm_strex || 4489 BuiltinID == ARM::BI__builtin_arm_stlex) { 4490 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 4491 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 4492 4493 QualType Ty = E->getArg(0)->getType(); 4494 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 4495 getContext().getTypeSize(Ty)); 4496 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 4497 4498 if (StoreVal->getType()->isPointerTy()) 4499 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty); 4500 else { 4501 llvm::Type *IntTy = llvm::IntegerType::get( 4502 getLLVMContext(), 4503 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType())); 4504 StoreVal = Builder.CreateBitCast(StoreVal, IntTy); 4505 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty); 4506 } 4507 4508 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex 4509 ? Intrinsic::arm_stlex 4510 : Intrinsic::arm_strex, 4511 StoreAddr->getType()); 4512 return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex"); 4513 } 4514 4515 switch (BuiltinID) { 4516 case ARM::BI__iso_volatile_load8: 4517 case ARM::BI__iso_volatile_load16: 4518 case ARM::BI__iso_volatile_load32: 4519 case ARM::BI__iso_volatile_load64: { 4520 Value *Ptr = EmitScalarExpr(E->getArg(0)); 4521 QualType ElTy = E->getArg(0)->getType()->getPointeeType(); 4522 CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy); 4523 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 4524 LoadSize.getQuantity() * 8); 4525 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 4526 llvm::LoadInst *Load = 4527 Builder.CreateAlignedLoad(Ptr, LoadSize); 4528 Load->setVolatile(true); 4529 return Load; 4530 } 4531 case ARM::BI__iso_volatile_store8: 4532 case ARM::BI__iso_volatile_store16: 4533 case ARM::BI__iso_volatile_store32: 4534 case ARM::BI__iso_volatile_store64: { 4535 Value *Ptr = EmitScalarExpr(E->getArg(0)); 4536 Value *Value = EmitScalarExpr(E->getArg(1)); 4537 QualType ElTy = E->getArg(0)->getType()->getPointeeType(); 4538 CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); 4539 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 4540 StoreSize.getQuantity() * 8); 4541 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 4542 llvm::StoreInst *Store = 4543 Builder.CreateAlignedStore(Value, Ptr, 4544 StoreSize); 4545 Store->setVolatile(true); 4546 return Store; 4547 } 4548 } 4549 4550 if (BuiltinID == ARM::BI__builtin_arm_clrex) { 4551 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex); 4552 return Builder.CreateCall(F); 4553 } 4554 4555 // CRC32 4556 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 4557 switch (BuiltinID) { 4558 case ARM::BI__builtin_arm_crc32b: 4559 CRCIntrinsicID = Intrinsic::arm_crc32b; break; 4560 case ARM::BI__builtin_arm_crc32cb: 4561 CRCIntrinsicID = Intrinsic::arm_crc32cb; break; 
4562   case ARM::BI__builtin_arm_crc32h:
4563     CRCIntrinsicID = Intrinsic::arm_crc32h; break;
4564   case ARM::BI__builtin_arm_crc32ch:
4565     CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
4566   case ARM::BI__builtin_arm_crc32w:
4567   case ARM::BI__builtin_arm_crc32d:
4568     CRCIntrinsicID = Intrinsic::arm_crc32w; break;
4569   case ARM::BI__builtin_arm_crc32cw:
4570   case ARM::BI__builtin_arm_crc32cd:
4571     CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
4572   }
4573
4574   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4575     Value *Arg0 = EmitScalarExpr(E->getArg(0));
4576     Value *Arg1 = EmitScalarExpr(E->getArg(1));
4577
4578     // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
4579     // intrinsics, hence we need different codegen for these cases.
4580     if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
4581         BuiltinID == ARM::BI__builtin_arm_crc32cd) {
4582       Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4583       Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
4584       Value *Arg1b = Builder.CreateLShr(Arg1, C1);
4585       Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
4586
4587       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4588       Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
4589       return Builder.CreateCall(F, {Res, Arg1b});
4590     } else {
4591       Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
4592
4593       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4594       return Builder.CreateCall(F, {Arg0, Arg1});
4595     }
4596   }
4597
4598   if (BuiltinID == ARM::BI__builtin_arm_rsr ||
4599       BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4600       BuiltinID == ARM::BI__builtin_arm_rsrp ||
4601       BuiltinID == ARM::BI__builtin_arm_wsr ||
4602       BuiltinID == ARM::BI__builtin_arm_wsr64 ||
4603       BuiltinID == ARM::BI__builtin_arm_wsrp) {
4604
4605     bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
4606                   BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4607                   BuiltinID == ARM::BI__builtin_arm_rsrp;
4608
4609     bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
4610                             BuiltinID == ARM::BI__builtin_arm_wsrp;
4611
4612     bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4613                    BuiltinID == ARM::BI__builtin_arm_wsr64;
4614
4615     llvm::Type *ValueType;
4616     llvm::Type *RegisterType;
4617     if (IsPointerBuiltin) {
4618       ValueType = VoidPtrTy;
4619       RegisterType = Int32Ty;
4620     } else if (Is64Bit) {
4621       ValueType = RegisterType = Int64Ty;
4622     } else {
4623       ValueType = RegisterType = Int32Ty;
4624     }
4625
4626     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
4627   }
4628
4629   // Find out if any arguments are required to be integer constant
4630   // expressions.
4631   unsigned ICEArguments = 0;
4632   ASTContext::GetBuiltinTypeError Error;
4633   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4634   assert(Error == ASTContext::GE_None && "Should not codegen an error");
4635
4636   auto getAlignmentValue32 = [&](Address addr) -> Value* {
4637     return Builder.getInt32(addr.getAlignment().getQuantity());
4638   };
4639
4640   Address PtrOp0 = Address::invalid();
4641   Address PtrOp1 = Address::invalid();
4642   SmallVector<Value*, 4> Ops;
4643   bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
4644   unsigned NumArgs = E->getNumArgs() - (HasExtraArg ?
1 : 0); 4645 for (unsigned i = 0, e = NumArgs; i != e; i++) { 4646 if (i == 0) { 4647 switch (BuiltinID) { 4648 case NEON::BI__builtin_neon_vld1_v: 4649 case NEON::BI__builtin_neon_vld1q_v: 4650 case NEON::BI__builtin_neon_vld1q_lane_v: 4651 case NEON::BI__builtin_neon_vld1_lane_v: 4652 case NEON::BI__builtin_neon_vld1_dup_v: 4653 case NEON::BI__builtin_neon_vld1q_dup_v: 4654 case NEON::BI__builtin_neon_vst1_v: 4655 case NEON::BI__builtin_neon_vst1q_v: 4656 case NEON::BI__builtin_neon_vst1q_lane_v: 4657 case NEON::BI__builtin_neon_vst1_lane_v: 4658 case NEON::BI__builtin_neon_vst2_v: 4659 case NEON::BI__builtin_neon_vst2q_v: 4660 case NEON::BI__builtin_neon_vst2_lane_v: 4661 case NEON::BI__builtin_neon_vst2q_lane_v: 4662 case NEON::BI__builtin_neon_vst3_v: 4663 case NEON::BI__builtin_neon_vst3q_v: 4664 case NEON::BI__builtin_neon_vst3_lane_v: 4665 case NEON::BI__builtin_neon_vst3q_lane_v: 4666 case NEON::BI__builtin_neon_vst4_v: 4667 case NEON::BI__builtin_neon_vst4q_v: 4668 case NEON::BI__builtin_neon_vst4_lane_v: 4669 case NEON::BI__builtin_neon_vst4q_lane_v: 4670 // Get the alignment for the argument in addition to the value; 4671 // we'll use it later. 4672 PtrOp0 = EmitPointerWithAlignment(E->getArg(0)); 4673 Ops.push_back(PtrOp0.getPointer()); 4674 continue; 4675 } 4676 } 4677 if (i == 1) { 4678 switch (BuiltinID) { 4679 case NEON::BI__builtin_neon_vld2_v: 4680 case NEON::BI__builtin_neon_vld2q_v: 4681 case NEON::BI__builtin_neon_vld3_v: 4682 case NEON::BI__builtin_neon_vld3q_v: 4683 case NEON::BI__builtin_neon_vld4_v: 4684 case NEON::BI__builtin_neon_vld4q_v: 4685 case NEON::BI__builtin_neon_vld2_lane_v: 4686 case NEON::BI__builtin_neon_vld2q_lane_v: 4687 case NEON::BI__builtin_neon_vld3_lane_v: 4688 case NEON::BI__builtin_neon_vld3q_lane_v: 4689 case NEON::BI__builtin_neon_vld4_lane_v: 4690 case NEON::BI__builtin_neon_vld4q_lane_v: 4691 case NEON::BI__builtin_neon_vld2_dup_v: 4692 case NEON::BI__builtin_neon_vld3_dup_v: 4693 case NEON::BI__builtin_neon_vld4_dup_v: 4694 // Get the alignment for the argument in addition to the value; 4695 // we'll use it later. 4696 PtrOp1 = EmitPointerWithAlignment(E->getArg(1)); 4697 Ops.push_back(PtrOp1.getPointer()); 4698 continue; 4699 } 4700 } 4701 4702 if ((ICEArguments & (1 << i)) == 0) { 4703 Ops.push_back(EmitScalarExpr(E->getArg(i))); 4704 } else { 4705 // If this is required to be a constant, constant fold it so that we know 4706 // that the generated intrinsic gets a ConstantInt. 
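      // (For example, the lane number of vget_lane and the shift amount of
      // vshr_n are required to be integer constant expressions.)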
4707 llvm::APSInt Result; 4708 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 4709 assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; 4710 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 4711 } 4712 } 4713 4714 switch (BuiltinID) { 4715 default: break; 4716 4717 case NEON::BI__builtin_neon_vget_lane_i8: 4718 case NEON::BI__builtin_neon_vget_lane_i16: 4719 case NEON::BI__builtin_neon_vget_lane_i32: 4720 case NEON::BI__builtin_neon_vget_lane_i64: 4721 case NEON::BI__builtin_neon_vget_lane_f32: 4722 case NEON::BI__builtin_neon_vgetq_lane_i8: 4723 case NEON::BI__builtin_neon_vgetq_lane_i16: 4724 case NEON::BI__builtin_neon_vgetq_lane_i32: 4725 case NEON::BI__builtin_neon_vgetq_lane_i64: 4726 case NEON::BI__builtin_neon_vgetq_lane_f32: 4727 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane"); 4728 4729 case NEON::BI__builtin_neon_vset_lane_i8: 4730 case NEON::BI__builtin_neon_vset_lane_i16: 4731 case NEON::BI__builtin_neon_vset_lane_i32: 4732 case NEON::BI__builtin_neon_vset_lane_i64: 4733 case NEON::BI__builtin_neon_vset_lane_f32: 4734 case NEON::BI__builtin_neon_vsetq_lane_i8: 4735 case NEON::BI__builtin_neon_vsetq_lane_i16: 4736 case NEON::BI__builtin_neon_vsetq_lane_i32: 4737 case NEON::BI__builtin_neon_vsetq_lane_i64: 4738 case NEON::BI__builtin_neon_vsetq_lane_f32: 4739 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 4740 4741 case NEON::BI__builtin_neon_vsha1h_u32: 4742 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops, 4743 "vsha1h"); 4744 case NEON::BI__builtin_neon_vsha1cq_u32: 4745 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops, 4746 "vsha1h"); 4747 case NEON::BI__builtin_neon_vsha1pq_u32: 4748 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops, 4749 "vsha1h"); 4750 case NEON::BI__builtin_neon_vsha1mq_u32: 4751 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops, 4752 "vsha1h"); 4753 4754 // The ARM _MoveToCoprocessor builtins put the input register value as 4755 // the first argument, but the LLVM intrinsic expects it as the third one. 4756 case ARM::BI_MoveToCoprocessor: 4757 case ARM::BI_MoveToCoprocessor2: { 4758 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ? 
4759 Intrinsic::arm_mcr : Intrinsic::arm_mcr2); 4760 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0], 4761 Ops[3], Ops[4], Ops[5]}); 4762 } 4763 case ARM::BI_BitScanForward: 4764 case ARM::BI_BitScanForward64: 4765 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); 4766 case ARM::BI_BitScanReverse: 4767 case ARM::BI_BitScanReverse64: 4768 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E); 4769 4770 case ARM::BI_InterlockedAnd64: 4771 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E); 4772 case ARM::BI_InterlockedExchange64: 4773 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E); 4774 case ARM::BI_InterlockedExchangeAdd64: 4775 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E); 4776 case ARM::BI_InterlockedExchangeSub64: 4777 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E); 4778 case ARM::BI_InterlockedOr64: 4779 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E); 4780 case ARM::BI_InterlockedXor64: 4781 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E); 4782 case ARM::BI_InterlockedDecrement64: 4783 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); 4784 case ARM::BI_InterlockedIncrement64: 4785 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); 4786 } 4787 4788 // Get the last argument, which specifies the vector type. 4789 assert(HasExtraArg); 4790 llvm::APSInt Result; 4791 const Expr *Arg = E->getArg(E->getNumArgs()-1); 4792 if (!Arg->isIntegerConstantExpr(Result, getContext())) 4793 return nullptr; 4794 4795 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f || 4796 BuiltinID == ARM::BI__builtin_arm_vcvtr_d) { 4797 // Determine the overloaded type of this builtin. 4798 llvm::Type *Ty; 4799 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f) 4800 Ty = FloatTy; 4801 else 4802 Ty = DoubleTy; 4803 4804 // Determine whether this is an unsigned conversion or not. 4805 bool usgn = Result.getZExtValue() == 1; 4806 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr; 4807 4808 // Call the appropriate intrinsic. 4809 Function *F = CGM.getIntrinsic(Int, Ty); 4810 return Builder.CreateCall(F, Ops, "vcvtr"); 4811 } 4812 4813 // Determine the type of this overloaded NEON intrinsic. 4814 NeonTypeFlags Type(Result.getZExtValue()); 4815 bool usgn = Type.isUnsigned(); 4816 bool rightShift = false; 4817 4818 llvm::VectorType *VTy = GetNeonType(this, Type); 4819 llvm::Type *Ty = VTy; 4820 if (!Ty) 4821 return nullptr; 4822 4823 // Many NEON builtins have identical semantics and uses in ARM and 4824 // AArch64. Emit these in a single function. 4825 auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap); 4826 const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( 4827 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted); 4828 if (Builtin) 4829 return EmitCommonNeonBuiltinExpr( 4830 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 4831 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1); 4832 4833 unsigned Int; 4834 switch (BuiltinID) { 4835 default: return nullptr; 4836 case NEON::BI__builtin_neon_vld1q_lane_v: 4837 // Handle 64-bit integer elements as a special case. Use shuffles of 4838 // one-element vectors to avoid poor code for i64 in the backend. 4839 if (VTy->getElementType()->isIntegerTy(64)) { 4840 // Extract the other lane. 
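      // (Illustrative: for Lane == 1 we keep lane 0 of the existing vector,
      // load the replacement value as a <1 x i64>, and shuffle the two back
      // together below.)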
4841 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4842 uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue(); 4843 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane)); 4844 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 4845 // Load the value as a one-element vector. 4846 Ty = llvm::VectorType::get(VTy->getElementType(), 1); 4847 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 4848 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys); 4849 Value *Align = getAlignmentValue32(PtrOp0); 4850 Value *Ld = Builder.CreateCall(F, {Ops[0], Align}); 4851 // Combine them. 4852 uint32_t Indices[] = {1 - Lane, Lane}; 4853 SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices); 4854 return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane"); 4855 } 4856 // fall through 4857 case NEON::BI__builtin_neon_vld1_lane_v: { 4858 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4859 PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType()); 4860 Value *Ld = Builder.CreateLoad(PtrOp0); 4861 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); 4862 } 4863 case NEON::BI__builtin_neon_vld2_dup_v: 4864 case NEON::BI__builtin_neon_vld3_dup_v: 4865 case NEON::BI__builtin_neon_vld4_dup_v: { 4866 // Handle 64-bit elements as a special-case. There is no "dup" needed. 4867 if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) { 4868 switch (BuiltinID) { 4869 case NEON::BI__builtin_neon_vld2_dup_v: 4870 Int = Intrinsic::arm_neon_vld2; 4871 break; 4872 case NEON::BI__builtin_neon_vld3_dup_v: 4873 Int = Intrinsic::arm_neon_vld3; 4874 break; 4875 case NEON::BI__builtin_neon_vld4_dup_v: 4876 Int = Intrinsic::arm_neon_vld4; 4877 break; 4878 default: llvm_unreachable("unknown vld_dup intrinsic?"); 4879 } 4880 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 4881 Function *F = CGM.getIntrinsic(Int, Tys); 4882 llvm::Value *Align = getAlignmentValue32(PtrOp1); 4883 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup"); 4884 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 4885 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4886 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 4887 } 4888 switch (BuiltinID) { 4889 case NEON::BI__builtin_neon_vld2_dup_v: 4890 Int = Intrinsic::arm_neon_vld2lane; 4891 break; 4892 case NEON::BI__builtin_neon_vld3_dup_v: 4893 Int = Intrinsic::arm_neon_vld3lane; 4894 break; 4895 case NEON::BI__builtin_neon_vld4_dup_v: 4896 Int = Intrinsic::arm_neon_vld4lane; 4897 break; 4898 default: llvm_unreachable("unknown vld_dup intrinsic?"); 4899 } 4900 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 4901 Function *F = CGM.getIntrinsic(Int, Tys); 4902 llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType()); 4903 4904 SmallVector<Value*, 6> Args; 4905 Args.push_back(Ops[1]); 4906 Args.append(STy->getNumElements(), UndefValue::get(Ty)); 4907 4908 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 4909 Args.push_back(CI); 4910 Args.push_back(getAlignmentValue32(PtrOp1)); 4911 4912 Ops[1] = Builder.CreateCall(F, Args, "vld_dup"); 4913 // splat lane 0 to all elts in each vector of the result. 
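    // Roughly, for each <8 x i8> member of the result struct (illustrative):
    //   %elt = extractvalue { <8 x i8>, <8 x i8> } %vld_dup, 0
    //   %dup = shufflevector <8 x i8> %elt, <8 x i8> undef, <8 x i32> zeroinitializer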
4914 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 4915 Value *Val = Builder.CreateExtractValue(Ops[1], i); 4916 Value *Elt = Builder.CreateBitCast(Val, Ty); 4917 Elt = EmitNeonSplat(Elt, CI); 4918 Elt = Builder.CreateBitCast(Elt, Val->getType()); 4919 Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i); 4920 } 4921 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 4922 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4923 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 4924 } 4925 case NEON::BI__builtin_neon_vqrshrn_n_v: 4926 Int = 4927 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns; 4928 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n", 4929 1, true); 4930 case NEON::BI__builtin_neon_vqrshrun_n_v: 4931 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty), 4932 Ops, "vqrshrun_n", 1, true); 4933 case NEON::BI__builtin_neon_vqshrn_n_v: 4934 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns; 4935 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n", 4936 1, true); 4937 case NEON::BI__builtin_neon_vqshrun_n_v: 4938 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty), 4939 Ops, "vqshrun_n", 1, true); 4940 case NEON::BI__builtin_neon_vrecpe_v: 4941 case NEON::BI__builtin_neon_vrecpeq_v: 4942 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty), 4943 Ops, "vrecpe"); 4944 case NEON::BI__builtin_neon_vrshrn_n_v: 4945 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty), 4946 Ops, "vrshrn_n", 1, true); 4947 case NEON::BI__builtin_neon_vrsra_n_v: 4948 case NEON::BI__builtin_neon_vrsraq_n_v: 4949 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4950 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4951 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true); 4952 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts; 4953 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]}); 4954 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n"); 4955 case NEON::BI__builtin_neon_vsri_n_v: 4956 case NEON::BI__builtin_neon_vsriq_n_v: 4957 rightShift = true; 4958 case NEON::BI__builtin_neon_vsli_n_v: 4959 case NEON::BI__builtin_neon_vsliq_n_v: 4960 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift); 4961 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty), 4962 Ops, "vsli_n"); 4963 case NEON::BI__builtin_neon_vsra_n_v: 4964 case NEON::BI__builtin_neon_vsraq_n_v: 4965 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4966 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 4967 return Builder.CreateAdd(Ops[0], Ops[1]); 4968 case NEON::BI__builtin_neon_vst1q_lane_v: 4969 // Handle 64-bit integer elements as a special case. Use a shuffle to get 4970 // a one-element vector and avoid poor code for i64 in the backend. 
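  // Roughly, when storing lane 1 of a <2 x i64> (illustrative; the exact
  // intrinsic mangling depends on the overload):
  //   %lane = shufflevector <2 x i64> %val, <2 x i64> %val, <1 x i32> <i32 1>
  //   call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* %ptr, <1 x i64> %lane, i32 8)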
4971 if (VTy->getElementType()->isIntegerTy(64)) { 4972 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4973 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2])); 4974 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 4975 Ops[2] = getAlignmentValue32(PtrOp0); 4976 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()}; 4977 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, 4978 Tys), Ops); 4979 } 4980 // fall through 4981 case NEON::BI__builtin_neon_vst1_lane_v: { 4982 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4983 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 4984 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 4985 auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty)); 4986 return St; 4987 } 4988 case NEON::BI__builtin_neon_vtbl1_v: 4989 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1), 4990 Ops, "vtbl1"); 4991 case NEON::BI__builtin_neon_vtbl2_v: 4992 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2), 4993 Ops, "vtbl2"); 4994 case NEON::BI__builtin_neon_vtbl3_v: 4995 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3), 4996 Ops, "vtbl3"); 4997 case NEON::BI__builtin_neon_vtbl4_v: 4998 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4), 4999 Ops, "vtbl4"); 5000 case NEON::BI__builtin_neon_vtbx1_v: 5001 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1), 5002 Ops, "vtbx1"); 5003 case NEON::BI__builtin_neon_vtbx2_v: 5004 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2), 5005 Ops, "vtbx2"); 5006 case NEON::BI__builtin_neon_vtbx3_v: 5007 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3), 5008 Ops, "vtbx3"); 5009 case NEON::BI__builtin_neon_vtbx4_v: 5010 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4), 5011 Ops, "vtbx4"); 5012 } 5013 } 5014 5015 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, 5016 const CallExpr *E, 5017 SmallVectorImpl<Value *> &Ops) { 5018 unsigned int Int = 0; 5019 const char *s = nullptr; 5020 5021 switch (BuiltinID) { 5022 default: 5023 return nullptr; 5024 case NEON::BI__builtin_neon_vtbl1_v: 5025 case NEON::BI__builtin_neon_vqtbl1_v: 5026 case NEON::BI__builtin_neon_vqtbl1q_v: 5027 case NEON::BI__builtin_neon_vtbl2_v: 5028 case NEON::BI__builtin_neon_vqtbl2_v: 5029 case NEON::BI__builtin_neon_vqtbl2q_v: 5030 case NEON::BI__builtin_neon_vtbl3_v: 5031 case NEON::BI__builtin_neon_vqtbl3_v: 5032 case NEON::BI__builtin_neon_vqtbl3q_v: 5033 case NEON::BI__builtin_neon_vtbl4_v: 5034 case NEON::BI__builtin_neon_vqtbl4_v: 5035 case NEON::BI__builtin_neon_vqtbl4q_v: 5036 break; 5037 case NEON::BI__builtin_neon_vtbx1_v: 5038 case NEON::BI__builtin_neon_vqtbx1_v: 5039 case NEON::BI__builtin_neon_vqtbx1q_v: 5040 case NEON::BI__builtin_neon_vtbx2_v: 5041 case NEON::BI__builtin_neon_vqtbx2_v: 5042 case NEON::BI__builtin_neon_vqtbx2q_v: 5043 case NEON::BI__builtin_neon_vtbx3_v: 5044 case NEON::BI__builtin_neon_vqtbx3_v: 5045 case NEON::BI__builtin_neon_vqtbx3q_v: 5046 case NEON::BI__builtin_neon_vtbx4_v: 5047 case NEON::BI__builtin_neon_vqtbx4_v: 5048 case NEON::BI__builtin_neon_vqtbx4q_v: 5049 break; 5050 } 5051 5052 assert(E->getNumArgs() >= 3); 5053 5054 // Get the last argument, which specifies the vector type. 5055 llvm::APSInt Result; 5056 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 5057 if (!Arg->isIntegerConstantExpr(Result, CGF.getContext())) 5058 return nullptr; 5059 5060 // Determine the type of this overloaded NEON intrinsic. 
5061   NeonTypeFlags Type(Result.getZExtValue());
5062   llvm::VectorType *Ty = GetNeonType(&CGF, Type);
5063   if (!Ty)
5064     return nullptr;
5065
5066   CodeGen::CGBuilderTy &Builder = CGF.Builder;
5067
5068   // AArch64 scalar builtins are not overloaded; they do not have an extra
5069   // argument that specifies the vector type, so we need to handle each case.
5070   switch (BuiltinID) {
5071   case NEON::BI__builtin_neon_vtbl1_v: {
5072     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
5073                               Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
5074                               "vtbl1");
5075   }
5076   case NEON::BI__builtin_neon_vtbl2_v: {
5077     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
5078                               Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
5079                               "vtbl1");
5080   }
5081   case NEON::BI__builtin_neon_vtbl3_v: {
5082     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
5083                               Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
5084                               "vtbl2");
5085   }
5086   case NEON::BI__builtin_neon_vtbl4_v: {
5087     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
5088                               Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
5089                               "vtbl2");
5090   }
5091   case NEON::BI__builtin_neon_vtbx1_v: {
5092     Value *TblRes =
5093         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
5094                            Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
5095
5096     llvm::Constant *EightV = ConstantInt::get(Ty, 8);
5097     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
5098     CmpRes = Builder.CreateSExt(CmpRes, Ty);
5099
5100     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5101     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5102     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5103   }
5104   case NEON::BI__builtin_neon_vtbx2_v: {
5105     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
5106                               Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
5107                               "vtbx1");
5108   }
5109   case NEON::BI__builtin_neon_vtbx3_v: {
5110     Value *TblRes =
5111         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
5112                            Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
5113
5114     llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
5115     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
5116                                        TwentyFourV);
5117     CmpRes = Builder.CreateSExt(CmpRes, Ty);
5118
5119     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5120     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5121     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5122   }
5123   case NEON::BI__builtin_neon_vtbx4_v: {
5124     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
5125                               Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
5126                               "vtbx2");
5127   }
5128   case NEON::BI__builtin_neon_vqtbl1_v:
5129   case NEON::BI__builtin_neon_vqtbl1q_v:
5130     Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
5131   case NEON::BI__builtin_neon_vqtbl2_v:
5132   case NEON::BI__builtin_neon_vqtbl2q_v:
5133     Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
5134   case NEON::BI__builtin_neon_vqtbl3_v:
5135   case NEON::BI__builtin_neon_vqtbl3q_v:
5136     Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
5137   case NEON::BI__builtin_neon_vqtbl4_v:
5138   case NEON::BI__builtin_neon_vqtbl4q_v:
5139     Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
5140   case NEON::BI__builtin_neon_vqtbx1_v:
5141   case NEON::BI__builtin_neon_vqtbx1q_v:
5142     Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
5143   case NEON::BI__builtin_neon_vqtbx2_v:
5144   case NEON::BI__builtin_neon_vqtbx2q_v:
5145     Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
5146   case NEON::BI__builtin_neon_vqtbx3_v:
5147   case NEON::BI__builtin_neon_vqtbx3q_v:
5148     Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
5149   case NEON::BI__builtin_neon_vqtbx4_v:
5150   case NEON::BI__builtin_neon_vqtbx4q_v:
5151     Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
5152   }
5154
5155   if (!Int)
5156     return nullptr;
5157
5158   Function *F = CGF.CGM.getIntrinsic(Int, Ty);
5159   return CGF.EmitNeonCall(F, Ops, s);
5160 }
5161
5162 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
5163   llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
5164   Op = Builder.CreateBitCast(Op, Int16Ty);
5165   Value *V = UndefValue::get(VTy);
5166   llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
5167   Op = Builder.CreateInsertElement(V, Op, CI);
5168   return Op;
5169 }
5170
5171 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
5172                                                const CallExpr *E) {
5173   unsigned HintID = static_cast<unsigned>(-1);
5174   switch (BuiltinID) {
5175   default: break;
5176   case AArch64::BI__builtin_arm_nop:
5177     HintID = 0;
5178     break;
5179   case AArch64::BI__builtin_arm_yield:
5180     HintID = 1;
5181     break;
5182   case AArch64::BI__builtin_arm_wfe:
5183     HintID = 2;
5184     break;
5185   case AArch64::BI__builtin_arm_wfi:
5186     HintID = 3;
5187     break;
5188   case AArch64::BI__builtin_arm_sev:
5189     HintID = 4;
5190     break;
5191   case AArch64::BI__builtin_arm_sevl:
5192     HintID = 5;
5193     break;
5194   }
5195
5196   if (HintID != static_cast<unsigned>(-1)) {
5197     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
5198     return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
5199   }
5200
5201   if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
5202     Value *Address = EmitScalarExpr(E->getArg(0));
5203     Value *RW = EmitScalarExpr(E->getArg(1));
5204     Value *CacheLevel = EmitScalarExpr(E->getArg(2));
5205     Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
5206     Value *IsData = EmitScalarExpr(E->getArg(4));
5207
5208     Value *Locality = nullptr;
5209     if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
5210       // A temporal fetch; convert the cache level to locality.
5211       Locality = llvm::ConstantInt::get(Int32Ty,
5212         -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
5213     } else {
5214       // Streaming fetch.
5215       Locality = llvm::ConstantInt::get(Int32Ty, 0);
5216     }
5217
5218     // FIXME: We need an AArch64-specific LLVM intrinsic if we want to specify
5219     // PLDL3STRM or PLDL2STRM.
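    // (Illustrative: a temporal fetch targeting cache level 0 therefore maps
    // to locality 3, the closest cache level, while every streaming fetch
    // collapses to locality 0.)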
5220 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 5221 return Builder.CreateCall(F, {Address, RW, Locality, IsData}); 5222 } 5223 5224 if (BuiltinID == AArch64::BI__builtin_arm_rbit) { 5225 assert((getContext().getTypeSize(E->getType()) == 32) && 5226 "rbit of unusual size!"); 5227 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 5228 return Builder.CreateCall( 5229 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); 5230 } 5231 if (BuiltinID == AArch64::BI__builtin_arm_rbit64) { 5232 assert((getContext().getTypeSize(E->getType()) == 64) && 5233 "rbit of unusual size!"); 5234 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 5235 return Builder.CreateCall( 5236 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); 5237 } 5238 5239 if (BuiltinID == AArch64::BI__clear_cache) { 5240 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 5241 const FunctionDecl *FD = E->getDirectCallee(); 5242 Value *Ops[2]; 5243 for (unsigned i = 0; i < 2; i++) 5244 Ops[i] = EmitScalarExpr(E->getArg(i)); 5245 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 5246 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 5247 StringRef Name = FD->getName(); 5248 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 5249 } 5250 5251 if ((BuiltinID == AArch64::BI__builtin_arm_ldrex || 5252 BuiltinID == AArch64::BI__builtin_arm_ldaex) && 5253 getContext().getTypeSize(E->getType()) == 128) { 5254 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 5255 ? Intrinsic::aarch64_ldaxp 5256 : Intrinsic::aarch64_ldxp); 5257 5258 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 5259 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 5260 "ldxp"); 5261 5262 Value *Val0 = Builder.CreateExtractValue(Val, 1); 5263 Value *Val1 = Builder.CreateExtractValue(Val, 0); 5264 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); 5265 Val0 = Builder.CreateZExt(Val0, Int128Ty); 5266 Val1 = Builder.CreateZExt(Val1, Int128Ty); 5267 5268 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64); 5269 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 5270 Val = Builder.CreateOr(Val, Val1); 5271 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 5272 } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex || 5273 BuiltinID == AArch64::BI__builtin_arm_ldaex) { 5274 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 5275 5276 QualType Ty = E->getType(); 5277 llvm::Type *RealResTy = ConvertType(Ty); 5278 llvm::Type *PtrTy = llvm::IntegerType::get( 5279 getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo(); 5280 LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy); 5281 5282 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 5283 ? Intrinsic::aarch64_ldaxr 5284 : Intrinsic::aarch64_ldxr, 5285 PtrTy); 5286 Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr"); 5287 5288 if (RealResTy->isPointerTy()) 5289 return Builder.CreateIntToPtr(Val, RealResTy); 5290 5291 llvm::Type *IntResTy = llvm::IntegerType::get( 5292 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy)); 5293 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 5294 return Builder.CreateBitCast(Val, RealResTy); 5295 } 5296 5297 if ((BuiltinID == AArch64::BI__builtin_arm_strex || 5298 BuiltinID == AArch64::BI__builtin_arm_stlex) && 5299 getContext().getTypeSize(E->getArg(0)->getType()) == 128) { 5300 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 5301 ? 
Intrinsic::aarch64_stlxp 5302 : Intrinsic::aarch64_stxp); 5303 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, nullptr); 5304 5305 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 5306 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true); 5307 5308 Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy)); 5309 llvm::Value *Val = Builder.CreateLoad(Tmp); 5310 5311 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 5312 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 5313 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), 5314 Int8PtrTy); 5315 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp"); 5316 } 5317 5318 if (BuiltinID == AArch64::BI__builtin_arm_strex || 5319 BuiltinID == AArch64::BI__builtin_arm_stlex) { 5320 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 5321 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 5322 5323 QualType Ty = E->getArg(0)->getType(); 5324 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 5325 getContext().getTypeSize(Ty)); 5326 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 5327 5328 if (StoreVal->getType()->isPointerTy()) 5329 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty); 5330 else { 5331 llvm::Type *IntTy = llvm::IntegerType::get( 5332 getLLVMContext(), 5333 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType())); 5334 StoreVal = Builder.CreateBitCast(StoreVal, IntTy); 5335 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty); 5336 } 5337 5338 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 5339 ? Intrinsic::aarch64_stlxr 5340 : Intrinsic::aarch64_stxr, 5341 StoreAddr->getType()); 5342 return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr"); 5343 } 5344 5345 if (BuiltinID == AArch64::BI__builtin_arm_clrex) { 5346 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex); 5347 return Builder.CreateCall(F); 5348 } 5349 5350 // CRC32 5351 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 5352 switch (BuiltinID) { 5353 case AArch64::BI__builtin_arm_crc32b: 5354 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break; 5355 case AArch64::BI__builtin_arm_crc32cb: 5356 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break; 5357 case AArch64::BI__builtin_arm_crc32h: 5358 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break; 5359 case AArch64::BI__builtin_arm_crc32ch: 5360 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break; 5361 case AArch64::BI__builtin_arm_crc32w: 5362 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break; 5363 case AArch64::BI__builtin_arm_crc32cw: 5364 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break; 5365 case AArch64::BI__builtin_arm_crc32d: 5366 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break; 5367 case AArch64::BI__builtin_arm_crc32cd: 5368 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break; 5369 } 5370 5371 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 5372 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 5373 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 5374 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 5375 5376 llvm::Type *DataTy = F->getFunctionType()->getParamType(1); 5377 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy); 5378 5379 return Builder.CreateCall(F, {Arg0, Arg1}); 5380 } 5381 5382 if (BuiltinID == AArch64::BI__builtin_arm_rsr || 5383 BuiltinID == AArch64::BI__builtin_arm_rsr64 || 5384 BuiltinID == AArch64::BI__builtin_arm_rsrp || 5385 BuiltinID == AArch64::BI__builtin_arm_wsr || 5386 BuiltinID == AArch64::BI__builtin_arm_wsr64 || 5387 BuiltinID == AArch64::BI__builtin_arm_wsrp) { 5388 
5389 bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr || 5390 BuiltinID == AArch64::BI__builtin_arm_rsr64 || 5391 BuiltinID == AArch64::BI__builtin_arm_rsrp; 5392 5393 bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp || 5394 BuiltinID == AArch64::BI__builtin_arm_wsrp; 5395 5396 bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr && 5397 BuiltinID != AArch64::BI__builtin_arm_wsr; 5398 5399 llvm::Type *ValueType; 5400 llvm::Type *RegisterType = Int64Ty; 5401 if (IsPointerBuiltin) { 5402 ValueType = VoidPtrTy; 5403 } else if (Is64Bit) { 5404 ValueType = Int64Ty; 5405 } else { 5406 ValueType = Int32Ty; 5407 } 5408 5409 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead); 5410 } 5411 5412 // Find out if any arguments are required to be integer constant 5413 // expressions. 5414 unsigned ICEArguments = 0; 5415 ASTContext::GetBuiltinTypeError Error; 5416 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 5417 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 5418 5419 llvm::SmallVector<Value*, 4> Ops; 5420 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { 5421 if ((ICEArguments & (1 << i)) == 0) { 5422 Ops.push_back(EmitScalarExpr(E->getArg(i))); 5423 } else { 5424 // If this is required to be a constant, constant fold it so that we know 5425 // that the generated intrinsic gets a ConstantInt. 5426 llvm::APSInt Result; 5427 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 5428 assert(IsConst && "Constant arg isn't actually constant?"); 5429 (void)IsConst; 5430 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 5431 } 5432 } 5433 5434 auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap); 5435 const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( 5436 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted); 5437 5438 if (Builtin) { 5439 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1))); 5440 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E); 5441 assert(Result && "SISD intrinsic should have been handled"); 5442 return Result; 5443 } 5444 5445 llvm::APSInt Result; 5446 const Expr *Arg = E->getArg(E->getNumArgs()-1); 5447 NeonTypeFlags Type(0); 5448 if (Arg->isIntegerConstantExpr(Result, getContext())) 5449 // Determine the type of this overloaded NEON intrinsic. 5450 Type = NeonTypeFlags(Result.getZExtValue()); 5451 5452 bool usgn = Type.isUnsigned(); 5453 bool quad = Type.isQuad(); 5454 5455 // Handle non-overloaded intrinsics first. 
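  // (For example, vldrq_p128 below always loads an i128, and vpaddd_f64
  // always reduces a <2 x double>, so neither consults the NEON type flags.)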
5456   switch (BuiltinID) {
5457   default: break;
5458   case NEON::BI__builtin_neon_vldrq_p128: {
5459     llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5460     llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
5461     Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
5462     return Builder.CreateAlignedLoad(Int128Ty, Ptr,
5463                                      CharUnits::fromQuantity(16));
5464   }
5465   case NEON::BI__builtin_neon_vstrq_p128: {
5466     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
5467     Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
5468     return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
5469   }
5470   case NEON::BI__builtin_neon_vcvts_u32_f32:
5471   case NEON::BI__builtin_neon_vcvtd_u64_f64:
5472     usgn = true;
5473     // FALL THROUGH
5474   case NEON::BI__builtin_neon_vcvts_s32_f32:
5475   case NEON::BI__builtin_neon_vcvtd_s64_f64: {
5476     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5477     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5478     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5479     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5480     Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
5481     if (usgn)
5482       return Builder.CreateFPToUI(Ops[0], InTy);
5483     return Builder.CreateFPToSI(Ops[0], InTy);
5484   }
5485   case NEON::BI__builtin_neon_vcvts_f32_u32:
5486   case NEON::BI__builtin_neon_vcvtd_f64_u64:
5487     usgn = true;
5488     // FALL THROUGH
5489   case NEON::BI__builtin_neon_vcvts_f32_s32:
5490   case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5491     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5492     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5493     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5494     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5495     Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5496     if (usgn)
5497       return Builder.CreateUIToFP(Ops[0], FTy);
5498     return Builder.CreateSIToFP(Ops[0], FTy);
5499   }
5500   case NEON::BI__builtin_neon_vpaddd_s64: {
5501     llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
5502     Value *Vec = EmitScalarExpr(E->getArg(0));
5503     // The vector is v2i64, so make sure it's bitcast to that.
5504     Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
5505     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5506     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5507     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5508     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5509     // Pairwise addition of a v2i64 into a scalar i64.
5510     return Builder.CreateAdd(Op0, Op1, "vpaddd");
5511   }
5512   case NEON::BI__builtin_neon_vpaddd_f64: {
5513     llvm::Type *Ty =
5514       llvm::VectorType::get(DoubleTy, 2);
5515     Value *Vec = EmitScalarExpr(E->getArg(0));
5516     // The vector is v2f64, so make sure it's bitcast to that.
5517     Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
5518     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5519     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5520     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5521     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5522     // Pairwise addition of a v2f64 into a scalar f64.
5523     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5524   }
5525   case NEON::BI__builtin_neon_vpadds_f32: {
5526     llvm::Type *Ty =
5527       llvm::VectorType::get(FloatTy, 2);
5528     Value *Vec = EmitScalarExpr(E->getArg(0));
5529     // The vector is v2f32, so make sure it's bitcast to that.
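    // After the bitcast below, the reduction is roughly (illustrative):
    //   %lane0 = extractelement <2 x float> %vec, i64 0
    //   %lane1 = extractelement <2 x float> %vec, i64 1
    //   %vpaddd = fadd float %lane0, %lane1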
    Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
    Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
    Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
    // Pairwise addition of a v2f32 into a scalar f32.
    return Builder.CreateFAdd(Op0, Op1, "vpadds");
  }
  case NEON::BI__builtin_neon_vceqzd_s64:
  case NEON::BI__builtin_neon_vceqzd_f64:
  case NEON::BI__builtin_neon_vceqzs_f32:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
  case NEON::BI__builtin_neon_vcgezd_s64:
  case NEON::BI__builtin_neon_vcgezd_f64:
  case NEON::BI__builtin_neon_vcgezs_f32:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
  case NEON::BI__builtin_neon_vclezd_s64:
  case NEON::BI__builtin_neon_vclezd_f64:
  case NEON::BI__builtin_neon_vclezs_f32:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
  case NEON::BI__builtin_neon_vcgtzd_s64:
  case NEON::BI__builtin_neon_vcgtzd_f64:
  case NEON::BI__builtin_neon_vcgtzs_f32:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
  case NEON::BI__builtin_neon_vcltzd_s64:
  case NEON::BI__builtin_neon_vcltzd_f64:
  case NEON::BI__builtin_neon_vcltzs_f32:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");

  case NEON::BI__builtin_neon_vceqzd_u64: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
    Ops[0] =
        Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
    return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
  }
  case NEON::BI__builtin_neon_vceqd_f64:
  case NEON::BI__builtin_neon_vcled_f64:
  case NEON::BI__builtin_neon_vcltd_f64:
  case NEON::BI__builtin_neon_vcged_f64:
  case NEON::BI__builtin_neon_vcgtd_f64: {
    llvm::CmpInst::Predicate P;
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
    case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
    case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
    case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
    case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
    }
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
    Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
    return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
  }
  case NEON::BI__builtin_neon_vceqs_f32:
  case NEON::BI__builtin_neon_vcles_f32:
  case NEON::BI__builtin_neon_vclts_f32:
  case NEON::BI__builtin_neon_vcges_f32:
  case NEON::BI__builtin_neon_vcgts_f32: {
    llvm::CmpInst::Predicate P;
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
    case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
    case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
    case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
    case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
    }
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
    Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
    return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
  }
  case NEON::BI__builtin_neon_vceqd_s64:
  case NEON::BI__builtin_neon_vceqd_u64:
  case NEON::BI__builtin_neon_vcgtd_s64:
  case NEON::BI__builtin_neon_vcgtd_u64:
  case NEON::BI__builtin_neon_vcltd_s64:
  case NEON::BI__builtin_neon_vcltd_u64:
  case NEON::BI__builtin_neon_vcged_u64:
  case NEON::BI__builtin_neon_vcged_s64:
  case NEON::BI__builtin_neon_vcled_u64:
  case NEON::BI__builtin_neon_vcled_s64: {
    llvm::CmpInst::Predicate P;
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vceqd_s64:
    case NEON::BI__builtin_neon_vceqd_u64: P = llvm::ICmpInst::ICMP_EQ;  break;
    case NEON::BI__builtin_neon_vcgtd_s64: P = llvm::ICmpInst::ICMP_SGT; break;
    case NEON::BI__builtin_neon_vcgtd_u64: P = llvm::ICmpInst::ICMP_UGT; break;
    case NEON::BI__builtin_neon_vcltd_s64: P = llvm::ICmpInst::ICMP_SLT; break;
    case NEON::BI__builtin_neon_vcltd_u64: P = llvm::ICmpInst::ICMP_ULT; break;
    case NEON::BI__builtin_neon_vcged_u64: P = llvm::ICmpInst::ICMP_UGE; break;
    case NEON::BI__builtin_neon_vcged_s64: P = llvm::ICmpInst::ICMP_SGE; break;
    case NEON::BI__builtin_neon_vcled_u64: P = llvm::ICmpInst::ICMP_ULE; break;
    case NEON::BI__builtin_neon_vcled_s64: P = llvm::ICmpInst::ICMP_SLE; break;
    }
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
    Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
    return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
  }
  case NEON::BI__builtin_neon_vtstd_s64:
  case NEON::BI__builtin_neon_vtstd_u64: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                llvm::Constant::getNullValue(Int64Ty));
    return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
  }
  case NEON::BI__builtin_neon_vset_lane_i8:
  case NEON::BI__builtin_neon_vset_lane_i16:
  case NEON::BI__builtin_neon_vset_lane_i32:
  case NEON::BI__builtin_neon_vset_lane_i64:
  case NEON::BI__builtin_neon_vset_lane_f32:
  case NEON::BI__builtin_neon_vsetq_lane_i8:
  case NEON::BI__builtin_neon_vsetq_lane_i16:
  case NEON::BI__builtin_neon_vsetq_lane_i32:
  case NEON::BI__builtin_neon_vsetq_lane_i64:
  case NEON::BI__builtin_neon_vsetq_lane_f32:
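    // (Sketch, not in the original source: every vset_lane variant below
    //  lowers to a single insertelement, e.g. vset_lane_f32(s, v, 1)
    //  becomes roughly
    //    %r = insertelement <2 x float> %v, float %s, i32 1.)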
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  case NEON::BI__builtin_neon_vset_lane_f64:
    // The vector type needs a cast for the v1f64 variant.
    Ops[1] = Builder.CreateBitCast(Ops[1],
                                   llvm::VectorType::get(DoubleTy, 1));
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  case NEON::BI__builtin_neon_vsetq_lane_f64:
    // The vector type needs a cast for the v2f64 variant.
    Ops[1] = Builder.CreateBitCast(Ops[1],
                                   llvm::VectorType::get(DoubleTy, 2));
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");

  case NEON::BI__builtin_neon_vget_lane_i8:
  case NEON::BI__builtin_neon_vdupb_lane_i8:
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vgetq_lane_i8:
  case NEON::BI__builtin_neon_vdupb_laneq_i8:
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vget_lane_i16:
  case NEON::BI__builtin_neon_vduph_lane_i16:
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vgetq_lane_i16:
  case NEON::BI__builtin_neon_vduph_laneq_i16:
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vget_lane_i32:
  case NEON::BI__builtin_neon_vdups_lane_i32:
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vdups_lane_f32:
    Ops[0] = Builder.CreateBitCast(Ops[0],
                                   llvm::VectorType::get(FloatTy, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vdups_lane");
  case NEON::BI__builtin_neon_vgetq_lane_i32:
  case NEON::BI__builtin_neon_vdups_laneq_i32:
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vget_lane_i64:
  case NEON::BI__builtin_neon_vdupd_lane_i64:
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vdupd_lane_f64:
    Ops[0] = Builder.CreateBitCast(Ops[0],
                                   llvm::VectorType::get(DoubleTy, 1));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vdupd_lane");
  case NEON::BI__builtin_neon_vgetq_lane_i64:
  case NEON::BI__builtin_neon_vdupd_laneq_i64:
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vget_lane_f32:
    Ops[0] = Builder.CreateBitCast(Ops[0],
                                   llvm::VectorType::get(FloatTy, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vget_lane_f64:
    Ops[0] = Builder.CreateBitCast(Ops[0],
                                   llvm::VectorType::get(DoubleTy, 1));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vgetq_lane_f32:
  case NEON::BI__builtin_neon_vdups_laneq_f32:
    Ops[0] = Builder.CreateBitCast(Ops[0],
                                   llvm::VectorType::get(FloatTy, 4));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vgetq_lane_f64:
  case NEON::BI__builtin_neon_vdupd_laneq_f64:
    Ops[0] = Builder.CreateBitCast(Ops[0],
                                   llvm::VectorType::get(DoubleTy, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vaddd_s64:
  case NEON::BI__builtin_neon_vaddd_u64:
    return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
  case NEON::BI__builtin_neon_vsubd_s64:
  case NEON::BI__builtin_neon_vsubd_u64:
    return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
  case NEON::BI__builtin_neon_vqdmlalh_s16:
  case NEON::BI__builtin_neon_vqdmlslh_s16: {
    SmallVector<Value *, 2> ProductOps;
    ProductOps.push_back(vectorWrapScalar16(Ops[1]));
    ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
    llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
    Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
                          ProductOps, "vqdmlXl");
    Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");

    unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
                            ? Intrinsic::aarch64_neon_sqadd
                            : Intrinsic::aarch64_neon_sqsub;
    return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
  }
  case NEON::BI__builtin_neon_vqshlud_n_s64: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
                        Ops, "vqshlu_n");
  }
  case NEON::BI__builtin_neon_vqshld_n_u64:
  case NEON::BI__builtin_neon_vqshld_n_s64: {
    unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
                       ? Intrinsic::aarch64_neon_uqshl
                       : Intrinsic::aarch64_neon_sqshl;
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
    return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
  }
  case NEON::BI__builtin_neon_vrshrd_n_u64:
  case NEON::BI__builtin_neon_vrshrd_n_s64: {
    unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
                       ? Intrinsic::aarch64_neon_urshl
                       : Intrinsic::aarch64_neon_srshl;
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
    Ops[1] = ConstantInt::get(Int64Ty, -SV);
    return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
  }
  case NEON::BI__builtin_neon_vrsrad_n_u64:
  case NEON::BI__builtin_neon_vrsrad_n_s64: {
    unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
                       ? Intrinsic::aarch64_neon_urshl
                       : Intrinsic::aarch64_neon_srshl;
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
    Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
    Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
                                {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
    return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
  }
  case NEON::BI__builtin_neon_vshld_n_s64:
  case NEON::BI__builtin_neon_vshld_n_u64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateShl(
        Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
  }
  case NEON::BI__builtin_neon_vshrd_n_s64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateAShr(
        Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
                                                   Amt->getZExtValue())),
        "shrd_n");
  }
  case NEON::BI__builtin_neon_vshrd_n_u64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    uint64_t ShiftAmt = Amt->getZExtValue();
    // Right-shifting an unsigned value by its size yields 0.
    if (ShiftAmt == 64)
      return ConstantInt::get(Int64Ty, 0);
    return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
                              "shrd_n");
  }
  case NEON::BI__builtin_neon_vsrad_n_s64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
    Ops[1] = Builder.CreateAShr(
        Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
                                                   Amt->getZExtValue())),
        "shrd_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  }
  case NEON::BI__builtin_neon_vsrad_n_u64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
    uint64_t ShiftAmt = Amt->getZExtValue();
    // Right-shifting an unsigned value by its size yields 0.
    // As Op + 0 = Op, return Ops[0] directly.
    if (ShiftAmt == 64)
      return Ops[0];
    Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
                                "shrd_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  }
  case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
  case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
  case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
  case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
    Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
                                          "lane");
    SmallVector<Value *, 2> ProductOps;
    ProductOps.push_back(vectorWrapScalar16(Ops[1]));
    ProductOps.push_back(vectorWrapScalar16(Ops[2]));
    llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
    Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
                          ProductOps, "vqdmlXl");
    Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
    Ops.pop_back();

    unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
                       BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
                          ? Intrinsic::aarch64_neon_sqadd
                          : Intrinsic::aarch64_neon_sqsub;
    return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
  }
  case NEON::BI__builtin_neon_vqdmlals_s32:
  case NEON::BI__builtin_neon_vqdmlsls_s32: {
    SmallVector<Value *, 2> ProductOps;
    ProductOps.push_back(Ops[1]);
    ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
                     ProductOps, "vqdmlXl");

    unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
                            ? Intrinsic::aarch64_neon_sqadd
                            : Intrinsic::aarch64_neon_sqsub;
    return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
  }
  case NEON::BI__builtin_neon_vqdmlals_lane_s32:
  case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
  case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
  case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
    Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
                                          "lane");
    SmallVector<Value *, 2> ProductOps;
    ProductOps.push_back(Ops[1]);
    ProductOps.push_back(Ops[2]);
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
                     ProductOps, "vqdmlXl");
    Ops.pop_back();

    unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
                       BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
                          ? Intrinsic::aarch64_neon_sqadd
                          : Intrinsic::aarch64_neon_sqsub;
    return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
  }
  }

  llvm::VectorType *VTy = GetNeonType(this, Type);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  // Not all intrinsics handled by the common case work for AArch64 yet, so only
  // defer to common code if it's been added to our special map.
  Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
                                   AArch64SIMDIntrinsicsProvenSorted);

  if (Builtin)
    return EmitCommonNeonBuiltinExpr(
        Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
        Builtin->NameHint, Builtin->TypeModifier, E, Ops,
        /*never use addresses*/ Address::invalid(), Address::invalid());

  if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops))
    return V;

  unsigned Int;
  switch (BuiltinID) {
  default: return nullptr;
  case NEON::BI__builtin_neon_vbsl_v:
  case NEON::BI__builtin_neon_vbslq_v: {
    llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
    Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
    Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");

    Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
    Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
    Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
    return Builder.CreateBitCast(Ops[0], Ty);
  }
  case NEON::BI__builtin_neon_vfma_lane_v:
  case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
    // The ARM builtins (and instructions) have the addend as the first
    // operand, but the 'fma' intrinsics have it last. Swap it around here.
    Value *Addend = Ops[0];
    Value *Multiplicand = Ops[1];
    Value *LaneSource = Ops[2];
    Ops[0] = Multiplicand;
    Ops[1] = LaneSource;
    Ops[2] = Addend;

    // Now adjust things to handle the lane access.
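    // (Sketch, not in the original source: for the "q" variant the lane
    //  source is the narrower 64-bit vector, so SourceTy below has half the
    //  result's element count; the requested lane is then broadcast to all
    //  result lanes with a constant shufflevector mask, e.g. lane 1 of a
    //  v2f32 source is splatted via mask <1, 1, 1, 1> to feed a v4f32 fma.)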
    llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
      llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
      VTy;
    llvm::Constant *cst = cast<Constant>(Ops[3]);
    Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
    Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
    Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");

    Ops.pop_back();
    Int = Intrinsic::fma;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
  }
  case NEON::BI__builtin_neon_vfma_laneq_v: {
    llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
    // v1f64 fma should be mapped to Neon scalar f64 fma
    if (VTy && VTy->getElementType() == DoubleTy) {
      Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
      Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
      llvm::Type *VTy = GetNeonType(this,
        NeonTypeFlags(NeonTypeFlags::Float64, false, true));
      Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
      Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
      Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
      Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
      return Builder.CreateBitCast(Result, Ty);
    }
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);

    llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
                                            VTy->getNumElements() * 2);
    Ops[2] = Builder.CreateBitCast(Ops[2], STy);
    Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
                                               cast<ConstantInt>(Ops[3]));
    Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");

    return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
  }
  case NEON::BI__builtin_neon_vfmaq_laneq_v: {
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);

    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
    return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
  }
  case NEON::BI__builtin_neon_vfmas_lane_f32:
  case NEON::BI__builtin_neon_vfmas_laneq_f32:
  case NEON::BI__builtin_neon_vfmad_lane_f64:
  case NEON::BI__builtin_neon_vfmad_laneq_f64: {
    Ops.push_back(EmitScalarExpr(E->getArg(3)));
    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
    Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
  }
  case NEON::BI__builtin_neon_vmull_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
    if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  case NEON::BI__builtin_neon_vmax_v:
  case NEON::BI__builtin_neon_vmaxq_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
  case NEON::BI__builtin_neon_vmin_v:
  case NEON::BI__builtin_neon_vminq_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
  case NEON::BI__builtin_neon_vabd_v:
  case NEON::BI__builtin_neon_vabdq_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
  case NEON::BI__builtin_neon_vpadal_v:
  case NEON::BI__builtin_neon_vpadalq_v: {
    unsigned ArgElts = VTy->getNumElements();
    llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
    unsigned BitWidth = EltTy->getBitWidth();
    llvm::Type *ArgTy = llvm::VectorType::get(
        llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
    llvm::Type* Tys[2] = { VTy, ArgTy };
    Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
    SmallVector<llvm::Value*, 1> TmpOps;
    TmpOps.push_back(Ops[1]);
    Function *F = CGM.getIntrinsic(Int, Tys);
    llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
    llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
    return Builder.CreateAdd(tmp, addend);
  }
  case NEON::BI__builtin_neon_vpmin_v:
  case NEON::BI__builtin_neon_vpminq_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
  case NEON::BI__builtin_neon_vpmax_v:
  case NEON::BI__builtin_neon_vpmaxq_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
  case NEON::BI__builtin_neon_vminnm_v:
  case NEON::BI__builtin_neon_vminnmq_v:
    Int = Intrinsic::aarch64_neon_fminnm;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
  case NEON::BI__builtin_neon_vmaxnm_v:
  case NEON::BI__builtin_neon_vmaxnmq_v:
    Int = Intrinsic::aarch64_neon_fmaxnm;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
  case NEON::BI__builtin_neon_vrecpss_f32: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
                        Ops, "vrecps");
  }
  case NEON::BI__builtin_neon_vrecpsd_f64: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
                        Ops, "vrecps");
  }
  case NEON::BI__builtin_neon_vqshrun_n_v:
    Int = Intrinsic::aarch64_neon_sqshrun;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
  case NEON::BI__builtin_neon_vqrshrun_n_v:
    Int = Intrinsic::aarch64_neon_sqrshrun;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
  case NEON::BI__builtin_neon_vqshrn_n_v:
    Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
  case NEON::BI__builtin_neon_vrshrn_n_v:
    Int = Intrinsic::aarch64_neon_rshrn;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
  case NEON::BI__builtin_neon_vqrshrn_n_v:
    Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
  case NEON::BI__builtin_neon_vrnda_v:
  case NEON::BI__builtin_neon_vrndaq_v: {
    Int = Intrinsic::round;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
  }
  case NEON::BI__builtin_neon_vrndi_v:
  case NEON::BI__builtin_neon_vrndiq_v: {
    Int = Intrinsic::nearbyint;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
  }
  case NEON::BI__builtin_neon_vrndm_v:
  case NEON::BI__builtin_neon_vrndmq_v: {
    Int = Intrinsic::floor;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
  }
  case NEON::BI__builtin_neon_vrndn_v:
  case NEON::BI__builtin_neon_vrndnq_v: {
    Int = Intrinsic::aarch64_neon_frintn;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
  }
  case NEON::BI__builtin_neon_vrndp_v:
  case NEON::BI__builtin_neon_vrndpq_v: {
    Int = Intrinsic::ceil;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
  }
  case NEON::BI__builtin_neon_vrndx_v:
  case NEON::BI__builtin_neon_vrndxq_v: {
    Int = Intrinsic::rint;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
  }
  case NEON::BI__builtin_neon_vrnd_v:
  case NEON::BI__builtin_neon_vrndq_v: {
    Int = Intrinsic::trunc;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
  }
  case NEON::BI__builtin_neon_vceqz_v:
  case NEON::BI__builtin_neon_vceqzq_v:
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
                                         ICmpInst::ICMP_EQ, "vceqz");
  case NEON::BI__builtin_neon_vcgez_v:
  case NEON::BI__builtin_neon_vcgezq_v:
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
                                         ICmpInst::ICMP_SGE, "vcgez");
  case NEON::BI__builtin_neon_vclez_v:
  case NEON::BI__builtin_neon_vclezq_v:
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
                                         ICmpInst::ICMP_SLE, "vclez");
  case NEON::BI__builtin_neon_vcgtz_v:
  case NEON::BI__builtin_neon_vcgtzq_v:
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
                                         ICmpInst::ICMP_SGT, "vcgtz");
  case NEON::BI__builtin_neon_vcltz_v:
  case NEON::BI__builtin_neon_vcltzq_v:
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
                                         ICmpInst::ICMP_SLT, "vcltz");
  case NEON::BI__builtin_neon_vcvt_f64_v:
  case NEON::BI__builtin_neon_vcvtq_f64_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
    return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_f64_f32: {
    assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
           "unexpected vcvt_f64_f32 builtin");
    NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));

    return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvt_f32_f64: {
    assert(Type.getEltType() == NeonTypeFlags::Float32 &&
           "unexpected vcvt_f32_f64 builtin");
    NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));

    return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvt_s32_v:
  case NEON::BI__builtin_neon_vcvt_u32_v:
  case NEON::BI__builtin_neon_vcvt_s64_v:
  case NEON::BI__builtin_neon_vcvt_u64_v:
  case NEON::BI__builtin_neon_vcvtq_s32_v:
  case NEON::BI__builtin_neon_vcvtq_u32_v:
  case NEON::BI__builtin_neon_vcvtq_s64_v:
  case NEON::BI__builtin_neon_vcvtq_u64_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
    if (usgn)
      return Builder.CreateFPToUI(Ops[0], Ty);
    return Builder.CreateFPToSI(Ops[0], Ty);
  }
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v: {
    Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
  }
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
  }
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v: {
    Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
  }
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v: {
    Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
  }
  case NEON::BI__builtin_neon_vmulx_v:
  case NEON::BI__builtin_neon_vmulxq_v: {
    Int = Intrinsic::aarch64_neon_fmulx;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
  }
  case NEON::BI__builtin_neon_vmul_lane_v:
  case NEON::BI__builtin_neon_vmul_laneq_v: {
    // v1f64 vmul_lane should be mapped to Neon scalar mul lane
    bool Quad = false;
    if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
      Quad = true;
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
    llvm::Type *VTy = GetNeonType(this,
      NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
    Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
    return Builder.CreateBitCast(Result, Ty);
  }
  case NEON::BI__builtin_neon_vnegd_s64:
    return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
  case NEON::BI__builtin_neon_vpmaxnm_v:
  case NEON::BI__builtin_neon_vpmaxnmq_v: {
    Int = Intrinsic::aarch64_neon_fmaxnmp;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
  }
  case NEON::BI__builtin_neon_vpminnm_v:
  case NEON::BI__builtin_neon_vpminnmq_v: {
    Int = Intrinsic::aarch64_neon_fminnmp;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
  }
  case NEON::BI__builtin_neon_vsqrt_v:
  case NEON::BI__builtin_neon_vsqrtq_v: {
    Int = Intrinsic::sqrt;
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
  }
  case NEON::BI__builtin_neon_vrbit_v:
  case NEON::BI__builtin_neon_vrbitq_v: {
    Int = Intrinsic::aarch64_neon_rbit;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
  }
  case NEON::BI__builtin_neon_vaddv_u8:
    // FIXME: These are handled by the AArch64 scalar code.
    usgn = true;
    // FALLTHROUGH
  case NEON::BI__builtin_neon_vaddv_s8: {
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vaddv_u16:
    usgn = true;
    // FALLTHROUGH
  case NEON::BI__builtin_neon_vaddv_s16: {
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddvq_u8:
    usgn = true;
    // FALLTHROUGH
  case NEON::BI__builtin_neon_vaddvq_s8: {
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vaddvq_u16:
    usgn = true;
    // FALLTHROUGH
  case NEON::BI__builtin_neon_vaddvq_s16: {
    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vmaxv_u8: {
    Int = Intrinsic::aarch64_neon_umaxv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vmaxv_u16: {
    Int = Intrinsic::aarch64_neon_umaxv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vmaxvq_u8: {
    Int = Intrinsic::aarch64_neon_umaxv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vmaxvq_u16: {
    Int = Intrinsic::aarch64_neon_umaxv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vmaxv_s8: {
    Int = Intrinsic::aarch64_neon_smaxv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vmaxv_s16: {
    Int = Intrinsic::aarch64_neon_smaxv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vmaxvq_s8: {
    Int = Intrinsic::aarch64_neon_smaxv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vmaxvq_s16: {
    Int = Intrinsic::aarch64_neon_smaxv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vminv_u8: {
    Int = Intrinsic::aarch64_neon_uminv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vminv_u16: {
    Int = Intrinsic::aarch64_neon_uminv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vminvq_u8: {
    Int = Intrinsic::aarch64_neon_uminv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vminvq_u16: {
    Int = Intrinsic::aarch64_neon_uminv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vminv_s8: {
    Int = Intrinsic::aarch64_neon_sminv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vminv_s16: {
    Int = Intrinsic::aarch64_neon_sminv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vminvq_s8: {
    Int = Intrinsic::aarch64_neon_sminv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vminvq_s16: {
    Int = Intrinsic::aarch64_neon_sminv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vmul_n_f64: {
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
    Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
    return Builder.CreateFMul(Ops[0], RHS);
  }
  case NEON::BI__builtin_neon_vaddlv_u8: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlv_u16: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlvq_u8: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlvq_u16: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlv_s8: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlv_s16: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlvq_s8: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlvq_s16: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
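    // (Note, not in the original source: the aarch64.neon.[su]addlv calls in
    //  this group always yield an i32; for i8-element inputs the builtin's
    //  result is only 16 bits wide, so that i32 is truncated, while the
    //  i16-element reductions such as this one return the i32 directly.)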
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vsri_n_v:
  case NEON::BI__builtin_neon_vsriq_n_v: {
    Int = Intrinsic::aarch64_neon_vsri;
    llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
    return EmitNeonCall(Intrin, Ops, "vsri_n");
  }
  case NEON::BI__builtin_neon_vsli_n_v:
  case NEON::BI__builtin_neon_vsliq_n_v: {
    Int = Intrinsic::aarch64_neon_vsli;
    llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
    return EmitNeonCall(Intrin, Ops, "vsli_n");
  }
  case NEON::BI__builtin_neon_vsra_n_v:
  case NEON::BI__builtin_neon_vsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  case NEON::BI__builtin_neon_vrsra_n_v:
  case NEON::BI__builtin_neon_vrsraq_n_v: {
    Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
    SmallVector<llvm::Value*,2> TmpOps;
    TmpOps.push_back(Ops[1]);
    TmpOps.push_back(Ops[2]);
    Function* F = CGM.getIntrinsic(Int, Ty);
    llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    return Builder.CreateAdd(Ops[0], tmp);
  }
  // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
  // of an Align parameter here.
  case NEON::BI__builtin_neon_vld1_x2_v:
  case NEON::BI__builtin_neon_vld1q_x2_v:
  case NEON::BI__builtin_neon_vld1_x3_v:
  case NEON::BI__builtin_neon_vld1q_x3_v:
  case NEON::BI__builtin_neon_vld1_x4_v:
  case NEON::BI__builtin_neon_vld1q_x4_v: {
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    unsigned Int;
    switch (BuiltinID) {
    case NEON::BI__builtin_neon_vld1_x2_v:
    case NEON::BI__builtin_neon_vld1q_x2_v:
      Int = Intrinsic::aarch64_neon_ld1x2;
      break;
    case NEON::BI__builtin_neon_vld1_x3_v:
    case NEON::BI__builtin_neon_vld1q_x3_v:
      Int = Intrinsic::aarch64_neon_ld1x3;
      break;
    case NEON::BI__builtin_neon_vld1_x4_v:
    case NEON::BI__builtin_neon_vld1q_x4_v:
      Int = Intrinsic::aarch64_neon_ld1x4;
      break;
    }
    Function *F = CGM.getIntrinsic(Int, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vst1_x2_v:
  case NEON::BI__builtin_neon_vst1q_x2_v:
  case NEON::BI__builtin_neon_vst1_x3_v:
  case NEON::BI__builtin_neon_vst1q_x3_v:
  case NEON::BI__builtin_neon_vst1_x4_v:
  case NEON::BI__builtin_neon_vst1q_x4_v: {
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
    llvm::Type *Tys[2] = { VTy, PTy };
    unsigned Int;
    switch (BuiltinID) {
    case NEON::BI__builtin_neon_vst1_x2_v:
    case NEON::BI__builtin_neon_vst1q_x2_v:
      Int = Intrinsic::aarch64_neon_st1x2;
      break;
    case NEON::BI__builtin_neon_vst1_x3_v:
    case NEON::BI__builtin_neon_vst1q_x3_v:
      Int = Intrinsic::aarch64_neon_st1x3;
      break;
    case NEON::BI__builtin_neon_vst1_x4_v:
    case NEON::BI__builtin_neon_vst1q_x4_v:
      Int = Intrinsic::aarch64_neon_st1x4;
      break;
    }
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
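    // (Note, not in the original source: the builtin passes the destination
    //  pointer first, but the aarch64.neon.st1xN intrinsics expect it last;
    //  the rotate above moves Ops[0] to the back, yielding the operand order
    //  { vec0, ..., vecN-1, ptr } for the call below.)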
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
  }
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
    auto Alignment = CharUnits::fromQuantity(
        BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16);
    return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment);
  }
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  case NEON::BI__builtin_neon_vld1_lane_v:
  case NEON::BI__builtin_neon_vld1q_lane_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    auto Alignment = CharUnits::fromQuantity(
        BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16);
    Ops[0] =
        Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
  }
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    Value *V = UndefValue::get(Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    auto Alignment = CharUnits::fromQuantity(
        BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 8 : 16);
    Ops[0] =
        Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case NEON::BI__builtin_neon_vst1_lane_v:
  case NEON::BI__builtin_neon_vst1q_lane_v:
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    return Builder.CreateDefaultAlignedStore(Ops[1],
                                             Builder.CreateBitCast(Ops[0], Ty));
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v: {
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
    Ops[0] = Builder.CreateBitCast(Ops[0],
                llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v: {
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
    Ops[0] = Builder.CreateBitCast(Ops[0],
                llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v: {
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
    Ops[0] = Builder.CreateBitCast(Ops[0],
                llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld2_dup_v:
  case NEON::BI__builtin_neon_vld2q_dup_v: {
    llvm::Type *PTy =
        llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
    Ops[0] = Builder.CreateBitCast(Ops[0],
                llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld3_dup_v:
  case NEON::BI__builtin_neon_vld3q_dup_v: {
    llvm::Type *PTy =
        llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
    Ops[0] = Builder.CreateBitCast(Ops[0],
                llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld4_dup_v:
  case NEON::BI__builtin_neon_vld4q_dup_v: {
    llvm::Type *PTy =
        llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
    Ops[0] = Builder.CreateBitCast(Ops[0],
                llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v: {
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
    Ops.push_back(Ops[1]);
    Ops.erase(Ops.begin()+1);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v: {
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
    Ops.push_back(Ops[1]);
    Ops.erase(Ops.begin()+1);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
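    // (Note, not in the original source: the push_back/erase pair below
    //  moves the load pointer from slot 1 to the end of Ops, producing the
    //  operand order the aarch64.neon.ld4lane intrinsic expects,
    //  { v0, v1, v2, v3, lane, ptr }; Ops[0], where the returned struct is
    //  stored, is skipped via slice(1) at the call.)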
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v: {
    Ops.push_back(Ops[0]);
    Ops.erase(Ops.begin());
    llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v: {
    Ops.push_back(Ops[0]);
    Ops.erase(Ops.begin());
    Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
    llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v: {
    Ops.push_back(Ops[0]);
    Ops.erase(Ops.begin());
    llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v: {
    Ops.push_back(Ops[0]);
    Ops.erase(Ops.begin());
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
    llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v: {
    Ops.push_back(Ops[0]);
    Ops.erase(Ops.begin());
    llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v: {
    Ops.push_back(Ops[0]);
    Ops.erase(Ops.begin());
    Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
    llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(i+vi);
        Indices.push_back(i+e+vi);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(2*i+vi);

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back((i + vi*e) >> 1);
        Indices.push_back(((i + vi*e) >> 1)+e);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
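  // Worked example for the interleaving cases above (illustrative): for a
  // <4 x i32> vzip, the two loop iterations build the shuffle masks
  // <0, 4, 1, 5> and <2, 6, 3, 7>, i.e. the low and high halves of the
  // zip of the two inputs, stored to consecutive vector slots of Ops[0];
  // vtrn and vuzp differ only in the index formula.
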
  case NEON::BI__builtin_neon_vqtbl1q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
                        Ops, "vtbl1");
  }
  case NEON::BI__builtin_neon_vqtbl2q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
                        Ops, "vtbl2");
  }
  case NEON::BI__builtin_neon_vqtbl3q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
                        Ops, "vtbl3");
  }
  case NEON::BI__builtin_neon_vqtbl4q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
                        Ops, "vtbl4");
  }
  case NEON::BI__builtin_neon_vqtbx1q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
                        Ops, "vtbx1");
  }
  case NEON::BI__builtin_neon_vqtbx2q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
                        Ops, "vtbx2");
  }
  case NEON::BI__builtin_neon_vqtbx3q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
                        Ops, "vtbx3");
  }
  case NEON::BI__builtin_neon_vqtbx4q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
                        Ops, "vtbx4");
  }
  case NEON::BI__builtin_neon_vsqadd_v:
  case NEON::BI__builtin_neon_vsqaddq_v: {
    Int = Intrinsic::aarch64_neon_usqadd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
  }
  case NEON::BI__builtin_neon_vuqadd_v:
  case NEON::BI__builtin_neon_vuqaddq_v: {
    Int = Intrinsic::aarch64_neon_suqadd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
  }
  }
}

llvm::Value *CodeGenFunction::
BuildVector(ArrayRef<llvm::Value*> Ops) {
  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
         "Not a power-of-two sized vector!");
  bool AllConstants = true;
  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
    AllConstants &= isa<Constant>(Ops[i]);

  // If this is a constant vector, create a ConstantVector.
  if (AllConstants) {
    SmallVector<llvm::Constant*, 16> CstOps;
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      CstOps.push_back(cast<Constant>(Ops[i]));
    return llvm::ConstantVector::get(CstOps);
  }

  // Otherwise, insertelement the values to build the vector.
  Value *Result =
      llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));

  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));

  return Result;
}

// Convert the mask from an integer type to a vector of i1.
static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
                              unsigned NumElts) {

  llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
                         cast<IntegerType>(Mask->getType())->getBitWidth());
  Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);

  // If we have less than 8 elements, then the starting mask was an i8 and
  // we need to extract down to the right number of elements.
  if (NumElts < 8) {
    uint32_t Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
                                              makeArrayRef(Indices, NumElts),
                                              "extract");
  }
  return MaskVec;
}
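
// Worked example (illustrative): for an i8 mask used with a 4-element
// vector, getMaskVecValue bitcasts the mask to <8 x i1> and the shuffle
// keeps elements 0..3, so a mask value of 0b0101 becomes
// <i1 1, i1 0, i1 1, i1 0>; bit i of the integer mask governs lane i.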

static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
                                 SmallVectorImpl<Value *> &Ops,
                                 unsigned Align) {
  // Cast the pointer to the right type.
  Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
                         llvm::PointerType::getUnqual(Ops[1]->getType()));

  // If the mask is all ones just emit a regular store.
  if (const auto *C = dyn_cast<Constant>(Ops[2]))
    if (C->isAllOnesValue())
      return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align);

  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
                                   Ops[1]->getType()->getVectorNumElements());

  return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec);
}

static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
                                SmallVectorImpl<Value *> &Ops, unsigned Align) {
  // Cast the pointer to the right type.
  Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
                         llvm::PointerType::getUnqual(Ops[1]->getType()));

  // If the mask is all ones just emit a regular load.
  if (const auto *C = dyn_cast<Constant>(Ops[2]))
    if (C->isAllOnesValue())
      return CGF.Builder.CreateAlignedLoad(Ops[0], Align);

  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
                                   Ops[1]->getType()->getVectorNumElements());

  return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
}
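
// Illustration (a sketch, not verbatim IR): with a non-constant i8 mask,
// EmitX86MaskedLoad lowers a masked load builtin to the generic
// llvm.masked.load intrinsic, roughly
//   %m = <mask converted by getMaskVecValue>
//   %v = call <8 x float> @llvm.masked.load...(%ptr, i32 <align>,
//                                              <8 x i1> %m, <8 x float> %src)
// where the passthru operand %src is the builtin's merge vector (Ops[1]).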

static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF,
                                        SmallVectorImpl<Value *> &Ops,
                                        llvm::Type *DstTy,
                                        unsigned SrcSizeInBits,
                                        unsigned Align) {
  // Load the subvector.
  Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align);

  // Create broadcast mask.
  unsigned NumDstElts = DstTy->getVectorNumElements();
  unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits();

  SmallVector<uint32_t, 8> Mask;
  for (unsigned i = 0; i != NumDstElts; i += NumSrcElts)
    for (unsigned j = 0; j != NumSrcElts; ++j)
      Mask.push_back(j);

  return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst");
}

static Value *EmitX86Select(CodeGenFunction &CGF,
                            Value *Mask, Value *Op0, Value *Op1) {

  // If the mask is all ones just return first argument.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());

  return CGF.Builder.CreateSelect(Mask, Op0, Op1);
}

static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
                                   bool Signed, SmallVectorImpl<Value *> &Ops) {
  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
  Value *Cmp;

  if (CC == 3) {
    Cmp = Constant::getNullValue(
        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
  } else if (CC == 7) {
    Cmp = Constant::getAllOnesValue(
        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
  } else {
    ICmpInst::Predicate Pred;
    switch (CC) {
    default: llvm_unreachable("Unknown condition code");
    case 0: Pred = ICmpInst::ICMP_EQ;  break;
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
    case 4: Pred = ICmpInst::ICMP_NE;  break;
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
    }
    Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
  }

  const auto *C = dyn_cast<Constant>(Ops.back());
  if (!C || !C->isAllOnesValue())
    Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts));

  if (NumElts < 8) {
    uint32_t Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = i % NumElts + NumElts;
    Cmp = CGF.Builder.CreateShuffleVector(
        Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
  }
  return CGF.Builder.CreateBitCast(Cmp,
                                   IntegerType::get(CGF.getLLVMContext(),
                                                    std::max(NumElts, 8U)));
}

static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
                            ArrayRef<Value *> Ops) {
  Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
  Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]);

  if (Ops.size() == 2)
    return Res;

  assert(Ops.size() == 4);
  return EmitX86Select(CGF, Ops[3], Res, Ops[2]);
}
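
// Worked example (illustrative): in EmitX86MaskedCompare, comparing two
// <4 x i32> vectors yields a <4 x i1>; since the narrowest returned mask
// type is i8, the shuffle pads the result to <8 x i1> with lanes taken
// from a zero vector, so bits 4..7 of the returned i8 are always 0.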

Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  if (BuiltinID == X86::BI__builtin_ms_va_start ||
      BuiltinID == X86::BI__builtin_ms_va_end)
    return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
                          BuiltinID == X86::BI__builtin_ms_va_start);
  if (BuiltinID == X86::BI__builtin_ms_va_copy) {
    // Lower this manually. We can't reliably determine whether or not any
    // given va_copy() is for a Win64 va_list from the calling convention
    // alone, because it's legal to do this from a System V ABI function.
    // With opaque pointer types, we won't have enough information in LLVM
    // IR to determine this from the argument types, either. Best to do it
    // now, while we have enough information.
    Address DestAddr = EmitMSVAListRef(E->getArg(0));
    Address SrcAddr = EmitMSVAListRef(E->getArg(1));

    llvm::Type *BPP = Int8PtrPtrTy;

    DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
                       DestAddr.getAlignment());
    SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
                      SrcAddr.getAlignment());

    Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
    return Builder.CreateStore(ArgPtr, DestAddr);
  }

  SmallVector<Value*, 4> Ops;

  // Find out if any arguments are required to be integer constant expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  assert(Error == ASTContext::GE_None && "Should not codegen an error");

  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
    // If this is a normal argument, just emit it as a scalar.
    if ((ICEArguments & (1 << i)) == 0) {
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
      continue;
    }

    // If this is required to be a constant, constant fold it so that we know
    // that the generated intrinsic gets a ConstantInt.
    llvm::APSInt Result;
    bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
    assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
    Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
  }

  // These exist so that the builtin that takes an immediate can be bounds
  // checked by clang to avoid passing bad immediates to the backend. Since
  // AVX has a larger immediate than SSE we would need separate builtins to
  // do the different bounds checking. Rather than create a clang specific
  // SSE only builtin, this implements eight separate builtins to match gcc
  // implementation.
  auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
    Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops);
  };

  // For the vector forms of FP comparisons, translate the builtins directly to
  // IR.
  // TODO: The builtins could be removed if the SSE header files used vector
  // extension comparisons directly (vector ordered/unordered may need
  // additional support via __builtin_isnan()).
  auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
    Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
    llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
    llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
    Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
    return Builder.CreateBitCast(Sext, FPVecTy);
  };

  switch (BuiltinID) {
  default: return nullptr;
  case X86::BI__builtin_cpu_supports: {
    const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
    StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();

    // TODO: When/if this becomes more than x86 specific then use a TargetInfo
    // based mapping.
    // Processor features and mapping to processor feature value.
    enum X86Features {
      CMOV = 0,
      MMX,
      POPCNT,
      SSE,
      SSE2,
      SSE3,
      SSSE3,
      SSE4_1,
      SSE4_2,
      AVX,
      AVX2,
      SSE4_A,
      FMA4,
      XOP,
      FMA,
      AVX512F,
      BMI,
      BMI2,
      AES,
      PCLMUL,
      AVX512VL,
      AVX512BW,
      AVX512DQ,
      AVX512CD,
      AVX512ER,
      AVX512PF,
      AVX512VBMI,
      AVX512IFMA,
      MAX
    };

    X86Features Feature = StringSwitch<X86Features>(FeatureStr)
        .Case("cmov", X86Features::CMOV)
        .Case("mmx", X86Features::MMX)
        .Case("popcnt", X86Features::POPCNT)
        .Case("sse", X86Features::SSE)
        .Case("sse2", X86Features::SSE2)
        .Case("sse3", X86Features::SSE3)
        .Case("ssse3", X86Features::SSSE3)
        .Case("sse4.1", X86Features::SSE4_1)
        .Case("sse4.2", X86Features::SSE4_2)
        .Case("avx", X86Features::AVX)
        .Case("avx2", X86Features::AVX2)
        .Case("sse4a", X86Features::SSE4_A)
        .Case("fma4", X86Features::FMA4)
        .Case("xop", X86Features::XOP)
        .Case("fma", X86Features::FMA)
        .Case("avx512f", X86Features::AVX512F)
        .Case("bmi", X86Features::BMI)
        .Case("bmi2", X86Features::BMI2)
        .Case("aes", X86Features::AES)
        .Case("pclmul", X86Features::PCLMUL)
        .Case("avx512vl", X86Features::AVX512VL)
        .Case("avx512bw", X86Features::AVX512BW)
        .Case("avx512dq", X86Features::AVX512DQ)
        .Case("avx512cd", X86Features::AVX512CD)
        .Case("avx512er", X86Features::AVX512ER)
        .Case("avx512pf", X86Features::AVX512PF)
        .Case("avx512vbmi", X86Features::AVX512VBMI)
        .Case("avx512ifma", X86Features::AVX512IFMA)
        .Default(X86Features::MAX);
    assert(Feature != X86Features::MAX && "Invalid feature!");

    // Matching the struct layout from the compiler-rt/libgcc structure that is
    // filled in:
    //   unsigned int __cpu_vendor;
    //   unsigned int __cpu_type;
    //   unsigned int __cpu_subtype;
    //   unsigned int __cpu_features[1];
    llvm::Type *STy = llvm::StructType::get(
        Int32Ty, Int32Ty, Int32Ty, llvm::ArrayType::get(Int32Ty, 1), nullptr);

    // Grab the global __cpu_model.
    llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");

    // Grab the first (0th) element from the field __cpu_features off of the
    // global in the struct STy.
    Value *Idxs[] = {
      ConstantInt::get(Int32Ty, 0),
      ConstantInt::get(Int32Ty, 3),
      ConstantInt::get(Int32Ty, 0)
    };
    Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
    Value *Features = Builder.CreateAlignedLoad(CpuFeatures,
                                                CharUnits::fromQuantity(4));

    // Check the value of the bit corresponding to the feature requested.
    Value *Bitset = Builder.CreateAnd(
        Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature));
    return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
  }
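  // End-to-end illustration (a sketch; the value names are invented): for
  // __builtin_cpu_supports("avx2"), Feature == AVX2 == 10, so the code
  // above amounts to
  //   %feat = load i32 from __cpu_model.__cpu_features[0]
  //   %bit  = and i32 %feat, 1024        ; 1 << 10
  //   %res  = icmp ne i32 %bit, 0
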
  case X86::BI_mm_prefetch: {
    Value *Address = Ops[0];
    Value *RW = ConstantInt::get(Int32Ty, 0);
    Value *Locality = Ops[1];
    Value *Data = ConstantInt::get(Int32Ty, 1);
    Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
    return Builder.CreateCall(F, {Address, RW, Locality, Data});
  }
  case X86::BI_mm_clflush: {
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
                              Ops[0]);
  }
  case X86::BI_mm_lfence: {
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
  }
  case X86::BI_mm_mfence: {
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
  }
  case X86::BI_mm_sfence: {
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
  }
  case X86::BI_mm_pause: {
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
  }
  case X86::BI__rdtsc: {
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
  }
  case X86::BI__builtin_ia32_undef128:
  case X86::BI__builtin_ia32_undef256:
  case X86::BI__builtin_ia32_undef512:
    return UndefValue::get(ConvertType(E->getType()));
  case X86::BI__builtin_ia32_vec_init_v8qi:
  case X86::BI__builtin_ia32_vec_init_v4hi:
  case X86::BI__builtin_ia32_vec_init_v2si:
    return Builder.CreateBitCast(BuildVector(Ops),
                                 llvm::Type::getX86_MMXTy(getLLVMContext()));
  case X86::BI__builtin_ia32_vec_ext_v2si:
    return Builder.CreateExtractElement(Ops[0],
                                  llvm::ConstantInt::get(Ops[1]->getType(), 0));
  case X86::BI_mm_setcsr:
  case X86::BI__builtin_ia32_ldmxcsr: {
    Address Tmp = CreateMemTemp(E->getArg(0)->getType());
    Builder.CreateStore(Ops[0], Tmp);
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
                          Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
  }
  case X86::BI_mm_getcsr:
  case X86::BI__builtin_ia32_stmxcsr: {
    Address Tmp = CreateMemTemp(E->getType());
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
                       Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
    return Builder.CreateLoad(Tmp, "stmxcsr");
  }
  case X86::BI__builtin_ia32_xsave:
  case X86::BI__builtin_ia32_xsave64:
  case X86::BI__builtin_ia32_xrstor:
  case X86::BI__builtin_ia32_xrstor64:
  case X86::BI__builtin_ia32_xsaveopt:
  case X86::BI__builtin_ia32_xsaveopt64:
  case X86::BI__builtin_ia32_xrstors:
  case X86::BI__builtin_ia32_xrstors64:
  case X86::BI__builtin_ia32_xsavec:
  case X86::BI__builtin_ia32_xsavec64:
  case X86::BI__builtin_ia32_xsaves:
  case X86::BI__builtin_ia32_xsaves64: {
    Intrinsic::ID ID;
#define INTRINSIC_X86_XSAVE_ID(NAME) \
    case X86::BI__builtin_ia32_##NAME: \
      ID = Intrinsic::x86_##NAME; \
      break
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    INTRINSIC_X86_XSAVE_ID(xsave);
    INTRINSIC_X86_XSAVE_ID(xsave64);
    INTRINSIC_X86_XSAVE_ID(xrstor);
    INTRINSIC_X86_XSAVE_ID(xrstor64);
    INTRINSIC_X86_XSAVE_ID(xsaveopt);
    INTRINSIC_X86_XSAVE_ID(xsaveopt64);
    INTRINSIC_X86_XSAVE_ID(xrstors);
    INTRINSIC_X86_XSAVE_ID(xrstors64);
    INTRINSIC_X86_XSAVE_ID(xsavec);
    INTRINSIC_X86_XSAVE_ID(xsavec64);
    INTRINSIC_X86_XSAVE_ID(xsaves);
    INTRINSIC_X86_XSAVE_ID(xsaves64);
    }
#undef INTRINSIC_X86_XSAVE_ID
    Value *Mhi = Builder.CreateTrunc(
        Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
    Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
    Ops[1] = Mhi;
    Ops.push_back(Mlo);
    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
  }
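  // Illustration of the mask splitting above: the xsave* builtins take one
  // 64-bit feature mask, while the corresponding intrinsics take two i32
  // halves (high word first, as built here), presumably mirroring the
  // instructions' EDX:EAX mask pair; e.g. __builtin_ia32_xsave(p, m)
  // becomes a call with operands (p, trunc(m >> 32), trunc(m)).
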
  case X86::BI__builtin_ia32_storedqudi128_mask:
  case X86::BI__builtin_ia32_storedqusi128_mask:
  case X86::BI__builtin_ia32_storedquhi128_mask:
  case X86::BI__builtin_ia32_storedquqi128_mask:
  case X86::BI__builtin_ia32_storeupd128_mask:
  case X86::BI__builtin_ia32_storeups128_mask:
  case X86::BI__builtin_ia32_storedqudi256_mask:
  case X86::BI__builtin_ia32_storedqusi256_mask:
  case X86::BI__builtin_ia32_storedquhi256_mask:
  case X86::BI__builtin_ia32_storedquqi256_mask:
  case X86::BI__builtin_ia32_storeupd256_mask:
  case X86::BI__builtin_ia32_storeups256_mask:
  case X86::BI__builtin_ia32_storedqudi512_mask:
  case X86::BI__builtin_ia32_storedqusi512_mask:
  case X86::BI__builtin_ia32_storedquhi512_mask:
  case X86::BI__builtin_ia32_storedquqi512_mask:
  case X86::BI__builtin_ia32_storeupd512_mask:
  case X86::BI__builtin_ia32_storeups512_mask:
    return EmitX86MaskedStore(*this, Ops, 1);

  case X86::BI__builtin_ia32_storess128_mask:
  case X86::BI__builtin_ia32_storesd128_mask: {
    return EmitX86MaskedStore(*this, Ops, 16);
  }
  case X86::BI__builtin_ia32_movdqa32store128_mask:
  case X86::BI__builtin_ia32_movdqa64store128_mask:
  case X86::BI__builtin_ia32_storeaps128_mask:
  case X86::BI__builtin_ia32_storeapd128_mask:
  case X86::BI__builtin_ia32_movdqa32store256_mask:
  case X86::BI__builtin_ia32_movdqa64store256_mask:
  case X86::BI__builtin_ia32_storeaps256_mask:
  case X86::BI__builtin_ia32_storeapd256_mask:
  case X86::BI__builtin_ia32_movdqa32store512_mask:
  case X86::BI__builtin_ia32_movdqa64store512_mask:
  case X86::BI__builtin_ia32_storeaps512_mask:
  case X86::BI__builtin_ia32_storeapd512_mask: {
    unsigned Align =
        getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
    return EmitX86MaskedStore(*this, Ops, Align);
  }
  case X86::BI__builtin_ia32_loadups128_mask:
  case X86::BI__builtin_ia32_loadups256_mask:
  case X86::BI__builtin_ia32_loadups512_mask:
  case X86::BI__builtin_ia32_loadupd128_mask:
  case X86::BI__builtin_ia32_loadupd256_mask:
  case X86::BI__builtin_ia32_loadupd512_mask:
  case X86::BI__builtin_ia32_loaddquqi128_mask:
  case X86::BI__builtin_ia32_loaddquqi256_mask:
  case X86::BI__builtin_ia32_loaddquqi512_mask:
  case X86::BI__builtin_ia32_loaddquhi128_mask:
  case X86::BI__builtin_ia32_loaddquhi256_mask:
  case X86::BI__builtin_ia32_loaddquhi512_mask:
  case X86::BI__builtin_ia32_loaddqusi128_mask:
  case X86::BI__builtin_ia32_loaddqusi256_mask:
  case X86::BI__builtin_ia32_loaddqusi512_mask:
  case X86::BI__builtin_ia32_loaddqudi128_mask:
  case X86::BI__builtin_ia32_loaddqudi256_mask:
  case X86::BI__builtin_ia32_loaddqudi512_mask:
    return EmitX86MaskedLoad(*this, Ops, 1);

  case X86::BI__builtin_ia32_loadss128_mask:
  case X86::BI__builtin_ia32_loadsd128_mask:
    return EmitX86MaskedLoad(*this, Ops, 16);

  case X86::BI__builtin_ia32_loadaps128_mask:
  case X86::BI__builtin_ia32_loadaps256_mask:
  case X86::BI__builtin_ia32_loadaps512_mask:
  case X86::BI__builtin_ia32_loadapd128_mask:
  case X86::BI__builtin_ia32_loadapd256_mask:
  case X86::BI__builtin_ia32_loadapd512_mask:
  case X86::BI__builtin_ia32_movdqa32load128_mask:
  case X86::BI__builtin_ia32_movdqa32load256_mask:
  case X86::BI__builtin_ia32_movdqa32load512_mask:
  case X86::BI__builtin_ia32_movdqa64load128_mask:
  case X86::BI__builtin_ia32_movdqa64load256_mask:
  case X86::BI__builtin_ia32_movdqa64load512_mask: {
    unsigned Align =
        getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
    return EmitX86MaskedLoad(*this, Ops, Align);
  }

  case X86::BI__builtin_ia32_vbroadcastf128_pd256:
  case X86::BI__builtin_ia32_vbroadcastf128_ps256: {
    llvm::Type *DstTy = ConvertType(E->getType());
    return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1);
  }

  case X86::BI__builtin_ia32_storehps:
  case X86::BI__builtin_ia32_storelps: {
    llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
    llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);

    // cast val v2i64
    Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");

    // extract (0, 1)
    unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
    llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");

    // cast pointer to i64 & store
    Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case X86::BI__builtin_ia32_palignr128:
  case X86::BI__builtin_ia32_palignr256:
  case X86::BI__builtin_ia32_palignr512_mask: {
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();

    unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
    assert(NumElts % 16 == 0);

    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if (ShiftVal >= 32)
      return llvm::Constant::getNullValue(ConvertType(E->getType()));

    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    if (ShiftVal > 16) {
      ShiftVal -= 16;
      Ops[1] = Ops[0];
      Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
    }

    uint32_t Indices[64];
    // 256-bit palignr operates on 128-bit lanes so we need to handle that
    for (unsigned l = 0; l != NumElts; l += 16) {
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = ShiftVal + i;
        if (Idx >= 16)
          Idx += NumElts - 16; // End of lane, switch operand.
        Indices[l + i] = Idx + l;
      }
    }

    Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
                                               makeArrayRef(Indices, NumElts),
                                               "palignr");

    // If this isn't a masked builtin, just return the align operation.
    if (Ops.size() == 3)
      return Align;

    return EmitX86Select(*this, Ops[4], Align, Ops[3]);
  }
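  // Worked example (illustrative): for 128-bit palignr with an immediate
  // of 20, the code above rewrites it as ShiftVal == 4 shifting in zeroes
  // (Ops[1] becomes the old Ops[0], Ops[0] a zero vector), then builds the
  // byte indices 4..19, i.e. the concatenation shifted right by 4 bytes
  // with the top 4 bytes zero-filled.
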
  case X86::BI__builtin_ia32_movnti:
  case X86::BI__builtin_ia32_movnti64:
  case X86::BI__builtin_ia32_movntsd:
  case X86::BI__builtin_ia32_movntss: {
    llvm::MDNode *Node = llvm::MDNode::get(
        getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));

    Value *Ptr = Ops[0];
    Value *Src = Ops[1];

    // Extract the 0'th element of the source vector.
    if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
        BuiltinID == X86::BI__builtin_ia32_movntss)
      Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");

    // Convert the type of the pointer to a pointer to the stored type.
    Value *BC = Builder.CreateBitCast(
        Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");

    // Unaligned nontemporal store of the scalar value.
    StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
    SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
    SI->setAlignment(1);
    return SI;
  }

  case X86::BI__builtin_ia32_selectb_128:
  case X86::BI__builtin_ia32_selectb_256:
  case X86::BI__builtin_ia32_selectb_512:
  case X86::BI__builtin_ia32_selectw_128:
  case X86::BI__builtin_ia32_selectw_256:
  case X86::BI__builtin_ia32_selectw_512:
  case X86::BI__builtin_ia32_selectd_128:
  case X86::BI__builtin_ia32_selectd_256:
  case X86::BI__builtin_ia32_selectd_512:
  case X86::BI__builtin_ia32_selectq_128:
  case X86::BI__builtin_ia32_selectq_256:
  case X86::BI__builtin_ia32_selectq_512:
  case X86::BI__builtin_ia32_selectps_128:
  case X86::BI__builtin_ia32_selectps_256:
  case X86::BI__builtin_ia32_selectps_512:
  case X86::BI__builtin_ia32_selectpd_128:
  case X86::BI__builtin_ia32_selectpd_256:
  case X86::BI__builtin_ia32_selectpd_512:
    return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
  case X86::BI__builtin_ia32_pcmpeqb128_mask:
  case X86::BI__builtin_ia32_pcmpeqb256_mask:
  case X86::BI__builtin_ia32_pcmpeqb512_mask:
  case X86::BI__builtin_ia32_pcmpeqw128_mask:
  case X86::BI__builtin_ia32_pcmpeqw256_mask:
  case X86::BI__builtin_ia32_pcmpeqw512_mask:
  case X86::BI__builtin_ia32_pcmpeqd128_mask:
  case X86::BI__builtin_ia32_pcmpeqd256_mask:
  case X86::BI__builtin_ia32_pcmpeqd512_mask:
  case X86::BI__builtin_ia32_pcmpeqq128_mask:
  case X86::BI__builtin_ia32_pcmpeqq256_mask:
  case X86::BI__builtin_ia32_pcmpeqq512_mask:
    return EmitX86MaskedCompare(*this, 0, false, Ops);
  case X86::BI__builtin_ia32_pcmpgtb128_mask:
  case X86::BI__builtin_ia32_pcmpgtb256_mask:
  case X86::BI__builtin_ia32_pcmpgtb512_mask:
  case X86::BI__builtin_ia32_pcmpgtw128_mask:
  case X86::BI__builtin_ia32_pcmpgtw256_mask:
  case X86::BI__builtin_ia32_pcmpgtw512_mask:
  case X86::BI__builtin_ia32_pcmpgtd128_mask:
  case X86::BI__builtin_ia32_pcmpgtd256_mask:
  case X86::BI__builtin_ia32_pcmpgtd512_mask:
  case X86::BI__builtin_ia32_pcmpgtq128_mask:
  case X86::BI__builtin_ia32_pcmpgtq256_mask:
  case X86::BI__builtin_ia32_pcmpgtq512_mask:
    return EmitX86MaskedCompare(*this, 6, true, Ops);
  case X86::BI__builtin_ia32_cmpb128_mask:
  case X86::BI__builtin_ia32_cmpb256_mask:
  case X86::BI__builtin_ia32_cmpb512_mask:
  case X86::BI__builtin_ia32_cmpw128_mask:
  case X86::BI__builtin_ia32_cmpw256_mask:
  case X86::BI__builtin_ia32_cmpw512_mask:
  case X86::BI__builtin_ia32_cmpd128_mask:
  case X86::BI__builtin_ia32_cmpd256_mask:
  case X86::BI__builtin_ia32_cmpd512_mask:
  case X86::BI__builtin_ia32_cmpq128_mask:
  case X86::BI__builtin_ia32_cmpq256_mask:
  case X86::BI__builtin_ia32_cmpq512_mask: {
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
    return EmitX86MaskedCompare(*this, CC, true, Ops);
  }
  case X86::BI__builtin_ia32_ucmpb128_mask:
  case X86::BI__builtin_ia32_ucmpb256_mask:
  case X86::BI__builtin_ia32_ucmpb512_mask:
  case X86::BI__builtin_ia32_ucmpw128_mask:
  case X86::BI__builtin_ia32_ucmpw256_mask:
  case X86::BI__builtin_ia32_ucmpw512_mask:
  case X86::BI__builtin_ia32_ucmpd128_mask:
  case X86::BI__builtin_ia32_ucmpd256_mask:
  case X86::BI__builtin_ia32_ucmpd512_mask:
  case X86::BI__builtin_ia32_ucmpq128_mask:
  case X86::BI__builtin_ia32_ucmpq256_mask:
  case X86::BI__builtin_ia32_ucmpq512_mask: {
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
    return EmitX86MaskedCompare(*this, CC, false, Ops);
  }

  case X86::BI__builtin_ia32_vplzcntd_128_mask:
  case X86::BI__builtin_ia32_vplzcntd_256_mask:
  case X86::BI__builtin_ia32_vplzcntd_512_mask:
  case X86::BI__builtin_ia32_vplzcntq_128_mask:
  case X86::BI__builtin_ia32_vplzcntq_256_mask:
  case X86::BI__builtin_ia32_vplzcntq_512_mask: {
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
    return EmitX86Select(*this, Ops[2],
                         Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}),
                         Ops[1]);
  }

  case X86::BI__builtin_ia32_pmaxsb128:
  case X86::BI__builtin_ia32_pmaxsw128:
  case X86::BI__builtin_ia32_pmaxsd128:
  case X86::BI__builtin_ia32_pmaxsq128_mask:
  case X86::BI__builtin_ia32_pmaxsb256:
  case X86::BI__builtin_ia32_pmaxsw256:
  case X86::BI__builtin_ia32_pmaxsd256:
  case X86::BI__builtin_ia32_pmaxsq256_mask:
  case X86::BI__builtin_ia32_pmaxsb512_mask:
  case X86::BI__builtin_ia32_pmaxsw512_mask:
  case X86::BI__builtin_ia32_pmaxsd512_mask:
  case X86::BI__builtin_ia32_pmaxsq512_mask:
    return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops);
  case X86::BI__builtin_ia32_pmaxub128:
  case X86::BI__builtin_ia32_pmaxuw128:
  case X86::BI__builtin_ia32_pmaxud128:
  case X86::BI__builtin_ia32_pmaxuq128_mask:
  case X86::BI__builtin_ia32_pmaxub256:
  case X86::BI__builtin_ia32_pmaxuw256:
  case X86::BI__builtin_ia32_pmaxud256:
  case X86::BI__builtin_ia32_pmaxuq256_mask:
  case X86::BI__builtin_ia32_pmaxub512_mask:
  case X86::BI__builtin_ia32_pmaxuw512_mask:
  case X86::BI__builtin_ia32_pmaxud512_mask:
  case X86::BI__builtin_ia32_pmaxuq512_mask:
    return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops);
  case X86::BI__builtin_ia32_pminsb128:
  case X86::BI__builtin_ia32_pminsw128:
  case X86::BI__builtin_ia32_pminsd128:
  case X86::BI__builtin_ia32_pminsq128_mask:
  case X86::BI__builtin_ia32_pminsb256:
  case X86::BI__builtin_ia32_pminsw256:
  case X86::BI__builtin_ia32_pminsd256:
  case X86::BI__builtin_ia32_pminsq256_mask:
  case X86::BI__builtin_ia32_pminsb512_mask:
  case X86::BI__builtin_ia32_pminsw512_mask:
  case X86::BI__builtin_ia32_pminsd512_mask:
  case X86::BI__builtin_ia32_pminsq512_mask:
    return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops);
  case X86::BI__builtin_ia32_pminub128:
  case X86::BI__builtin_ia32_pminuw128:
  case X86::BI__builtin_ia32_pminud128:
  case X86::BI__builtin_ia32_pminuq128_mask:
  case X86::BI__builtin_ia32_pminub256:
  case X86::BI__builtin_ia32_pminuw256:
  case X86::BI__builtin_ia32_pminud256:
  case X86::BI__builtin_ia32_pminuq256_mask:
  case X86::BI__builtin_ia32_pminub512_mask:
  case X86::BI__builtin_ia32_pminuw512_mask:
  case X86::BI__builtin_ia32_pminud512_mask:
  case X86::BI__builtin_ia32_pminuq512_mask:
    return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops);
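  // Usage illustration: __builtin_ia32_pmaxsd128(a, b) goes through
  // EmitX86MinMax and becomes
  //   %c = icmp sgt <4 x i32> %a, %b
  //   %r = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
  // and the AVX-512 *_mask forms carry two extra operands (merge source
  // and write-mask) that EmitX86Select applies to the result.
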
  // 3DNow!
  case X86::BI__builtin_ia32_pswapdsf:
  case X86::BI__builtin_ia32_pswapdsi: {
    llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
    Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
    return Builder.CreateCall(F, Ops, "pswapd");
  }
  case X86::BI__builtin_ia32_rdrand16_step:
  case X86::BI__builtin_ia32_rdrand32_step:
  case X86::BI__builtin_ia32_rdrand64_step:
  case X86::BI__builtin_ia32_rdseed16_step:
  case X86::BI__builtin_ia32_rdseed32_step:
  case X86::BI__builtin_ia32_rdseed64_step: {
    Intrinsic::ID ID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_rdrand16_step:
      ID = Intrinsic::x86_rdrand_16;
      break;
    case X86::BI__builtin_ia32_rdrand32_step:
      ID = Intrinsic::x86_rdrand_32;
      break;
    case X86::BI__builtin_ia32_rdrand64_step:
      ID = Intrinsic::x86_rdrand_64;
      break;
    case X86::BI__builtin_ia32_rdseed16_step:
      ID = Intrinsic::x86_rdseed_16;
      break;
    case X86::BI__builtin_ia32_rdseed32_step:
      ID = Intrinsic::x86_rdseed_32;
      break;
    case X86::BI__builtin_ia32_rdseed64_step:
      ID = Intrinsic::x86_rdseed_64;
      break;
    }

    Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
    Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
                                      Ops[0]);
    return Builder.CreateExtractValue(Call, 1);
  }

  // SSE packed comparison intrinsics
  case X86::BI__builtin_ia32_cmpeqps:
  case X86::BI__builtin_ia32_cmpeqpd:
    return getVectorFCmpIR(CmpInst::FCMP_OEQ);
  case X86::BI__builtin_ia32_cmpltps:
  case X86::BI__builtin_ia32_cmpltpd:
    return getVectorFCmpIR(CmpInst::FCMP_OLT);
  case X86::BI__builtin_ia32_cmpleps:
  case X86::BI__builtin_ia32_cmplepd:
    return getVectorFCmpIR(CmpInst::FCMP_OLE);
  case X86::BI__builtin_ia32_cmpunordps:
  case X86::BI__builtin_ia32_cmpunordpd:
    return getVectorFCmpIR(CmpInst::FCMP_UNO);
  case X86::BI__builtin_ia32_cmpneqps:
  case X86::BI__builtin_ia32_cmpneqpd:
    return getVectorFCmpIR(CmpInst::FCMP_UNE);
  case X86::BI__builtin_ia32_cmpnltps:
  case X86::BI__builtin_ia32_cmpnltpd:
    return getVectorFCmpIR(CmpInst::FCMP_UGE);
  case X86::BI__builtin_ia32_cmpnleps:
  case X86::BI__builtin_ia32_cmpnlepd:
    return getVectorFCmpIR(CmpInst::FCMP_UGT);
  case X86::BI__builtin_ia32_cmpordps:
  case X86::BI__builtin_ia32_cmpordpd:
    return getVectorFCmpIR(CmpInst::FCMP_ORD);
  case X86::BI__builtin_ia32_cmpps:
  case X86::BI__builtin_ia32_cmpps256:
  case X86::BI__builtin_ia32_cmppd:
  case X86::BI__builtin_ia32_cmppd256: {
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
    // If this is one of the SSE immediates, we can use native IR.
    if (CC < 8) {
      FCmpInst::Predicate Pred;
      switch (CC) {
      case 0: Pred = FCmpInst::FCMP_OEQ; break;
      case 1: Pred = FCmpInst::FCMP_OLT; break;
      case 2: Pred = FCmpInst::FCMP_OLE; break;
      case 3: Pred = FCmpInst::FCMP_UNO; break;
      case 4: Pred = FCmpInst::FCMP_UNE; break;
      case 5: Pred = FCmpInst::FCMP_UGE; break;
      case 6: Pred = FCmpInst::FCMP_UGT; break;
      case 7: Pred = FCmpInst::FCMP_ORD; break;
      }
      return getVectorFCmpIR(Pred);
    }

    // We can't handle 8-31 immediates with native IR, use the intrinsic.
    Intrinsic::ID ID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_cmpps:
      ID = Intrinsic::x86_sse_cmp_ps;
      break;
    case X86::BI__builtin_ia32_cmpps256:
      ID = Intrinsic::x86_avx_cmp_ps_256;
      break;
    case X86::BI__builtin_ia32_cmppd:
      ID = Intrinsic::x86_sse2_cmp_pd;
      break;
    case X86::BI__builtin_ia32_cmppd256:
      ID = Intrinsic::x86_avx_cmp_pd_256;
      break;
    }

    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
  }
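  // Illustration of getVectorFCmpIR (sketch): __builtin_ia32_cmpltps on
  // two <4 x float> operands becomes
  //   %cmp  = fcmp olt <4 x float> %a, %b
  //   %sext = sext <4 x i1> %cmp to <4 x i32>
  //   %res  = bitcast <4 x i32> %sext to <4 x float>
  // i.e. an all-ones or all-zeros mask per lane, matching the cmpps
  // semantics for immediates 0-7.
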
  // SSE scalar comparison intrinsics
  case X86::BI__builtin_ia32_cmpeqss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
  case X86::BI__builtin_ia32_cmpltss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
  case X86::BI__builtin_ia32_cmpless:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
  case X86::BI__builtin_ia32_cmpunordss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
  case X86::BI__builtin_ia32_cmpneqss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
  case X86::BI__builtin_ia32_cmpnltss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
  case X86::BI__builtin_ia32_cmpnless:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
  case X86::BI__builtin_ia32_cmpordss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
  case X86::BI__builtin_ia32_cmpeqsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
  case X86::BI__builtin_ia32_cmpltsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
  case X86::BI__builtin_ia32_cmplesd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
  case X86::BI__builtin_ia32_cmpunordsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
  case X86::BI__builtin_ia32_cmpneqsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
  case X86::BI__builtin_ia32_cmpnltsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
  case X86::BI__builtin_ia32_cmpnlesd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
  case X86::BI__builtin_ia32_cmpordsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);

  case X86::BI__emul:
  case X86::BI__emulu: {
    llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
    bool isSigned = (BuiltinID == X86::BI__emul);
    Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
    Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
    return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
  }
  case X86::BI__mulh:
  case X86::BI__umulh:
  case X86::BI_mul128:
  case X86::BI_umul128: {
    llvm::Type *ResType = ConvertType(E->getType());
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);

    bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
    Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
    Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);

    Value *MulResult, *HigherBits;
    if (IsSigned) {
      MulResult = Builder.CreateNSWMul(LHS, RHS);
      HigherBits = Builder.CreateAShr(MulResult, 64);
    } else {
      MulResult = Builder.CreateNUWMul(LHS, RHS);
      HigherBits = Builder.CreateLShr(MulResult, 64);
    }
    HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);

    if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
      return HigherBits;

    Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
    Builder.CreateStore(HigherBits, HighBitsAddress);
    return Builder.CreateIntCast(MulResult, ResType, IsSigned);
  }
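  // Usage illustration for the multiply builtins above: __emul(a, b)
  // widens two 32-bit operands to i64 and returns the full 64-bit product
  // (as an NSW multiply); _umul128(a, b, &hi) multiplies in i128, stores
  // the high 64 bits through the out-pointer, and returns the low 64 bits,
  // while __mulh/__umulh return only the high half.
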
  case X86::BI__faststorefence: {
    return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
                               llvm::CrossThread);
  }
  case X86::BI_ReadWriteBarrier:
  case X86::BI_ReadBarrier:
  case X86::BI_WriteBarrier: {
    return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
                               llvm::SingleThread);
  }
  case X86::BI_BitScanForward:
  case X86::BI_BitScanForward64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
  case X86::BI_BitScanReverse:
  case X86::BI_BitScanReverse64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);

  case X86::BI_InterlockedAnd64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
  case X86::BI_InterlockedExchange64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
  case X86::BI_InterlockedExchangeAdd64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
  case X86::BI_InterlockedExchangeSub64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
  case X86::BI_InterlockedOr64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
  case X86::BI_InterlockedXor64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
  case X86::BI_InterlockedDecrement64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
  case X86::BI_InterlockedIncrement64:
    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);

  case X86::BI_AddressOfReturnAddress: {
    Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
    return Builder.CreateCall(F);
  }
  case X86::BI__stosb: {
    // We treat __stosb as a volatile memset - it may not generate a "rep
    // stosb" instruction, but it will create a memset that won't be
    // optimized away.
    return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true);
  }
  }
}

Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  SmallVector<Value*, 4> Ops;

  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
    Ops.push_back(EmitScalarExpr(E->getArg(i)));

  Intrinsic::ID ID = Intrinsic::not_intrinsic;

  switch (BuiltinID) {
  default: return nullptr;

  // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
  // call __builtin_readcyclecounter.
  case PPC::BI__builtin_ppc_get_timebase:
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));

  // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
  case PPC::BI__builtin_altivec_lvx:
  case PPC::BI__builtin_altivec_lvxl:
  case PPC::BI__builtin_altivec_lvebx:
  case PPC::BI__builtin_altivec_lvehx:
  case PPC::BI__builtin_altivec_lvewx:
  case PPC::BI__builtin_altivec_lvsl:
  case PPC::BI__builtin_altivec_lvsr:
  case PPC::BI__builtin_vsx_lxvd2x:
  case PPC::BI__builtin_vsx_lxvw4x:
  case PPC::BI__builtin_vsx_lxvd2x_be:
  case PPC::BI__builtin_vsx_lxvw4x_be:
  case PPC::BI__builtin_vsx_lxvl:
  case PPC::BI__builtin_vsx_lxvll:
  {
    if (BuiltinID == PPC::BI__builtin_vsx_lxvl ||
        BuiltinID == PPC::BI__builtin_vsx_lxvll) {
      Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
    } else {
      Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
      Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
      Ops.pop_back();
    }

    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
    case PPC::BI__builtin_altivec_lvx:
      ID = Intrinsic::ppc_altivec_lvx;
      break;
    case PPC::BI__builtin_altivec_lvxl:
      ID = Intrinsic::ppc_altivec_lvxl;
      break;
    case PPC::BI__builtin_altivec_lvebx:
      ID = Intrinsic::ppc_altivec_lvebx;
      break;
    case PPC::BI__builtin_altivec_lvehx:
      ID = Intrinsic::ppc_altivec_lvehx;
      break;
    case PPC::BI__builtin_altivec_lvewx:
      ID = Intrinsic::ppc_altivec_lvewx;
      break;
    case PPC::BI__builtin_altivec_lvsl:
      ID = Intrinsic::ppc_altivec_lvsl;
      break;
    case PPC::BI__builtin_altivec_lvsr:
      ID = Intrinsic::ppc_altivec_lvsr;
      break;
    case PPC::BI__builtin_vsx_lxvd2x:
      ID = Intrinsic::ppc_vsx_lxvd2x;
      break;
    case PPC::BI__builtin_vsx_lxvw4x:
      ID = Intrinsic::ppc_vsx_lxvw4x;
      break;
    case PPC::BI__builtin_vsx_lxvd2x_be:
      ID = Intrinsic::ppc_vsx_lxvd2x_be;
      break;
    case PPC::BI__builtin_vsx_lxvw4x_be:
      ID = Intrinsic::ppc_vsx_lxvw4x_be;
      break;
    case PPC::BI__builtin_vsx_lxvl:
      ID = Intrinsic::ppc_vsx_lxvl;
      break;
    case PPC::BI__builtin_vsx_lxvll:
      ID = Intrinsic::ppc_vsx_lxvll;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops, "");
  }

  // vec_st, vec_xst_be
  case PPC::BI__builtin_altivec_stvx:
  case PPC::BI__builtin_altivec_stvxl:
  case PPC::BI__builtin_altivec_stvebx:
  case PPC::BI__builtin_altivec_stvehx:
  case PPC::BI__builtin_altivec_stvewx:
  case PPC::BI__builtin_vsx_stxvd2x:
  case PPC::BI__builtin_vsx_stxvw4x:
  case PPC::BI__builtin_vsx_stxvd2x_be:
  case PPC::BI__builtin_vsx_stxvw4x_be:
  case PPC::BI__builtin_vsx_stxvl:
  case PPC::BI__builtin_vsx_stxvll:
  {
    if (BuiltinID == PPC::BI__builtin_vsx_stxvl ||
        BuiltinID == PPC::BI__builtin_vsx_stxvll) {
      Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
    } else {
      Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
      Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
      Ops.pop_back();
    }

    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported st intrinsic!");
    case PPC::BI__builtin_altivec_stvx:
      ID = Intrinsic::ppc_altivec_stvx;
      break;
    case PPC::BI__builtin_altivec_stvxl:
      ID = Intrinsic::ppc_altivec_stvxl;
      break;
    case PPC::BI__builtin_altivec_stvebx:
      ID = Intrinsic::ppc_altivec_stvebx;
      break;
    case PPC::BI__builtin_altivec_stvehx:
      ID = Intrinsic::ppc_altivec_stvehx;
      break;
    case PPC::BI__builtin_altivec_stvewx:
      ID = Intrinsic::ppc_altivec_stvewx;
      break;
    case PPC::BI__builtin_vsx_stxvd2x:
      ID = Intrinsic::ppc_vsx_stxvd2x;
      break;
    case PPC::BI__builtin_vsx_stxvw4x:
      ID = Intrinsic::ppc_vsx_stxvw4x;
      break;
    case PPC::BI__builtin_vsx_stxvd2x_be:
      ID = Intrinsic::ppc_vsx_stxvd2x_be;
      break;
    case PPC::BI__builtin_vsx_stxvw4x_be:
      ID = Intrinsic::ppc_vsx_stxvw4x_be;
      break;
    case PPC::BI__builtin_vsx_stxvl:
      ID = Intrinsic::ppc_vsx_stxvl;
      break;
    case PPC::BI__builtin_vsx_stxvll:
      ID = Intrinsic::ppc_vsx_stxvll;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops, "");
  }
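  // Illustration of the addressing above (a sketch): vec_ld(off, ptr)
  // arrives as Ops == {offset, base}; the GEP folds the pair into a single
  // i8* effective address, so the intrinsic takes one pointer operand,
  // roughly
  //   %ea = getelementptr i8, i8* %base, <intptr> %off
  //   call <4 x i32> @llvm.ppc.altivec.lvx(i8* %ea)
  // (lxvl/lxvll keep their separate length operand, hence the special
  // case; the vec_st path does the same with one extra value operand).
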
  // Square root
  case PPC::BI__builtin_vsx_xvsqrtsp:
  case PPC::BI__builtin_vsx_xvsqrtdp: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    ID = Intrinsic::sqrt;
    llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
    return Builder.CreateCall(F, X);
  }
  // Count leading zeros
  case PPC::BI__builtin_altivec_vclzb:
  case PPC::BI__builtin_altivec_vclzh:
  case PPC::BI__builtin_altivec_vclzw:
  case PPC::BI__builtin_altivec_vclzd: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
    return Builder.CreateCall(F, {X, Undef});
  }
  case PPC::BI__builtin_altivec_vctzb:
  case PPC::BI__builtin_altivec_vctzh:
  case PPC::BI__builtin_altivec_vctzw:
  case PPC::BI__builtin_altivec_vctzd: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
    Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
    return Builder.CreateCall(F, {X, Undef});
  }
  case PPC::BI__builtin_altivec_vpopcntb:
  case PPC::BI__builtin_altivec_vpopcnth:
  case PPC::BI__builtin_altivec_vpopcntw:
  case PPC::BI__builtin_altivec_vpopcntd: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
    return Builder.CreateCall(F, X);
  }
  // Copy sign
  case PPC::BI__builtin_vsx_xvcpsgnsp:
  case PPC::BI__builtin_vsx_xvcpsgndp: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    ID = Intrinsic::copysign;
    llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
    return Builder.CreateCall(F, {X, Y});
  }
  // Rounding/truncation
  case PPC::BI__builtin_vsx_xvrspip:
  case PPC::BI__builtin_vsx_xvrdpip:
  case PPC::BI__builtin_vsx_xvrdpim:
  case PPC::BI__builtin_vsx_xvrspim:
  case PPC::BI__builtin_vsx_xvrdpi:
  case PPC::BI__builtin_vsx_xvrspi:
  case PPC::BI__builtin_vsx_xvrdpic:
  case PPC::BI__builtin_vsx_xvrspic:
  case PPC::BI__builtin_vsx_xvrdpiz:
  case PPC::BI__builtin_vsx_xvrspiz: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
        BuiltinID == PPC::BI__builtin_vsx_xvrspim)
      ID = Intrinsic::floor;
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
             BuiltinID == PPC::BI__builtin_vsx_xvrspi)
      ID = Intrinsic::round;
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
             BuiltinID == PPC::BI__builtin_vsx_xvrspic)
      ID = Intrinsic::nearbyint;
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
             BuiltinID == PPC::BI__builtin_vsx_xvrspip)
      ID = Intrinsic::ceil;
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
             BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
      ID = Intrinsic::trunc;
    llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
    return Builder.CreateCall(F, X);
  }

  // Absolute value
  case PPC::BI__builtin_vsx_xvabsdp:
  case PPC::BI__builtin_vsx_xvabssp: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
    return Builder.CreateCall(F, X);
  }

  // FMA variations
  case PPC::BI__builtin_vsx_xvmaddadp:
  case PPC::BI__builtin_vsx_xvmaddasp:
  case PPC::BI__builtin_vsx_xvnmaddadp:
  case PPC::BI__builtin_vsx_xvnmaddasp:
  case PPC::BI__builtin_vsx_xvmsubadp:
  case PPC::BI__builtin_vsx_xvmsubasp:
  case PPC::BI__builtin_vsx_xvnmsubadp:
  case PPC::BI__builtin_vsx_xvnmsubasp: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    Value *Z = EmitScalarExpr(E->getArg(2));
    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
    switch (BuiltinID) {
    case PPC::BI__builtin_vsx_xvmaddadp:
    case PPC::BI__builtin_vsx_xvmaddasp:
      return Builder.CreateCall(F, {X, Y, Z});
    case PPC::BI__builtin_vsx_xvnmaddadp:
    case PPC::BI__builtin_vsx_xvnmaddasp:
      return Builder.CreateFSub(Zero,
                                Builder.CreateCall(F, {X, Y, Z}), "sub");
    case PPC::BI__builtin_vsx_xvmsubadp:
    case PPC::BI__builtin_vsx_xvmsubasp:
      return Builder.CreateCall(F,
                                {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
    case PPC::BI__builtin_vsx_xvnmsubadp:
    case PPC::BI__builtin_vsx_xvnmsubasp: {
      Value *FsubRes =
          Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
      return Builder.CreateFSub(Zero, FsubRes, "sub");
    }
    }
    llvm_unreachable("Unknown FMA operation");
    return nullptr; // Suppress no-return warning
  }
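  // Worked example (illustrative): xvnmsubadp, the negated
  // multiply-subtract, is emitted as fsub(-0.0, fma(X, Y, fsub(-0.0, Z))),
  // i.e. -(X*Y - Z), using subtraction from negative zero as the IR-level
  // negation of a floating-point value.
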
    std::swap(Ops[0], Ops[1]);

    // Need to cast the second argument from a vector of unsigned int to a
    // vector of long long.
    Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));

    if (getTarget().isLittleEndian()) {
      // Create a shuffle mask of (1, 0)
      Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
                                   ConstantInt::get(Int32Ty, 0)
                                 };
      Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);

      // Reverse the double words in the vector we will extract from.
      Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
      Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask);

      // Reverse the index.
      Index = MaxIndex - Index;
    }

    // Intrinsic expects the first arg to be a vector of int.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
    Ops[2] = ConstantInt::getSigned(Int32Ty, Index);
    return Builder.CreateCall(F, Ops);
  }

  case PPC::BI__builtin_vsx_extractuword: {
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);

    // Intrinsic expects the first argument to be a vector of doublewords.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));

    // The second argument is a compile-time constant int that needs to
    // be clamped to the range [0, 12].
    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]);
    assert(ArgCI &&
           "Second Arg to xxextractuw intrinsic must be a constant integer!");
    const int64_t MaxIndex = 12;
    int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);

    if (getTarget().isLittleEndian()) {
      // Reverse the index.
      Index = MaxIndex - Index;
      Ops[1] = ConstantInt::getSigned(Int32Ty, Index);

      // Emit the call, then reverse the double words of the results vector.
      Value *Call = Builder.CreateCall(F, Ops);

      // Create a shuffle mask of (1, 0)
      Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
                                   ConstantInt::get(Int32Ty, 0)
                                 };
      Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);

      Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask);
      return ShuffleCall;
    } else {
      Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
      return Builder.CreateCall(F, Ops);
    }
  }
  }
}

Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
                                              const CallExpr *E) {
  switch (BuiltinID) {
  case AMDGPU::BI__builtin_amdgcn_div_scale:
  case AMDGPU::BI__builtin_amdgcn_div_scalef: {
    // Translate from the intrinsic's struct return to the builtin's out
    // argument.
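    // A sketch of the shape involved (illustrative): for the f32 variant,
    // @llvm.amdgcn.div.scale.f32 returns { float, i1 }; element 0 is the
    // scaled result and element 1 is the flag, which is zero-extended and
    // stored through the out argument below.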

    Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));

    llvm::Value *X = EmitScalarExpr(E->getArg(0));
    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
    llvm::Value *Z = EmitScalarExpr(E->getArg(2));

    llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
                                           X->getType());

    llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});

    llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
    llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);

    llvm::Type *RealFlagType
      = FlagOutPtr.getPointer()->getType()->getPointerElementType();

    llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
    Builder.CreateStore(FlagExt, FlagOutPtr);
    return Result;
  }
  case AMDGPU::BI__builtin_amdgcn_div_fmas:
  case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
    llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));

    llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
                                      Src0->getType());
    llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
    return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
  }

  case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
    return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
  case AMDGPU::BI__builtin_amdgcn_div_fixup:
  case AMDGPU::BI__builtin_amdgcn_div_fixupf:
  case AMDGPU::BI__builtin_amdgcn_div_fixuph:
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
  case AMDGPU::BI__builtin_amdgcn_trig_preop:
  case AMDGPU::BI__builtin_amdgcn_trig_preopf:
    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
  case AMDGPU::BI__builtin_amdgcn_rcp:
  case AMDGPU::BI__builtin_amdgcn_rcpf:
  case AMDGPU::BI__builtin_amdgcn_rcph:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
  case AMDGPU::BI__builtin_amdgcn_rsq:
  case AMDGPU::BI__builtin_amdgcn_rsqf:
  case AMDGPU::BI__builtin_amdgcn_rsqh:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
  case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
  case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
  case AMDGPU::BI__builtin_amdgcn_sinf:
  case AMDGPU::BI__builtin_amdgcn_sinh:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
  case AMDGPU::BI__builtin_amdgcn_cosf:
  case AMDGPU::BI__builtin_amdgcn_cosh:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
  case AMDGPU::BI__builtin_amdgcn_log_clampf:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
  case AMDGPU::BI__builtin_amdgcn_ldexp:
  case AMDGPU::BI__builtin_amdgcn_ldexpf:
  case AMDGPU::BI__builtin_amdgcn_ldexph:
    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
  case AMDGPU::BI__builtin_amdgcn_frexp_mant:
  case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
  case AMDGPU::BI__builtin_amdgcn_frexp_manth:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
  case AMDGPU::BI__builtin_amdgcn_frexp_exp:
  case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
    Value *Src0 = EmitScalarExpr(E->getArg(0));
    Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
                                { Builder.getInt32Ty(), Src0->getType() });
    return Builder.CreateCall(F, Src0);
  }
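  // Note (illustrative): llvm.amdgcn.frexp.exp is overloaded on both its
  // result and source types, so the f32 form above becomes
  // @llvm.amdgcn.frexp.exp.i32.f32, while the f16 form below returns an
  // i16 exponent.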
  case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
    Value *Src0 = EmitScalarExpr(E->getArg(0));
    Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
                                { Builder.getInt16Ty(), Src0->getType() });
    return Builder.CreateCall(F, Src0);
  }
  case AMDGPU::BI__builtin_amdgcn_fract:
  case AMDGPU::BI__builtin_amdgcn_fractf:
  case AMDGPU::BI__builtin_amdgcn_fracth:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
  case AMDGPU::BI__builtin_amdgcn_lerp:
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
  case AMDGPU::BI__builtin_amdgcn_uicmp:
  case AMDGPU::BI__builtin_amdgcn_uicmpl:
  case AMDGPU::BI__builtin_amdgcn_sicmp:
  case AMDGPU::BI__builtin_amdgcn_sicmpl:
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp);
  case AMDGPU::BI__builtin_amdgcn_fcmp:
  case AMDGPU::BI__builtin_amdgcn_fcmpf:
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp);
  case AMDGPU::BI__builtin_amdgcn_class:
  case AMDGPU::BI__builtin_amdgcn_classf:
  case AMDGPU::BI__builtin_amdgcn_classh:
    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);

  case AMDGPU::BI__builtin_amdgcn_read_exec: {
    CallInst *CI = cast<CallInst>(
      EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
    CI->setConvergent();
    return CI;
  }

  // amdgcn workitem
  case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
  case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
  case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);

  // r600 intrinsics
  case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
  case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
    return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
  case AMDGPU::BI__builtin_r600_read_tidig_x:
    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
  case AMDGPU::BI__builtin_r600_read_tidig_y:
    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
  case AMDGPU::BI__builtin_r600_read_tidig_z:
    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
  default:
    return nullptr;
  }
}

/// Handle a SystemZ function in which the final argument is a pointer
/// to an int that receives the post-instruction CC value.  At the LLVM level
/// this is represented as a function that returns a {result, cc} pair.
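/// For example (illustrative), an intrinsic such as s390_vceqbs used below
/// returns { <16 x i8>, i32 } at the LLVM level; element 0 becomes the
/// builtin's return value and element 1 is stored through the trailing CC
/// pointer argument.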
static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
                                         unsigned IntrinsicID,
                                         const CallExpr *E) {
  unsigned NumArgs = E->getNumArgs() - 1;
  SmallVector<Value *, 8> Args(NumArgs);
  for (unsigned I = 0; I < NumArgs; ++I)
    Args[I] = CGF.EmitScalarExpr(E->getArg(I));
  Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
  Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
  Value *Call = CGF.Builder.CreateCall(F, Args);
  Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
  CGF.Builder.CreateStore(CC, CCPtr);
  return CGF.Builder.CreateExtractValue(Call, 0);
}

Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E) {
  switch (BuiltinID) {
  case SystemZ::BI__builtin_tbegin: {
    Value *TDB = EmitScalarExpr(E->getArg(0));
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
    Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
    return Builder.CreateCall(F, {TDB, Control});
  }
  case SystemZ::BI__builtin_tbegin_nofloat: {
    Value *TDB = EmitScalarExpr(E->getArg(0));
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
    Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
    return Builder.CreateCall(F, {TDB, Control});
  }
  case SystemZ::BI__builtin_tbeginc: {
    Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
    Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
    return Builder.CreateCall(F, {TDB, Control});
  }
  case SystemZ::BI__builtin_tabort: {
    Value *Data = EmitScalarExpr(E->getArg(0));
    Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
    return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
  }
  case SystemZ::BI__builtin_non_tx_store: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Data = EmitScalarExpr(E->getArg(1));
    Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
    return Builder.CreateCall(F, {Data, Address});
  }

  // Vector builtins.  Note that most vector builtins are mapped automatically
  // to target-specific LLVM intrinsics.  The ones handled specially here can
  // be represented via standard LLVM IR, which is preferable since it enables
  // common LLVM optimizations.
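  // For example (illustrative), __builtin_s390_vpopctb on a vector of bytes
  // becomes a call to the generic @llvm.ctpop.v16i8 intrinsic below rather
  // than a SystemZ-specific one.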

  case SystemZ::BI__builtin_s390_vpopctb:
  case SystemZ::BI__builtin_s390_vpopcth:
  case SystemZ::BI__builtin_s390_vpopctf:
  case SystemZ::BI__builtin_s390_vpopctg: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
    return Builder.CreateCall(F, X);
  }

  case SystemZ::BI__builtin_s390_vclzb:
  case SystemZ::BI__builtin_s390_vclzh:
  case SystemZ::BI__builtin_s390_vclzf:
  case SystemZ::BI__builtin_s390_vclzg: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
    return Builder.CreateCall(F, {X, Undef});
  }

  case SystemZ::BI__builtin_s390_vctzb:
  case SystemZ::BI__builtin_s390_vctzh:
  case SystemZ::BI__builtin_s390_vctzf:
  case SystemZ::BI__builtin_s390_vctzg: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
    Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
    return Builder.CreateCall(F, {X, Undef});
  }

  case SystemZ::BI__builtin_s390_vfsqdb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
    return Builder.CreateCall(F, X);
  }
  case SystemZ::BI__builtin_s390_vfmadb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    Value *Z = EmitScalarExpr(E->getArg(2));
    Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
    return Builder.CreateCall(F, {X, Y, Z});
  }
  case SystemZ::BI__builtin_s390_vfmsdb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    Value *Z = EmitScalarExpr(E->getArg(2));
    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
    Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
    return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
  }
  case SystemZ::BI__builtin_s390_vflpdb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
    return Builder.CreateCall(F, X);
  }
  case SystemZ::BI__builtin_s390_vflndb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
    Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
    return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
  }
  case SystemZ::BI__builtin_s390_vfidb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    // Constant-fold the M4 and M5 mask arguments.
    llvm::APSInt M4, M5;
    bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
    bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
    assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
    (void)IsConstM4; (void)IsConstM5;
    // Check whether this instance of vfidb can be represented via an LLVM
    // standard intrinsic.  We only support some combinations of M4 and M5.
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    switch (M4.getZExtValue()) {
    default: break;
    case 0:  // IEEE-inexact exception allowed
      switch (M5.getZExtValue()) {
      default: break;
      case 0: ID = Intrinsic::rint; break;
      }
      break;
    case 4:  // IEEE-inexact exception suppressed
      switch (M5.getZExtValue()) {
      default: break;
      case 0: ID = Intrinsic::nearbyint; break;
      case 1: ID = Intrinsic::round; break;
      case 5: ID = Intrinsic::trunc; break;
      case 6: ID = Intrinsic::ceil; break;
      case 7: ID = Intrinsic::floor; break;
      }
      break;
    }
    if (ID != Intrinsic::not_intrinsic) {
      Function *F = CGM.getIntrinsic(ID, ResultType);
      return Builder.CreateCall(F, X);
    }
    Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb);
    Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
    Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
    return Builder.CreateCall(F, {X, M4Value, M5Value});
  }

  // Vector intrinsics that output the post-instruction CC value.

#define INTRINSIC_WITH_CC(NAME) \
  case SystemZ::BI__builtin_##NAME: \
    return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)

  INTRINSIC_WITH_CC(s390_vpkshs);
  INTRINSIC_WITH_CC(s390_vpksfs);
  INTRINSIC_WITH_CC(s390_vpksgs);

  INTRINSIC_WITH_CC(s390_vpklshs);
  INTRINSIC_WITH_CC(s390_vpklsfs);
  INTRINSIC_WITH_CC(s390_vpklsgs);

  INTRINSIC_WITH_CC(s390_vceqbs);
  INTRINSIC_WITH_CC(s390_vceqhs);
  INTRINSIC_WITH_CC(s390_vceqfs);
  INTRINSIC_WITH_CC(s390_vceqgs);

  INTRINSIC_WITH_CC(s390_vchbs);
  INTRINSIC_WITH_CC(s390_vchhs);
  INTRINSIC_WITH_CC(s390_vchfs);
  INTRINSIC_WITH_CC(s390_vchgs);

  INTRINSIC_WITH_CC(s390_vchlbs);
  INTRINSIC_WITH_CC(s390_vchlhs);
  INTRINSIC_WITH_CC(s390_vchlfs);
  INTRINSIC_WITH_CC(s390_vchlgs);

  INTRINSIC_WITH_CC(s390_vfaebs);
  INTRINSIC_WITH_CC(s390_vfaehs);
  INTRINSIC_WITH_CC(s390_vfaefs);

  INTRINSIC_WITH_CC(s390_vfaezbs);
  INTRINSIC_WITH_CC(s390_vfaezhs);
  INTRINSIC_WITH_CC(s390_vfaezfs);

  INTRINSIC_WITH_CC(s390_vfeebs);
  INTRINSIC_WITH_CC(s390_vfeehs);
  INTRINSIC_WITH_CC(s390_vfeefs);

  INTRINSIC_WITH_CC(s390_vfeezbs);
  INTRINSIC_WITH_CC(s390_vfeezhs);
  INTRINSIC_WITH_CC(s390_vfeezfs);

  INTRINSIC_WITH_CC(s390_vfenebs);
  INTRINSIC_WITH_CC(s390_vfenehs);
  INTRINSIC_WITH_CC(s390_vfenefs);

  INTRINSIC_WITH_CC(s390_vfenezbs);
  INTRINSIC_WITH_CC(s390_vfenezhs);
  INTRINSIC_WITH_CC(s390_vfenezfs);

  INTRINSIC_WITH_CC(s390_vistrbs);
  INTRINSIC_WITH_CC(s390_vistrhs);
  INTRINSIC_WITH_CC(s390_vistrfs);

  INTRINSIC_WITH_CC(s390_vstrcbs);
  INTRINSIC_WITH_CC(s390_vstrchs);
  INTRINSIC_WITH_CC(s390_vstrcfs);

  INTRINSIC_WITH_CC(s390_vstrczbs);
  INTRINSIC_WITH_CC(s390_vstrczhs);
  INTRINSIC_WITH_CC(s390_vstrczfs);

  INTRINSIC_WITH_CC(s390_vfcedbs);
  INTRINSIC_WITH_CC(s390_vfchdbs);
  INTRINSIC_WITH_CC(s390_vfchedbs);

  INTRINSIC_WITH_CC(s390_vftcidb);

#undef INTRINSIC_WITH_CC

  default:
    return nullptr;
  }
}

Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
                                             const CallExpr *E) {
  auto MakeLdg = [&](unsigned IntrinsicID) {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    AlignmentSource AlignSource;
    clang::CharUnits Align =
        getNaturalPointeeTypeAlignment(E->getArg(0)->getType(), &AlignSource);
    return Builder.CreateCall(
        CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
                                       Ptr->getType()}),
        {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
  };
  auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
                                       Ptr->getType()}),
        {Ptr, EmitScalarExpr(E->getArg(1))});
  };
  switch (BuiltinID) {
  case NVPTX::BI__nvvm_atom_add_gen_i:
  case NVPTX::BI__nvvm_atom_add_gen_l:
  case NVPTX::BI__nvvm_atom_add_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);

  case NVPTX::BI__nvvm_atom_sub_gen_i:
  case NVPTX::BI__nvvm_atom_sub_gen_l:
  case NVPTX::BI__nvvm_atom_sub_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);

  case NVPTX::BI__nvvm_atom_and_gen_i:
  case NVPTX::BI__nvvm_atom_and_gen_l:
  case NVPTX::BI__nvvm_atom_and_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);

  case NVPTX::BI__nvvm_atom_or_gen_i:
  case NVPTX::BI__nvvm_atom_or_gen_l:
  case NVPTX::BI__nvvm_atom_or_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);

  case NVPTX::BI__nvvm_atom_xor_gen_i:
  case NVPTX::BI__nvvm_atom_xor_gen_l:
  case NVPTX::BI__nvvm_atom_xor_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);

  case NVPTX::BI__nvvm_atom_xchg_gen_i:
  case NVPTX::BI__nvvm_atom_xchg_gen_l:
  case NVPTX::BI__nvvm_atom_xchg_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);

  case NVPTX::BI__nvvm_atom_max_gen_i:
  case NVPTX::BI__nvvm_atom_max_gen_l:
  case NVPTX::BI__nvvm_atom_max_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);

  case NVPTX::BI__nvvm_atom_max_gen_ui:
  case NVPTX::BI__nvvm_atom_max_gen_ul:
  case NVPTX::BI__nvvm_atom_max_gen_ull:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);

  case NVPTX::BI__nvvm_atom_min_gen_i:
  case NVPTX::BI__nvvm_atom_min_gen_l:
  case NVPTX::BI__nvvm_atom_min_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);

  case NVPTX::BI__nvvm_atom_min_gen_ui:
  case NVPTX::BI__nvvm_atom_min_gen_ul:
  case NVPTX::BI__nvvm_atom_min_gen_ull:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);

  case NVPTX::BI__nvvm_atom_cas_gen_i:
  case NVPTX::BI__nvvm_atom_cas_gen_l:
  case NVPTX::BI__nvvm_atom_cas_gen_ll:
    // __nvvm_atom_cas_gen_* should return the old value rather than the
    // success flag.
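    // (At the IR level, cmpxchg produces a { value, success } pair;
    // ReturnBool=false selects element 0, the old value.)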
    return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);

  case NVPTX::BI__nvvm_atom_add_gen_f: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    Value *Val = EmitScalarExpr(E->getArg(1));
    // atomicrmw only deals with integer arguments, so we need to use LLVM's
    // nvvm_atomic_load_add_f32 intrinsic here instead.
    Value *FnALAF32 =
        CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
    return Builder.CreateCall(FnALAF32, {Ptr, Val});
  }

  case NVPTX::BI__nvvm_atom_inc_gen_ui: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    Value *Val = EmitScalarExpr(E->getArg(1));
    Value *FnALI32 =
        CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
    return Builder.CreateCall(FnALI32, {Ptr, Val});
  }

  case NVPTX::BI__nvvm_atom_dec_gen_ui: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    Value *Val = EmitScalarExpr(E->getArg(1));
    Value *FnALD32 =
        CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
    return Builder.CreateCall(FnALD32, {Ptr, Val});
  }

  case NVPTX::BI__nvvm_ldg_c:
  case NVPTX::BI__nvvm_ldg_c2:
  case NVPTX::BI__nvvm_ldg_c4:
  case NVPTX::BI__nvvm_ldg_s:
  case NVPTX::BI__nvvm_ldg_s2:
  case NVPTX::BI__nvvm_ldg_s4:
  case NVPTX::BI__nvvm_ldg_i:
  case NVPTX::BI__nvvm_ldg_i2:
  case NVPTX::BI__nvvm_ldg_i4:
  case NVPTX::BI__nvvm_ldg_l:
  case NVPTX::BI__nvvm_ldg_ll:
  case NVPTX::BI__nvvm_ldg_ll2:
  case NVPTX::BI__nvvm_ldg_uc:
  case NVPTX::BI__nvvm_ldg_uc2:
  case NVPTX::BI__nvvm_ldg_uc4:
  case NVPTX::BI__nvvm_ldg_us:
  case NVPTX::BI__nvvm_ldg_us2:
  case NVPTX::BI__nvvm_ldg_us4:
  case NVPTX::BI__nvvm_ldg_ui:
  case NVPTX::BI__nvvm_ldg_ui2:
  case NVPTX::BI__nvvm_ldg_ui4:
  case NVPTX::BI__nvvm_ldg_ul:
  case NVPTX::BI__nvvm_ldg_ull:
  case NVPTX::BI__nvvm_ldg_ull2:
    // PTX Interoperability section 2.2: "For a vector with an even number of
    // elements, its alignment is set to number of elements times the alignment
    // of its member: n*alignof(t)."
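    // For example (illustrative), a two-element int vector is therefore
    // loaded with alignment 2 * alignof(int) = 8, which matches the natural
    // alignment MakeLdg computes above.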
    return MakeLdg(Intrinsic::nvvm_ldg_global_i);
  case NVPTX::BI__nvvm_ldg_f:
  case NVPTX::BI__nvvm_ldg_f2:
  case NVPTX::BI__nvvm_ldg_f4:
  case NVPTX::BI__nvvm_ldg_d:
  case NVPTX::BI__nvvm_ldg_d2:
    return MakeLdg(Intrinsic::nvvm_ldg_global_f);

  case NVPTX::BI__nvvm_atom_cta_add_gen_i:
  case NVPTX::BI__nvvm_atom_cta_add_gen_l:
  case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta);
  case NVPTX::BI__nvvm_atom_sys_add_gen_i:
  case NVPTX::BI__nvvm_atom_sys_add_gen_l:
  case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys);
  case NVPTX::BI__nvvm_atom_cta_add_gen_f:
  case NVPTX::BI__nvvm_atom_cta_add_gen_d:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta);
  case NVPTX::BI__nvvm_atom_sys_add_gen_f:
  case NVPTX::BI__nvvm_atom_sys_add_gen_d:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys);
  case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
  case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
  case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta);
  case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
  case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
  case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys);
  case NVPTX::BI__nvvm_atom_cta_max_gen_i:
  case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
  case NVPTX::BI__nvvm_atom_cta_max_gen_l:
  case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
  case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
  case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta);
  case NVPTX::BI__nvvm_atom_sys_max_gen_i:
  case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
  case NVPTX::BI__nvvm_atom_sys_max_gen_l:
  case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
  case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
  case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys);
  case NVPTX::BI__nvvm_atom_cta_min_gen_i:
  case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
  case NVPTX::BI__nvvm_atom_cta_min_gen_l:
  case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
  case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
  case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta);
  case NVPTX::BI__nvvm_atom_sys_min_gen_i:
  case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
  case NVPTX::BI__nvvm_atom_sys_min_gen_l:
  case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
  case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
  case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys);
  case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta);
  case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta);
  case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys);
  case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys);
  case NVPTX::BI__nvvm_atom_cta_and_gen_i:
  case NVPTX::BI__nvvm_atom_cta_and_gen_l:
  case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta);
  case NVPTX::BI__nvvm_atom_sys_and_gen_i:
  case NVPTX::BI__nvvm_atom_sys_and_gen_l:
  case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys);
  case NVPTX::BI__nvvm_atom_cta_or_gen_i:
  case NVPTX::BI__nvvm_atom_cta_or_gen_l:
  case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta);
  case NVPTX::BI__nvvm_atom_sys_or_gen_i:
  case NVPTX::BI__nvvm_atom_sys_or_gen_l:
  case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys);
  case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
  case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
  case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta);
  case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
  case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
  case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
    return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys);
  case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
  case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
  case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(
            Intrinsic::nvvm_atomic_cas_gen_i_cta,
            {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
        {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
  }
  case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
  case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
  case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(
            Intrinsic::nvvm_atomic_cas_gen_i_sys,
            {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
        {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
  }
  default:
    return nullptr;
  }
}

Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
                                                   const CallExpr *E) {
  switch (BuiltinID) {
  case WebAssembly::BI__builtin_wasm_current_memory: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
    return Builder.CreateCall(Callee);
  }
  case WebAssembly::BI__builtin_wasm_grow_memory: {
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
    return Builder.CreateCall(Callee, X);
  }

  default:
    return nullptr;
  }
}