1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This contains code to emit Builtin calls as LLVM code. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGCXXABI.h" 15 #include "CGObjCRuntime.h" 16 #include "CGOpenCLRuntime.h" 17 #include "CodeGenFunction.h" 18 #include "CodeGenModule.h" 19 #include "TargetInfo.h" 20 #include "clang/AST/ASTContext.h" 21 #include "clang/AST/Decl.h" 22 #include "clang/Analysis/Analyses/OSLog.h" 23 #include "clang/Basic/TargetBuiltins.h" 24 #include "clang/Basic/TargetInfo.h" 25 #include "clang/CodeGen/CGFunctionInfo.h" 26 #include "llvm/ADT/StringExtras.h" 27 #include "llvm/IR/CallSite.h" 28 #include "llvm/IR/DataLayout.h" 29 #include "llvm/IR/InlineAsm.h" 30 #include "llvm/IR/Intrinsics.h" 31 #include "llvm/IR/MDBuilder.h" 32 #include <sstream> 33 34 using namespace clang; 35 using namespace CodeGen; 36 using namespace llvm; 37 38 static 39 int64_t clamp(int64_t Value, int64_t Low, int64_t High) { 40 return std::min(High, std::max(Low, Value)); 41 } 42 43 /// getBuiltinLibFunction - Given a builtin id for a function like 44 /// "__builtin_fabsf", return a Function* for "fabsf". 45 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, 46 unsigned BuiltinID) { 47 assert(Context.BuiltinInfo.isLibFunction(BuiltinID)); 48 49 // Get the name, skip over the __builtin_ prefix (if necessary). 50 StringRef Name; 51 GlobalDecl D(FD); 52 53 // If the builtin has been declared explicitly with an assembler label, 54 // use the mangled name. This differs from the plain label on platforms 55 // that prefix labels. 
56 if (FD->hasAttr<AsmLabelAttr>()) 57 Name = getMangledName(D); 58 else 59 Name = Context.BuiltinInfo.getName(BuiltinID) + 10; 60 61 llvm::FunctionType *Ty = 62 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType())); 63 64 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false); 65 } 66 67 /// Emit the conversions required to turn the given value into an 68 /// integer of the given size. 69 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V, 70 QualType T, llvm::IntegerType *IntType) { 71 V = CGF.EmitToMemory(V, T); 72 73 if (V->getType()->isPointerTy()) 74 return CGF.Builder.CreatePtrToInt(V, IntType); 75 76 assert(V->getType() == IntType); 77 return V; 78 } 79 80 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, 81 QualType T, llvm::Type *ResultType) { 82 V = CGF.EmitFromMemory(V, T); 83 84 if (ResultType->isPointerTy()) 85 return CGF.Builder.CreateIntToPtr(V, ResultType); 86 87 assert(V->getType() == ResultType); 88 return V; 89 } 90 91 /// Utility to insert an atomic instruction based on Instrinsic::ID 92 /// and the expression node. 
93 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF, 94 llvm::AtomicRMWInst::BinOp Kind, 95 const CallExpr *E) { 96 QualType T = E->getType(); 97 assert(E->getArg(0)->getType()->isPointerType()); 98 assert(CGF.getContext().hasSameUnqualifiedType(T, 99 E->getArg(0)->getType()->getPointeeType())); 100 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 101 102 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 103 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 104 105 llvm::IntegerType *IntType = 106 llvm::IntegerType::get(CGF.getLLVMContext(), 107 CGF.getContext().getTypeSize(T)); 108 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 109 110 llvm::Value *Args[2]; 111 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 112 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 113 llvm::Type *ValueType = Args[1]->getType(); 114 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 115 116 llvm::Value *Result = CGF.Builder.CreateAtomicRMW( 117 Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); 118 return EmitFromInt(CGF, Result, T, ValueType); 119 } 120 121 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) { 122 Value *Val = CGF.EmitScalarExpr(E->getArg(0)); 123 Value *Address = CGF.EmitScalarExpr(E->getArg(1)); 124 125 // Convert the type of the pointer to a pointer to the stored type. 
126 Val = CGF.EmitToMemory(Val, E->getArg(0)->getType()); 127 Value *BC = CGF.Builder.CreateBitCast( 128 Address, llvm::PointerType::getUnqual(Val->getType()), "cast"); 129 LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType()); 130 LV.setNontemporal(true); 131 CGF.EmitStoreOfScalar(Val, LV, false); 132 return nullptr; 133 } 134 135 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) { 136 Value *Address = CGF.EmitScalarExpr(E->getArg(0)); 137 138 LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType()); 139 LV.setNontemporal(true); 140 return CGF.EmitLoadOfScalar(LV, E->getExprLoc()); 141 } 142 143 static RValue EmitBinaryAtomic(CodeGenFunction &CGF, 144 llvm::AtomicRMWInst::BinOp Kind, 145 const CallExpr *E) { 146 return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E)); 147 } 148 149 /// Utility to insert an atomic instruction based Instrinsic::ID and 150 /// the expression node, where the return value is the result of the 151 /// operation. 
152 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, 153 llvm::AtomicRMWInst::BinOp Kind, 154 const CallExpr *E, 155 Instruction::BinaryOps Op, 156 bool Invert = false) { 157 QualType T = E->getType(); 158 assert(E->getArg(0)->getType()->isPointerType()); 159 assert(CGF.getContext().hasSameUnqualifiedType(T, 160 E->getArg(0)->getType()->getPointeeType())); 161 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 162 163 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 164 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 165 166 llvm::IntegerType *IntType = 167 llvm::IntegerType::get(CGF.getLLVMContext(), 168 CGF.getContext().getTypeSize(T)); 169 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 170 171 llvm::Value *Args[2]; 172 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 173 llvm::Type *ValueType = Args[1]->getType(); 174 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 175 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 176 177 llvm::Value *Result = CGF.Builder.CreateAtomicRMW( 178 Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); 179 Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]); 180 if (Invert) 181 Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result, 182 llvm::ConstantInt::get(IntType, -1)); 183 Result = EmitFromInt(CGF, Result, T, ValueType); 184 return RValue::get(Result); 185 } 186 187 /// @brief Utility to insert an atomic cmpxchg instruction. 188 /// 189 /// @param CGF The current codegen function. 190 /// @param E Builtin call expression to convert to cmpxchg. 191 /// arg0 - address to operate on 192 /// arg1 - value to compare with 193 /// arg2 - new value 194 /// @param ReturnBool Specifies whether to return success flag of 195 /// cmpxchg result or the old value. 
196 /// 197 /// @returns result of cmpxchg, according to ReturnBool 198 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, 199 bool ReturnBool) { 200 QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType(); 201 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 202 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 203 204 llvm::IntegerType *IntType = llvm::IntegerType::get( 205 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T)); 206 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 207 208 Value *Args[3]; 209 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 210 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 211 llvm::Type *ValueType = Args[1]->getType(); 212 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 213 Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType); 214 215 Value *Pair = CGF.Builder.CreateAtomicCmpXchg( 216 Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent, 217 llvm::AtomicOrdering::SequentiallyConsistent); 218 if (ReturnBool) 219 // Extract boolean success flag and zext it to int. 220 return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1), 221 CGF.ConvertType(E->getType())); 222 else 223 // Extract old value and emit it using the same type as compare value. 224 return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T, 225 ValueType); 226 } 227 228 // Emit a simple mangled intrinsic that has 1 argument and a return type 229 // matching the argument type. 230 static Value *emitUnaryBuiltin(CodeGenFunction &CGF, 231 const CallExpr *E, 232 unsigned IntrinsicID) { 233 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 234 235 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 236 return CGF.Builder.CreateCall(F, Src0); 237 } 238 239 // Emit an intrinsic that has 2 operands of the same type as its result. 
240 static Value *emitBinaryBuiltin(CodeGenFunction &CGF, 241 const CallExpr *E, 242 unsigned IntrinsicID) { 243 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 244 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 245 246 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 247 return CGF.Builder.CreateCall(F, { Src0, Src1 }); 248 } 249 250 // Emit an intrinsic that has 3 operands of the same type as its result. 251 static Value *emitTernaryBuiltin(CodeGenFunction &CGF, 252 const CallExpr *E, 253 unsigned IntrinsicID) { 254 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 255 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 256 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); 257 258 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 259 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 }); 260 } 261 262 // Emit an intrinsic that has 1 float or double operand, and 1 integer. 263 static Value *emitFPIntBuiltin(CodeGenFunction &CGF, 264 const CallExpr *E, 265 unsigned IntrinsicID) { 266 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); 267 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); 268 269 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); 270 return CGF.Builder.CreateCall(F, {Src0, Src1}); 271 } 272 273 /// EmitFAbs - Emit a call to @llvm.fabs(). 274 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) { 275 Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); 276 llvm::CallInst *Call = CGF.Builder.CreateCall(F, V); 277 Call->setDoesNotAccessMemory(); 278 return Call; 279 } 280 281 /// Emit the computation of the sign bit for a floating point value. Returns 282 /// the i1 sign bit value. 
283 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) { 284 LLVMContext &C = CGF.CGM.getLLVMContext(); 285 286 llvm::Type *Ty = V->getType(); 287 int Width = Ty->getPrimitiveSizeInBits(); 288 llvm::Type *IntTy = llvm::IntegerType::get(C, Width); 289 V = CGF.Builder.CreateBitCast(V, IntTy); 290 if (Ty->isPPC_FP128Ty()) { 291 // We want the sign bit of the higher-order double. The bitcast we just 292 // did works as if the double-double was stored to memory and then 293 // read as an i128. The "store" will put the higher-order double in the 294 // lower address in both little- and big-Endian modes, but the "load" 295 // will treat those bits as a different part of the i128: the low bits in 296 // little-Endian, the high bits in big-Endian. Therefore, on big-Endian 297 // we need to shift the high bits down to the low before truncating. 298 Width >>= 1; 299 if (CGF.getTarget().isBigEndian()) { 300 Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width); 301 V = CGF.Builder.CreateLShr(V, ShiftCst); 302 } 303 // We are truncating value in order to extract the higher-order 304 // double, which we will be using to extract the sign from. 305 IntTy = llvm::IntegerType::get(C, Width); 306 V = CGF.Builder.CreateTrunc(V, IntTy); 307 } 308 Value *Zero = llvm::Constant::getNullValue(IntTy); 309 return CGF.Builder.CreateICmpSLT(V, Zero); 310 } 311 312 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, 313 const CallExpr *E, llvm::Constant *calleeValue) { 314 CGCallee callee = CGCallee::forDirect(calleeValue, FD); 315 return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot()); 316 } 317 318 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.* 319 /// depending on IntrinsicID. 320 /// 321 /// \arg CGF The current codegen function. 322 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate. 323 /// \arg X The first argument to the llvm.*.with.overflow.*. 
324 /// \arg Y The second argument to the llvm.*.with.overflow.*. 325 /// \arg Carry The carry returned by the llvm.*.with.overflow.*. 326 /// \returns The result (i.e. sum/product) returned by the intrinsic. 327 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, 328 const llvm::Intrinsic::ID IntrinsicID, 329 llvm::Value *X, llvm::Value *Y, 330 llvm::Value *&Carry) { 331 // Make sure we have integers of the same width. 332 assert(X->getType() == Y->getType() && 333 "Arguments must be the same type. (Did you forget to make sure both " 334 "arguments have the same integer width?)"); 335 336 llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType()); 337 llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y}); 338 Carry = CGF.Builder.CreateExtractValue(Tmp, 1); 339 return CGF.Builder.CreateExtractValue(Tmp, 0); 340 } 341 342 static Value *emitRangedBuiltin(CodeGenFunction &CGF, 343 unsigned IntrinsicID, 344 int low, int high) { 345 llvm::MDBuilder MDHelper(CGF.getLLVMContext()); 346 llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high)); 347 Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); 348 llvm::Instruction *Call = CGF.Builder.CreateCall(F); 349 Call->setMetadata(llvm::LLVMContext::MD_range, RNode); 350 return Call; 351 } 352 353 namespace { 354 struct WidthAndSignedness { 355 unsigned Width; 356 bool Signed; 357 }; 358 } 359 360 static WidthAndSignedness 361 getIntegerWidthAndSignedness(const clang::ASTContext &context, 362 const clang::QualType Type) { 363 assert(Type->isIntegerType() && "Given type is not an integer."); 364 unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width; 365 bool Signed = Type->isSignedIntegerType(); 366 return {Width, Signed}; 367 } 368 369 // Given one or more integer types, this function produces an integer type that 370 // encompasses them: any value in one of the given types could be expressed in 371 // the encompassing type. 
372 static struct WidthAndSignedness 373 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) { 374 assert(Types.size() > 0 && "Empty list of types."); 375 376 // If any of the given types is signed, we must return a signed type. 377 bool Signed = false; 378 for (const auto &Type : Types) { 379 Signed |= Type.Signed; 380 } 381 382 // The encompassing type must have a width greater than or equal to the width 383 // of the specified types. Aditionally, if the encompassing type is signed, 384 // its width must be strictly greater than the width of any unsigned types 385 // given. 386 unsigned Width = 0; 387 for (const auto &Type : Types) { 388 unsigned MinWidth = Type.Width + (Signed && !Type.Signed); 389 if (Width < MinWidth) { 390 Width = MinWidth; 391 } 392 } 393 394 return {Width, Signed}; 395 } 396 397 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) { 398 llvm::Type *DestType = Int8PtrTy; 399 if (ArgValue->getType() != DestType) 400 ArgValue = 401 Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data()); 402 403 Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend; 404 return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue); 405 } 406 407 /// Checks if using the result of __builtin_object_size(p, @p From) in place of 408 /// __builtin_object_size(p, @p To) is correct 409 static bool areBOSTypesCompatible(int From, int To) { 410 // Note: Our __builtin_object_size implementation currently treats Type=0 and 411 // Type=2 identically. Encoding this implementation detail here may make 412 // improving __builtin_object_size difficult in the future, so it's omitted. 413 return From == To || (From == 0 && To == 1) || (From == 3 && To == 2); 414 } 415 416 static llvm::Value * 417 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) { 418 return ConstantInt::get(ResType, (Type & 2) ? 
0 : -1, /*isSigned=*/true); 419 } 420 421 llvm::Value * 422 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, 423 llvm::IntegerType *ResType, 424 llvm::Value *EmittedE) { 425 uint64_t ObjectSize; 426 if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type)) 427 return emitBuiltinObjectSize(E, Type, ResType, EmittedE); 428 return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true); 429 } 430 431 /// Returns a Value corresponding to the size of the given expression. 432 /// This Value may be either of the following: 433 /// - A llvm::Argument (if E is a param with the pass_object_size attribute on 434 /// it) 435 /// - A call to the @llvm.objectsize intrinsic 436 /// 437 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null 438 /// and we wouldn't otherwise try to reference a pass_object_size parameter, 439 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E. 440 llvm::Value * 441 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, 442 llvm::IntegerType *ResType, 443 llvm::Value *EmittedE) { 444 // We need to reference an argument if the pointer is a parameter with the 445 // pass_object_size attribute. 446 if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) { 447 auto *Param = dyn_cast<ParmVarDecl>(D->getDecl()); 448 auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>(); 449 if (Param != nullptr && PS != nullptr && 450 areBOSTypesCompatible(PS->getType(), Type)) { 451 auto Iter = SizeArguments.find(Param); 452 assert(Iter != SizeArguments.end()); 453 454 const ImplicitParamDecl *D = Iter->second; 455 auto DIter = LocalDeclMap.find(D); 456 assert(DIter != LocalDeclMap.end()); 457 458 return EmitLoadOfScalar(DIter->second, /*volatile=*/false, 459 getContext().getSizeType(), E->getLocStart()); 460 } 461 } 462 463 // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't 464 // evaluate E for side-effects. 
In either case, we shouldn't lower to 465 // @llvm.objectsize. 466 if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext()))) 467 return getDefaultBuiltinObjectSizeResult(Type, ResType); 468 469 Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E); 470 assert(Ptr->getType()->isPointerTy() && 471 "Non-pointer passed to __builtin_object_size?"); 472 473 Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()}); 474 475 // LLVM only supports 0 and 2, make sure that we pass along that as a boolean. 476 Value *Min = Builder.getInt1((Type & 2) != 0); 477 // For GCC compatability, __builtin_object_size treat NULL as unknown size. 478 Value *NullIsUnknown = Builder.getTrue(); 479 return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown}); 480 } 481 482 // Many of MSVC builtins are on both x64 and ARM; to avoid repeating code, we 483 // handle them here. 484 enum class CodeGenFunction::MSVCIntrin { 485 _BitScanForward, 486 _BitScanReverse, 487 _InterlockedAnd, 488 _InterlockedDecrement, 489 _InterlockedExchange, 490 _InterlockedExchangeAdd, 491 _InterlockedExchangeSub, 492 _InterlockedIncrement, 493 _InterlockedOr, 494 _InterlockedXor, 495 _interlockedbittestandset, 496 __fastfail, 497 }; 498 499 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, 500 const CallExpr *E) { 501 switch (BuiltinID) { 502 case MSVCIntrin::_BitScanForward: 503 case MSVCIntrin::_BitScanReverse: { 504 Value *ArgValue = EmitScalarExpr(E->getArg(1)); 505 506 llvm::Type *ArgType = ArgValue->getType(); 507 llvm::Type *IndexType = 508 EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType(); 509 llvm::Type *ResultType = ConvertType(E->getType()); 510 511 Value *ArgZero = llvm::Constant::getNullValue(ArgType); 512 Value *ResZero = llvm::Constant::getNullValue(ResultType); 513 Value *ResOne = llvm::ConstantInt::get(ResultType, 1); 514 515 BasicBlock *Begin = Builder.GetInsertBlock(); 516 BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn); 517 
Builder.SetInsertPoint(End); 518 PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result"); 519 520 Builder.SetInsertPoint(Begin); 521 Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero); 522 BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn); 523 Builder.CreateCondBr(IsZero, End, NotZero); 524 Result->addIncoming(ResZero, Begin); 525 526 Builder.SetInsertPoint(NotZero); 527 Address IndexAddress = EmitPointerWithAlignment(E->getArg(0)); 528 529 if (BuiltinID == MSVCIntrin::_BitScanForward) { 530 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 531 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); 532 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); 533 Builder.CreateStore(ZeroCount, IndexAddress, false); 534 } else { 535 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); 536 Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1); 537 538 Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); 539 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); 540 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); 541 Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount); 542 Builder.CreateStore(Index, IndexAddress, false); 543 } 544 Builder.CreateBr(End); 545 Result->addIncoming(ResOne, NotZero); 546 547 Builder.SetInsertPoint(End); 548 return Result; 549 } 550 case MSVCIntrin::_InterlockedAnd: 551 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E); 552 case MSVCIntrin::_InterlockedExchange: 553 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E); 554 case MSVCIntrin::_InterlockedExchangeAdd: 555 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E); 556 case MSVCIntrin::_InterlockedExchangeSub: 557 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E); 558 case MSVCIntrin::_InterlockedOr: 559 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E); 560 case MSVCIntrin::_InterlockedXor: 561 
return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E); 562 563 case MSVCIntrin::_interlockedbittestandset: { 564 llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); 565 llvm::Value *Bit = EmitScalarExpr(E->getArg(1)); 566 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 567 AtomicRMWInst::Or, Addr, 568 Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit), 569 llvm::AtomicOrdering::SequentiallyConsistent); 570 // Shift the relevant bit to the least significant position, truncate to 571 // the result type, and test the low bit. 572 llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit); 573 llvm::Value *Truncated = 574 Builder.CreateTrunc(Shifted, ConvertType(E->getType())); 575 return Builder.CreateAnd(Truncated, 576 ConstantInt::get(Truncated->getType(), 1)); 577 } 578 579 case MSVCIntrin::_InterlockedDecrement: { 580 llvm::Type *IntTy = ConvertType(E->getType()); 581 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 582 AtomicRMWInst::Sub, 583 EmitScalarExpr(E->getArg(0)), 584 ConstantInt::get(IntTy, 1), 585 llvm::AtomicOrdering::SequentiallyConsistent); 586 return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1)); 587 } 588 case MSVCIntrin::_InterlockedIncrement: { 589 llvm::Type *IntTy = ConvertType(E->getType()); 590 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 591 AtomicRMWInst::Add, 592 EmitScalarExpr(E->getArg(0)), 593 ConstantInt::get(IntTy, 1), 594 llvm::AtomicOrdering::SequentiallyConsistent); 595 return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1)); 596 } 597 598 case MSVCIntrin::__fastfail: { 599 // Request immediate process termination from the kernel. 
The instruction 600 // sequences to do this are documented on MSDN: 601 // https://msdn.microsoft.com/en-us/library/dn774154.aspx 602 llvm::Triple::ArchType ISA = getTarget().getTriple().getArch(); 603 StringRef Asm, Constraints; 604 switch (ISA) { 605 default: 606 ErrorUnsupported(E, "__fastfail call for this architecture"); 607 break; 608 case llvm::Triple::x86: 609 case llvm::Triple::x86_64: 610 Asm = "int $$0x29"; 611 Constraints = "{cx}"; 612 break; 613 case llvm::Triple::thumb: 614 Asm = "udf #251"; 615 Constraints = "{r0}"; 616 break; 617 } 618 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false); 619 llvm::InlineAsm *IA = 620 llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true); 621 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( 622 getLLVMContext(), llvm::AttributeList::FunctionIndex, 623 llvm::Attribute::NoReturn); 624 CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0))); 625 CS.setAttributes(NoReturnAttr); 626 return CS.getInstruction(); 627 } 628 } 629 llvm_unreachable("Incorrect MSVC intrinsic!"); 630 } 631 632 namespace { 633 // ARC cleanup for __builtin_os_log_format 634 struct CallObjCArcUse final : EHScopeStack::Cleanup { 635 CallObjCArcUse(llvm::Value *object) : object(object) {} 636 llvm::Value *object; 637 638 void Emit(CodeGenFunction &CGF, Flags flags) override { 639 CGF.EmitARCIntrinsicUse(object); 640 } 641 }; 642 } 643 644 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, 645 unsigned BuiltinID, const CallExpr *E, 646 ReturnValueSlot ReturnValue) { 647 // See if we can constant fold this builtin. If so, don't emit it at all. 
648 Expr::EvalResult Result; 649 if (E->EvaluateAsRValue(Result, CGM.getContext()) && 650 !Result.hasSideEffects()) { 651 if (Result.Val.isInt()) 652 return RValue::get(llvm::ConstantInt::get(getLLVMContext(), 653 Result.Val.getInt())); 654 if (Result.Val.isFloat()) 655 return RValue::get(llvm::ConstantFP::get(getLLVMContext(), 656 Result.Val.getFloat())); 657 } 658 659 switch (BuiltinID) { 660 default: break; // Handle intrinsics and libm functions below. 661 case Builtin::BI__builtin___CFStringMakeConstantString: 662 case Builtin::BI__builtin___NSStringMakeConstantString: 663 return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr)); 664 case Builtin::BI__builtin_stdarg_start: 665 case Builtin::BI__builtin_va_start: 666 case Builtin::BI__va_start: 667 case Builtin::BI__builtin_va_end: 668 return RValue::get( 669 EmitVAStartEnd(BuiltinID == Builtin::BI__va_start 670 ? EmitScalarExpr(E->getArg(0)) 671 : EmitVAListRef(E->getArg(0)).getPointer(), 672 BuiltinID != Builtin::BI__builtin_va_end)); 673 case Builtin::BI__builtin_va_copy: { 674 Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer(); 675 Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer(); 676 677 llvm::Type *Type = Int8PtrTy; 678 679 DstPtr = Builder.CreateBitCast(DstPtr, Type); 680 SrcPtr = Builder.CreateBitCast(SrcPtr, Type); 681 return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy), 682 {DstPtr, SrcPtr})); 683 } 684 case Builtin::BI__builtin_abs: 685 case Builtin::BI__builtin_labs: 686 case Builtin::BI__builtin_llabs: { 687 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 688 689 Value *NegOp = Builder.CreateNeg(ArgValue, "neg"); 690 Value *CmpResult = 691 Builder.CreateICmpSGE(ArgValue, 692 llvm::Constant::getNullValue(ArgValue->getType()), 693 "abscond"); 694 Value *Result = 695 Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs"); 696 697 return RValue::get(Result); 698 } 699 case Builtin::BI__builtin_fabs: 700 case Builtin::BI__builtin_fabsf: 701 case 
Builtin::BI__builtin_fabsl: { 702 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs)); 703 } 704 case Builtin::BI__builtin_fmod: 705 case Builtin::BI__builtin_fmodf: 706 case Builtin::BI__builtin_fmodl: { 707 Value *Arg1 = EmitScalarExpr(E->getArg(0)); 708 Value *Arg2 = EmitScalarExpr(E->getArg(1)); 709 Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod"); 710 return RValue::get(Result); 711 } 712 case Builtin::BI__builtin_copysign: 713 case Builtin::BI__builtin_copysignf: 714 case Builtin::BI__builtin_copysignl: { 715 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign)); 716 } 717 case Builtin::BI__builtin_ceil: 718 case Builtin::BI__builtin_ceilf: 719 case Builtin::BI__builtin_ceill: { 720 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil)); 721 } 722 case Builtin::BI__builtin_floor: 723 case Builtin::BI__builtin_floorf: 724 case Builtin::BI__builtin_floorl: { 725 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor)); 726 } 727 case Builtin::BI__builtin_trunc: 728 case Builtin::BI__builtin_truncf: 729 case Builtin::BI__builtin_truncl: { 730 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc)); 731 } 732 case Builtin::BI__builtin_rint: 733 case Builtin::BI__builtin_rintf: 734 case Builtin::BI__builtin_rintl: { 735 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint)); 736 } 737 case Builtin::BI__builtin_nearbyint: 738 case Builtin::BI__builtin_nearbyintf: 739 case Builtin::BI__builtin_nearbyintl: { 740 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint)); 741 } 742 case Builtin::BI__builtin_round: 743 case Builtin::BI__builtin_roundf: 744 case Builtin::BI__builtin_roundl: { 745 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round)); 746 } 747 case Builtin::BI__builtin_fmin: 748 case Builtin::BI__builtin_fminf: 749 case Builtin::BI__builtin_fminl: { 750 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum)); 751 } 752 case Builtin::BI__builtin_fmax: 753 
case Builtin::BI__builtin_fmaxf: 754 case Builtin::BI__builtin_fmaxl: { 755 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum)); 756 } 757 case Builtin::BI__builtin_conj: 758 case Builtin::BI__builtin_conjf: 759 case Builtin::BI__builtin_conjl: { 760 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 761 Value *Real = ComplexVal.first; 762 Value *Imag = ComplexVal.second; 763 Value *Zero = 764 Imag->getType()->isFPOrFPVectorTy() 765 ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType()) 766 : llvm::Constant::getNullValue(Imag->getType()); 767 768 Imag = Builder.CreateFSub(Zero, Imag, "sub"); 769 return RValue::getComplex(std::make_pair(Real, Imag)); 770 } 771 case Builtin::BI__builtin_creal: 772 case Builtin::BI__builtin_crealf: 773 case Builtin::BI__builtin_creall: 774 case Builtin::BIcreal: 775 case Builtin::BIcrealf: 776 case Builtin::BIcreall: { 777 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 778 return RValue::get(ComplexVal.first); 779 } 780 781 case Builtin::BI__builtin_cimag: 782 case Builtin::BI__builtin_cimagf: 783 case Builtin::BI__builtin_cimagl: 784 case Builtin::BIcimag: 785 case Builtin::BIcimagf: 786 case Builtin::BIcimagl: { 787 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 788 return RValue::get(ComplexVal.second); 789 } 790 791 case Builtin::BI__builtin_ctzs: 792 case Builtin::BI__builtin_ctz: 793 case Builtin::BI__builtin_ctzl: 794 case Builtin::BI__builtin_ctzll: { 795 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 796 797 llvm::Type *ArgType = ArgValue->getType(); 798 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 799 800 llvm::Type *ResultType = ConvertType(E->getType()); 801 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); 802 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef}); 803 if (Result->getType() != ResultType) 804 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 805 "cast"); 806 return RValue::get(Result); 807 } 808 case 
Builtin::BI__builtin_clzs: 809 case Builtin::BI__builtin_clz: 810 case Builtin::BI__builtin_clzl: 811 case Builtin::BI__builtin_clzll: { 812 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 813 814 llvm::Type *ArgType = ArgValue->getType(); 815 Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); 816 817 llvm::Type *ResultType = ConvertType(E->getType()); 818 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); 819 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef}); 820 if (Result->getType() != ResultType) 821 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 822 "cast"); 823 return RValue::get(Result); 824 } 825 case Builtin::BI__builtin_ffs: 826 case Builtin::BI__builtin_ffsl: 827 case Builtin::BI__builtin_ffsll: { 828 // ffs(x) -> x ? cttz(x) + 1 : 0 829 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 830 831 llvm::Type *ArgType = ArgValue->getType(); 832 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 833 834 llvm::Type *ResultType = ConvertType(E->getType()); 835 Value *Tmp = 836 Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}), 837 llvm::ConstantInt::get(ArgType, 1)); 838 Value *Zero = llvm::Constant::getNullValue(ArgType); 839 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero"); 840 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs"); 841 if (Result->getType() != ResultType) 842 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 843 "cast"); 844 return RValue::get(Result); 845 } 846 case Builtin::BI__builtin_parity: 847 case Builtin::BI__builtin_parityl: 848 case Builtin::BI__builtin_parityll: { 849 // parity(x) -> ctpop(x) & 1 850 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 851 852 llvm::Type *ArgType = ArgValue->getType(); 853 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 854 855 llvm::Type *ResultType = ConvertType(E->getType()); 856 Value *Tmp = Builder.CreateCall(F, ArgValue); 857 Value *Result = Builder.CreateAnd(Tmp, 
llvm::ConstantInt::get(ArgType, 1)); 858 if (Result->getType() != ResultType) 859 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 860 "cast"); 861 return RValue::get(Result); 862 } 863 case Builtin::BI__popcnt16: 864 case Builtin::BI__popcnt: 865 case Builtin::BI__popcnt64: 866 case Builtin::BI__builtin_popcount: 867 case Builtin::BI__builtin_popcountl: 868 case Builtin::BI__builtin_popcountll: { 869 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 870 871 llvm::Type *ArgType = ArgValue->getType(); 872 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 873 874 llvm::Type *ResultType = ConvertType(E->getType()); 875 Value *Result = Builder.CreateCall(F, ArgValue); 876 if (Result->getType() != ResultType) 877 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 878 "cast"); 879 return RValue::get(Result); 880 } 881 case Builtin::BI_rotr8: 882 case Builtin::BI_rotr16: 883 case Builtin::BI_rotr: 884 case Builtin::BI_lrotr: 885 case Builtin::BI_rotr64: { 886 Value *Val = EmitScalarExpr(E->getArg(0)); 887 Value *Shift = EmitScalarExpr(E->getArg(1)); 888 889 llvm::Type *ArgType = Val->getType(); 890 Shift = Builder.CreateIntCast(Shift, ArgType, false); 891 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); 892 Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth); 893 Value *ArgZero = llvm::Constant::getNullValue(ArgType); 894 895 Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1); 896 Shift = Builder.CreateAnd(Shift, Mask); 897 Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift); 898 899 Value *RightShifted = Builder.CreateLShr(Val, Shift); 900 Value *LeftShifted = Builder.CreateShl(Val, LeftShift); 901 Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted); 902 903 Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero); 904 Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated); 905 return RValue::get(Result); 906 } 907 case Builtin::BI_rotl8: 908 case Builtin::BI_rotl16: 
909 case Builtin::BI_rotl: 910 case Builtin::BI_lrotl: 911 case Builtin::BI_rotl64: { 912 Value *Val = EmitScalarExpr(E->getArg(0)); 913 Value *Shift = EmitScalarExpr(E->getArg(1)); 914 915 llvm::Type *ArgType = Val->getType(); 916 Shift = Builder.CreateIntCast(Shift, ArgType, false); 917 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); 918 Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth); 919 Value *ArgZero = llvm::Constant::getNullValue(ArgType); 920 921 Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1); 922 Shift = Builder.CreateAnd(Shift, Mask); 923 Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift); 924 925 Value *LeftShifted = Builder.CreateShl(Val, Shift); 926 Value *RightShifted = Builder.CreateLShr(Val, RightShift); 927 Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted); 928 929 Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero); 930 Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated); 931 return RValue::get(Result); 932 } 933 case Builtin::BI__builtin_unpredictable: { 934 // Always return the argument of __builtin_unpredictable. LLVM does not 935 // handle this builtin. Metadata for this builtin should be added directly 936 // to instructions such as branches or switches that use it. 937 return RValue::get(EmitScalarExpr(E->getArg(0))); 938 } 939 case Builtin::BI__builtin_expect: { 940 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 941 llvm::Type *ArgType = ArgValue->getType(); 942 943 Value *ExpectedValue = EmitScalarExpr(E->getArg(1)); 944 // Don't generate llvm.expect on -O0 as the backend won't use it for 945 // anything. 946 // Note, we still IRGen ExpectedValue because it could have side-effects. 
947 if (CGM.getCodeGenOpts().OptimizationLevel == 0) 948 return RValue::get(ArgValue); 949 950 Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType); 951 Value *Result = 952 Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval"); 953 return RValue::get(Result); 954 } 955 case Builtin::BI__builtin_assume_aligned: { 956 Value *PtrValue = EmitScalarExpr(E->getArg(0)); 957 Value *OffsetValue = 958 (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr; 959 960 Value *AlignmentValue = EmitScalarExpr(E->getArg(1)); 961 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue); 962 unsigned Alignment = (unsigned) AlignmentCI->getZExtValue(); 963 964 EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue); 965 return RValue::get(PtrValue); 966 } 967 case Builtin::BI__assume: 968 case Builtin::BI__builtin_assume: { 969 if (E->getArg(0)->HasSideEffects(getContext())) 970 return RValue::get(nullptr); 971 972 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 973 Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume); 974 return RValue::get(Builder.CreateCall(FnAssume, ArgValue)); 975 } 976 case Builtin::BI__builtin_bswap16: 977 case Builtin::BI__builtin_bswap32: 978 case Builtin::BI__builtin_bswap64: { 979 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap)); 980 } 981 case Builtin::BI__builtin_bitreverse8: 982 case Builtin::BI__builtin_bitreverse16: 983 case Builtin::BI__builtin_bitreverse32: 984 case Builtin::BI__builtin_bitreverse64: { 985 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse)); 986 } 987 case Builtin::BI__builtin_object_size: { 988 unsigned Type = 989 E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue(); 990 auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType())); 991 992 // We pass this builtin onto the optimizer so that it can figure out the 993 // object size in more complex cases. 
994 return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType, 995 /*EmittedE=*/nullptr)); 996 } 997 case Builtin::BI__builtin_prefetch: { 998 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); 999 // FIXME: Technically these constants should of type 'int', yes? 1000 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) : 1001 llvm::ConstantInt::get(Int32Ty, 0); 1002 Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : 1003 llvm::ConstantInt::get(Int32Ty, 3); 1004 Value *Data = llvm::ConstantInt::get(Int32Ty, 1); 1005 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 1006 return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data})); 1007 } 1008 case Builtin::BI__builtin_readcyclecounter: { 1009 Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); 1010 return RValue::get(Builder.CreateCall(F)); 1011 } 1012 case Builtin::BI__builtin___clear_cache: { 1013 Value *Begin = EmitScalarExpr(E->getArg(0)); 1014 Value *End = EmitScalarExpr(E->getArg(1)); 1015 Value *F = CGM.getIntrinsic(Intrinsic::clear_cache); 1016 return RValue::get(Builder.CreateCall(F, {Begin, End})); 1017 } 1018 case Builtin::BI__builtin_trap: 1019 return RValue::get(EmitTrapCall(Intrinsic::trap)); 1020 case Builtin::BI__debugbreak: 1021 return RValue::get(EmitTrapCall(Intrinsic::debugtrap)); 1022 case Builtin::BI__builtin_unreachable: { 1023 if (SanOpts.has(SanitizerKind::Unreachable)) { 1024 SanitizerScope SanScope(this); 1025 EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()), 1026 SanitizerKind::Unreachable), 1027 SanitizerHandler::BuiltinUnreachable, 1028 EmitCheckSourceLocation(E->getExprLoc()), None); 1029 } else 1030 Builder.CreateUnreachable(); 1031 1032 // We do need to preserve an insertion point. 
1033 EmitBlock(createBasicBlock("unreachable.cont")); 1034 1035 return RValue::get(nullptr); 1036 } 1037 1038 case Builtin::BI__builtin_powi: 1039 case Builtin::BI__builtin_powif: 1040 case Builtin::BI__builtin_powil: { 1041 Value *Base = EmitScalarExpr(E->getArg(0)); 1042 Value *Exponent = EmitScalarExpr(E->getArg(1)); 1043 llvm::Type *ArgType = Base->getType(); 1044 Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType); 1045 return RValue::get(Builder.CreateCall(F, {Base, Exponent})); 1046 } 1047 1048 case Builtin::BI__builtin_isgreater: 1049 case Builtin::BI__builtin_isgreaterequal: 1050 case Builtin::BI__builtin_isless: 1051 case Builtin::BI__builtin_islessequal: 1052 case Builtin::BI__builtin_islessgreater: 1053 case Builtin::BI__builtin_isunordered: { 1054 // Ordered comparisons: we know the arguments to these are matching scalar 1055 // floating point values. 1056 Value *LHS = EmitScalarExpr(E->getArg(0)); 1057 Value *RHS = EmitScalarExpr(E->getArg(1)); 1058 1059 switch (BuiltinID) { 1060 default: llvm_unreachable("Unknown ordered comparison"); 1061 case Builtin::BI__builtin_isgreater: 1062 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp"); 1063 break; 1064 case Builtin::BI__builtin_isgreaterequal: 1065 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp"); 1066 break; 1067 case Builtin::BI__builtin_isless: 1068 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp"); 1069 break; 1070 case Builtin::BI__builtin_islessequal: 1071 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp"); 1072 break; 1073 case Builtin::BI__builtin_islessgreater: 1074 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp"); 1075 break; 1076 case Builtin::BI__builtin_isunordered: 1077 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp"); 1078 break; 1079 } 1080 // ZExt bool to int type. 
1081 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType()))); 1082 } 1083 case Builtin::BI__builtin_isnan: { 1084 Value *V = EmitScalarExpr(E->getArg(0)); 1085 V = Builder.CreateFCmpUNO(V, V, "cmp"); 1086 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 1087 } 1088 1089 case Builtin::BIfinite: 1090 case Builtin::BI__finite: 1091 case Builtin::BIfinitef: 1092 case Builtin::BI__finitef: 1093 case Builtin::BIfinitel: 1094 case Builtin::BI__finitel: 1095 case Builtin::BI__builtin_isinf: 1096 case Builtin::BI__builtin_isfinite: { 1097 // isinf(x) --> fabs(x) == infinity 1098 // isfinite(x) --> fabs(x) != infinity 1099 // x != NaN via the ordered compare in either case. 1100 Value *V = EmitScalarExpr(E->getArg(0)); 1101 Value *Fabs = EmitFAbs(*this, V); 1102 Constant *Infinity = ConstantFP::getInfinity(V->getType()); 1103 CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf) 1104 ? CmpInst::FCMP_OEQ 1105 : CmpInst::FCMP_ONE; 1106 Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf"); 1107 return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType()))); 1108 } 1109 1110 case Builtin::BI__builtin_isinf_sign: { 1111 // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? 
-1 : 1) : 0 1112 Value *Arg = EmitScalarExpr(E->getArg(0)); 1113 Value *AbsArg = EmitFAbs(*this, Arg); 1114 Value *IsInf = Builder.CreateFCmpOEQ( 1115 AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf"); 1116 Value *IsNeg = EmitSignBit(*this, Arg); 1117 1118 llvm::Type *IntTy = ConvertType(E->getType()); 1119 Value *Zero = Constant::getNullValue(IntTy); 1120 Value *One = ConstantInt::get(IntTy, 1); 1121 Value *NegativeOne = ConstantInt::get(IntTy, -1); 1122 Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One); 1123 Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero); 1124 return RValue::get(Result); 1125 } 1126 1127 case Builtin::BI__builtin_isnormal: { 1128 // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min 1129 Value *V = EmitScalarExpr(E->getArg(0)); 1130 Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); 1131 1132 Value *Abs = EmitFAbs(*this, V); 1133 Value *IsLessThanInf = 1134 Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); 1135 APFloat Smallest = APFloat::getSmallestNormalized( 1136 getContext().getFloatTypeSemantics(E->getArg(0)->getType())); 1137 Value *IsNormal = 1138 Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest), 1139 "isnormal"); 1140 V = Builder.CreateAnd(Eq, IsLessThanInf, "and"); 1141 V = Builder.CreateAnd(V, IsNormal, "and"); 1142 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 1143 } 1144 1145 case Builtin::BI__builtin_fpclassify: { 1146 Value *V = EmitScalarExpr(E->getArg(5)); 1147 llvm::Type *Ty = ConvertType(E->getArg(5)->getType()); 1148 1149 // Create Result 1150 BasicBlock *Begin = Builder.GetInsertBlock(); 1151 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn); 1152 Builder.SetInsertPoint(End); 1153 PHINode *Result = 1154 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4, 1155 "fpclassify_result"); 1156 1157 // if (V==0) return FP_ZERO 1158 Builder.SetInsertPoint(Begin); 1159 Value *IsZero = 
Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty), 1160 "iszero"); 1161 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4)); 1162 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn); 1163 Builder.CreateCondBr(IsZero, End, NotZero); 1164 Result->addIncoming(ZeroLiteral, Begin); 1165 1166 // if (V != V) return FP_NAN 1167 Builder.SetInsertPoint(NotZero); 1168 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp"); 1169 Value *NanLiteral = EmitScalarExpr(E->getArg(0)); 1170 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn); 1171 Builder.CreateCondBr(IsNan, End, NotNan); 1172 Result->addIncoming(NanLiteral, NotZero); 1173 1174 // if (fabs(V) == infinity) return FP_INFINITY 1175 Builder.SetInsertPoint(NotNan); 1176 Value *VAbs = EmitFAbs(*this, V); 1177 Value *IsInf = 1178 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()), 1179 "isinf"); 1180 Value *InfLiteral = EmitScalarExpr(E->getArg(1)); 1181 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn); 1182 Builder.CreateCondBr(IsInf, End, NotInf); 1183 Result->addIncoming(InfLiteral, NotNan); 1184 1185 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL 1186 Builder.SetInsertPoint(NotInf); 1187 APFloat Smallest = APFloat::getSmallestNormalized( 1188 getContext().getFloatTypeSemantics(E->getArg(5)->getType())); 1189 Value *IsNormal = 1190 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest), 1191 "isnormal"); 1192 Value *NormalResult = 1193 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)), 1194 EmitScalarExpr(E->getArg(3))); 1195 Builder.CreateBr(End); 1196 Result->addIncoming(NormalResult, NotInf); 1197 1198 // return Result 1199 Builder.SetInsertPoint(End); 1200 return RValue::get(Result); 1201 } 1202 1203 case Builtin::BIalloca: 1204 case Builtin::BI_alloca: 1205 case Builtin::BI__builtin_alloca: { 1206 Value *Size = EmitScalarExpr(E->getArg(0)); 1207 const TargetInfo &TI = getContext().getTargetInfo(); 
1208 // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__. 1209 unsigned SuitableAlignmentInBytes = 1210 CGM.getContext() 1211 .toCharUnitsFromBits(TI.getSuitableAlign()) 1212 .getQuantity(); 1213 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); 1214 AI->setAlignment(SuitableAlignmentInBytes); 1215 return RValue::get(AI); 1216 } 1217 1218 case Builtin::BI__builtin_alloca_with_align: { 1219 Value *Size = EmitScalarExpr(E->getArg(0)); 1220 Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1)); 1221 auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue); 1222 unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue(); 1223 unsigned AlignmentInBytes = 1224 CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity(); 1225 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); 1226 AI->setAlignment(AlignmentInBytes); 1227 return RValue::get(AI); 1228 } 1229 1230 case Builtin::BIbzero: 1231 case Builtin::BI__builtin_bzero: { 1232 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1233 Value *SizeVal = EmitScalarExpr(E->getArg(1)); 1234 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1235 E->getArg(0)->getExprLoc(), FD, 0); 1236 Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false); 1237 return RValue::get(Dest.getPointer()); 1238 } 1239 case Builtin::BImemcpy: 1240 case Builtin::BI__builtin_memcpy: { 1241 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1242 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1243 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1244 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1245 E->getArg(0)->getExprLoc(), FD, 0); 1246 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), 1247 E->getArg(1)->getExprLoc(), FD, 1); 1248 Builder.CreateMemCpy(Dest, Src, SizeVal, false); 1249 return RValue::get(Dest.getPointer()); 1250 } 1251 1252 case 
Builtin::BI__builtin_char_memchr: 1253 BuiltinID = Builtin::BI__builtin_memchr; 1254 break; 1255 1256 case Builtin::BI__builtin___memcpy_chk: { 1257 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2. 1258 llvm::APSInt Size, DstSize; 1259 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 1260 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 1261 break; 1262 if (Size.ugt(DstSize)) 1263 break; 1264 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1265 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1266 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 1267 Builder.CreateMemCpy(Dest, Src, SizeVal, false); 1268 return RValue::get(Dest.getPointer()); 1269 } 1270 1271 case Builtin::BI__builtin_objc_memmove_collectable: { 1272 Address DestAddr = EmitPointerWithAlignment(E->getArg(0)); 1273 Address SrcAddr = EmitPointerWithAlignment(E->getArg(1)); 1274 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1275 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, 1276 DestAddr, SrcAddr, SizeVal); 1277 return RValue::get(DestAddr.getPointer()); 1278 } 1279 1280 case Builtin::BI__builtin___memmove_chk: { 1281 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2. 
1282 llvm::APSInt Size, DstSize; 1283 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 1284 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 1285 break; 1286 if (Size.ugt(DstSize)) 1287 break; 1288 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1289 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1290 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 1291 Builder.CreateMemMove(Dest, Src, SizeVal, false); 1292 return RValue::get(Dest.getPointer()); 1293 } 1294 1295 case Builtin::BImemmove: 1296 case Builtin::BI__builtin_memmove: { 1297 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1298 Address Src = EmitPointerWithAlignment(E->getArg(1)); 1299 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1300 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1301 E->getArg(0)->getExprLoc(), FD, 0); 1302 EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), 1303 E->getArg(1)->getExprLoc(), FD, 1); 1304 Builder.CreateMemMove(Dest, Src, SizeVal, false); 1305 return RValue::get(Dest.getPointer()); 1306 } 1307 case Builtin::BImemset: 1308 case Builtin::BI__builtin_memset: { 1309 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1310 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 1311 Builder.getInt8Ty()); 1312 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 1313 EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), 1314 E->getArg(0)->getExprLoc(), FD, 0); 1315 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); 1316 return RValue::get(Dest.getPointer()); 1317 } 1318 case Builtin::BI__builtin___memset_chk: { 1319 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2. 
1320 llvm::APSInt Size, DstSize; 1321 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 1322 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 1323 break; 1324 if (Size.ugt(DstSize)) 1325 break; 1326 Address Dest = EmitPointerWithAlignment(E->getArg(0)); 1327 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 1328 Builder.getInt8Ty()); 1329 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 1330 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); 1331 return RValue::get(Dest.getPointer()); 1332 } 1333 case Builtin::BI__builtin_dwarf_cfa: { 1334 // The offset in bytes from the first argument to the CFA. 1335 // 1336 // Why on earth is this in the frontend? Is there any reason at 1337 // all that the backend can't reasonably determine this while 1338 // lowering llvm.eh.dwarf.cfa()? 1339 // 1340 // TODO: If there's a satisfactory reason, add a target hook for 1341 // this instead of hard-coding 0, which is correct for most targets. 1342 int32_t Offset = 0; 1343 1344 Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa); 1345 return RValue::get(Builder.CreateCall(F, 1346 llvm::ConstantInt::get(Int32Ty, Offset))); 1347 } 1348 case Builtin::BI__builtin_return_address: { 1349 Value *Depth = 1350 CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this); 1351 Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); 1352 return RValue::get(Builder.CreateCall(F, Depth)); 1353 } 1354 case Builtin::BI_ReturnAddress: { 1355 Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); 1356 return RValue::get(Builder.CreateCall(F, Builder.getInt32(0))); 1357 } 1358 case Builtin::BI__builtin_frame_address: { 1359 Value *Depth = 1360 CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this); 1361 Value *F = CGM.getIntrinsic(Intrinsic::frameaddress); 1362 return RValue::get(Builder.CreateCall(F, Depth)); 1363 } 1364 case Builtin::BI__builtin_extract_return_addr: { 1365 Value *Address = EmitScalarExpr(E->getArg(0)); 
1366 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address); 1367 return RValue::get(Result); 1368 } 1369 case Builtin::BI__builtin_frob_return_addr: { 1370 Value *Address = EmitScalarExpr(E->getArg(0)); 1371 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address); 1372 return RValue::get(Result); 1373 } 1374 case Builtin::BI__builtin_dwarf_sp_column: { 1375 llvm::IntegerType *Ty 1376 = cast<llvm::IntegerType>(ConvertType(E->getType())); 1377 int Column = getTargetHooks().getDwarfEHStackPointer(CGM); 1378 if (Column == -1) { 1379 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column"); 1380 return RValue::get(llvm::UndefValue::get(Ty)); 1381 } 1382 return RValue::get(llvm::ConstantInt::get(Ty, Column, true)); 1383 } 1384 case Builtin::BI__builtin_init_dwarf_reg_size_table: { 1385 Value *Address = EmitScalarExpr(E->getArg(0)); 1386 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address)) 1387 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table"); 1388 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType()))); 1389 } 1390 case Builtin::BI__builtin_eh_return: { 1391 Value *Int = EmitScalarExpr(E->getArg(0)); 1392 Value *Ptr = EmitScalarExpr(E->getArg(1)); 1393 1394 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType()); 1395 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) && 1396 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants"); 1397 Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32 1398 ? Intrinsic::eh_return_i32 1399 : Intrinsic::eh_return_i64); 1400 Builder.CreateCall(F, {Int, Ptr}); 1401 Builder.CreateUnreachable(); 1402 1403 // We do need to preserve an insertion point. 
1404 EmitBlock(createBasicBlock("builtin_eh_return.cont")); 1405 1406 return RValue::get(nullptr); 1407 } 1408 case Builtin::BI__builtin_unwind_init: { 1409 Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init); 1410 return RValue::get(Builder.CreateCall(F)); 1411 } 1412 case Builtin::BI__builtin_extend_pointer: { 1413 // Extends a pointer to the size of an _Unwind_Word, which is 1414 // uint64_t on all platforms. Generally this gets poked into a 1415 // register and eventually used as an address, so if the 1416 // addressing registers are wider than pointers and the platform 1417 // doesn't implicitly ignore high-order bits when doing 1418 // addressing, we need to make sure we zext / sext based on 1419 // the platform's expectations. 1420 // 1421 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html 1422 1423 // Cast the pointer to intptr_t. 1424 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1425 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast"); 1426 1427 // If that's 64 bits, we're done. 1428 if (IntPtrTy->getBitWidth() == 64) 1429 return RValue::get(Result); 1430 1431 // Otherwise, ask the codegen data what to do. 1432 if (getTargetHooks().extendPointerWithSExt()) 1433 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext")); 1434 else 1435 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext")); 1436 } 1437 case Builtin::BI__builtin_setjmp: { 1438 // Buffer is a void**. 1439 Address Buf = EmitPointerWithAlignment(E->getArg(0)); 1440 1441 // Store the frame pointer to the setjmp buffer. 1442 Value *FrameAddr = 1443 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 1444 ConstantInt::get(Int32Ty, 0)); 1445 Builder.CreateStore(FrameAddr, Buf); 1446 1447 // Store the stack pointer to the setjmp buffer. 
1448 Value *StackAddr = 1449 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave)); 1450 Address StackSaveSlot = 1451 Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize()); 1452 Builder.CreateStore(StackAddr, StackSaveSlot); 1453 1454 // Call LLVM's EH setjmp, which is lightweight. 1455 Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); 1456 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 1457 return RValue::get(Builder.CreateCall(F, Buf.getPointer())); 1458 } 1459 case Builtin::BI__builtin_longjmp: { 1460 Value *Buf = EmitScalarExpr(E->getArg(0)); 1461 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 1462 1463 // Call LLVM's EH longjmp, which is lightweight. 1464 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf); 1465 1466 // longjmp doesn't return; mark this as unreachable. 1467 Builder.CreateUnreachable(); 1468 1469 // We do need to preserve an insertion point. 1470 EmitBlock(createBasicBlock("longjmp.cont")); 1471 1472 return RValue::get(nullptr); 1473 } 1474 case Builtin::BI__sync_fetch_and_add: 1475 case Builtin::BI__sync_fetch_and_sub: 1476 case Builtin::BI__sync_fetch_and_or: 1477 case Builtin::BI__sync_fetch_and_and: 1478 case Builtin::BI__sync_fetch_and_xor: 1479 case Builtin::BI__sync_fetch_and_nand: 1480 case Builtin::BI__sync_add_and_fetch: 1481 case Builtin::BI__sync_sub_and_fetch: 1482 case Builtin::BI__sync_and_and_fetch: 1483 case Builtin::BI__sync_or_and_fetch: 1484 case Builtin::BI__sync_xor_and_fetch: 1485 case Builtin::BI__sync_nand_and_fetch: 1486 case Builtin::BI__sync_val_compare_and_swap: 1487 case Builtin::BI__sync_bool_compare_and_swap: 1488 case Builtin::BI__sync_lock_test_and_set: 1489 case Builtin::BI__sync_lock_release: 1490 case Builtin::BI__sync_swap: 1491 llvm_unreachable("Shouldn't make it through sema"); 1492 case Builtin::BI__sync_fetch_and_add_1: 1493 case Builtin::BI__sync_fetch_and_add_2: 1494 case Builtin::BI__sync_fetch_and_add_4: 1495 case Builtin::BI__sync_fetch_and_add_8: 1496 case 
Builtin::BI__sync_fetch_and_add_16: 1497 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E); 1498 case Builtin::BI__sync_fetch_and_sub_1: 1499 case Builtin::BI__sync_fetch_and_sub_2: 1500 case Builtin::BI__sync_fetch_and_sub_4: 1501 case Builtin::BI__sync_fetch_and_sub_8: 1502 case Builtin::BI__sync_fetch_and_sub_16: 1503 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E); 1504 case Builtin::BI__sync_fetch_and_or_1: 1505 case Builtin::BI__sync_fetch_and_or_2: 1506 case Builtin::BI__sync_fetch_and_or_4: 1507 case Builtin::BI__sync_fetch_and_or_8: 1508 case Builtin::BI__sync_fetch_and_or_16: 1509 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E); 1510 case Builtin::BI__sync_fetch_and_and_1: 1511 case Builtin::BI__sync_fetch_and_and_2: 1512 case Builtin::BI__sync_fetch_and_and_4: 1513 case Builtin::BI__sync_fetch_and_and_8: 1514 case Builtin::BI__sync_fetch_and_and_16: 1515 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E); 1516 case Builtin::BI__sync_fetch_and_xor_1: 1517 case Builtin::BI__sync_fetch_and_xor_2: 1518 case Builtin::BI__sync_fetch_and_xor_4: 1519 case Builtin::BI__sync_fetch_and_xor_8: 1520 case Builtin::BI__sync_fetch_and_xor_16: 1521 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E); 1522 case Builtin::BI__sync_fetch_and_nand_1: 1523 case Builtin::BI__sync_fetch_and_nand_2: 1524 case Builtin::BI__sync_fetch_and_nand_4: 1525 case Builtin::BI__sync_fetch_and_nand_8: 1526 case Builtin::BI__sync_fetch_and_nand_16: 1527 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E); 1528 1529 // Clang extensions: not overloaded yet. 
1530 case Builtin::BI__sync_fetch_and_min: 1531 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E); 1532 case Builtin::BI__sync_fetch_and_max: 1533 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E); 1534 case Builtin::BI__sync_fetch_and_umin: 1535 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E); 1536 case Builtin::BI__sync_fetch_and_umax: 1537 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E); 1538 1539 case Builtin::BI__sync_add_and_fetch_1: 1540 case Builtin::BI__sync_add_and_fetch_2: 1541 case Builtin::BI__sync_add_and_fetch_4: 1542 case Builtin::BI__sync_add_and_fetch_8: 1543 case Builtin::BI__sync_add_and_fetch_16: 1544 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E, 1545 llvm::Instruction::Add); 1546 case Builtin::BI__sync_sub_and_fetch_1: 1547 case Builtin::BI__sync_sub_and_fetch_2: 1548 case Builtin::BI__sync_sub_and_fetch_4: 1549 case Builtin::BI__sync_sub_and_fetch_8: 1550 case Builtin::BI__sync_sub_and_fetch_16: 1551 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E, 1552 llvm::Instruction::Sub); 1553 case Builtin::BI__sync_and_and_fetch_1: 1554 case Builtin::BI__sync_and_and_fetch_2: 1555 case Builtin::BI__sync_and_and_fetch_4: 1556 case Builtin::BI__sync_and_and_fetch_8: 1557 case Builtin::BI__sync_and_and_fetch_16: 1558 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E, 1559 llvm::Instruction::And); 1560 case Builtin::BI__sync_or_and_fetch_1: 1561 case Builtin::BI__sync_or_and_fetch_2: 1562 case Builtin::BI__sync_or_and_fetch_4: 1563 case Builtin::BI__sync_or_and_fetch_8: 1564 case Builtin::BI__sync_or_and_fetch_16: 1565 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E, 1566 llvm::Instruction::Or); 1567 case Builtin::BI__sync_xor_and_fetch_1: 1568 case Builtin::BI__sync_xor_and_fetch_2: 1569 case Builtin::BI__sync_xor_and_fetch_4: 1570 case Builtin::BI__sync_xor_and_fetch_8: 1571 case Builtin::BI__sync_xor_and_fetch_16: 1572 return 
EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E, 1573 llvm::Instruction::Xor); 1574 case Builtin::BI__sync_nand_and_fetch_1: 1575 case Builtin::BI__sync_nand_and_fetch_2: 1576 case Builtin::BI__sync_nand_and_fetch_4: 1577 case Builtin::BI__sync_nand_and_fetch_8: 1578 case Builtin::BI__sync_nand_and_fetch_16: 1579 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E, 1580 llvm::Instruction::And, true); 1581 1582 case Builtin::BI__sync_val_compare_and_swap_1: 1583 case Builtin::BI__sync_val_compare_and_swap_2: 1584 case Builtin::BI__sync_val_compare_and_swap_4: 1585 case Builtin::BI__sync_val_compare_and_swap_8: 1586 case Builtin::BI__sync_val_compare_and_swap_16: 1587 return RValue::get(MakeAtomicCmpXchgValue(*this, E, false)); 1588 1589 case Builtin::BI__sync_bool_compare_and_swap_1: 1590 case Builtin::BI__sync_bool_compare_and_swap_2: 1591 case Builtin::BI__sync_bool_compare_and_swap_4: 1592 case Builtin::BI__sync_bool_compare_and_swap_8: 1593 case Builtin::BI__sync_bool_compare_and_swap_16: 1594 return RValue::get(MakeAtomicCmpXchgValue(*this, E, true)); 1595 1596 case Builtin::BI__sync_swap_1: 1597 case Builtin::BI__sync_swap_2: 1598 case Builtin::BI__sync_swap_4: 1599 case Builtin::BI__sync_swap_8: 1600 case Builtin::BI__sync_swap_16: 1601 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1602 1603 case Builtin::BI__sync_lock_test_and_set_1: 1604 case Builtin::BI__sync_lock_test_and_set_2: 1605 case Builtin::BI__sync_lock_test_and_set_4: 1606 case Builtin::BI__sync_lock_test_and_set_8: 1607 case Builtin::BI__sync_lock_test_and_set_16: 1608 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1609 1610 case Builtin::BI__sync_lock_release_1: 1611 case Builtin::BI__sync_lock_release_2: 1612 case Builtin::BI__sync_lock_release_4: 1613 case Builtin::BI__sync_lock_release_8: 1614 case Builtin::BI__sync_lock_release_16: { 1615 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1616 QualType ElTy = 
E->getArg(0)->getType()->getPointeeType(); 1617 CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); 1618 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 1619 StoreSize.getQuantity() * 8); 1620 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 1621 llvm::StoreInst *Store = 1622 Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr, 1623 StoreSize); 1624 Store->setAtomic(llvm::AtomicOrdering::Release); 1625 return RValue::get(nullptr); 1626 } 1627 1628 case Builtin::BI__sync_synchronize: { 1629 // We assume this is supposed to correspond to a C++0x-style 1630 // sequentially-consistent fence (i.e. this is only usable for 1631 // synchonization, not device I/O or anything like that). This intrinsic 1632 // is really badly designed in the sense that in theory, there isn't 1633 // any way to safely use it... but in practice, it mostly works 1634 // to use it with non-atomic loads and stores to get acquire/release 1635 // semantics. 1636 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent); 1637 return RValue::get(nullptr); 1638 } 1639 1640 case Builtin::BI__builtin_nontemporal_load: 1641 return RValue::get(EmitNontemporalLoad(*this, E)); 1642 case Builtin::BI__builtin_nontemporal_store: 1643 return RValue::get(EmitNontemporalStore(*this, E)); 1644 case Builtin::BI__c11_atomic_is_lock_free: 1645 case Builtin::BI__atomic_is_lock_free: { 1646 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the 1647 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since 1648 // _Atomic(T) is always properly-aligned. 
1649 const char *LibCallName = "__atomic_is_lock_free"; 1650 CallArgList Args; 1651 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))), 1652 getContext().getSizeType()); 1653 if (BuiltinID == Builtin::BI__atomic_is_lock_free) 1654 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))), 1655 getContext().VoidPtrTy); 1656 else 1657 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)), 1658 getContext().VoidPtrTy); 1659 const CGFunctionInfo &FuncInfo = 1660 CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args); 1661 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); 1662 llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName); 1663 return EmitCall(FuncInfo, CGCallee::forDirect(Func), 1664 ReturnValueSlot(), Args); 1665 } 1666 1667 case Builtin::BI__atomic_test_and_set: { 1668 // Look at the argument type to determine whether this is a volatile 1669 // operation. The parameter type is always volatile. 1670 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 1671 bool Volatile = 1672 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 1673 1674 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1675 unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); 1676 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 1677 Value *NewVal = Builder.getInt8(1); 1678 Value *Order = EmitScalarExpr(E->getArg(1)); 1679 if (isa<llvm::ConstantInt>(Order)) { 1680 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1681 AtomicRMWInst *Result = nullptr; 1682 switch (ord) { 1683 case 0: // memory_order_relaxed 1684 default: // invalid order 1685 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1686 llvm::AtomicOrdering::Monotonic); 1687 break; 1688 case 1: // memory_order_consume 1689 case 2: // memory_order_acquire 1690 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1691 llvm::AtomicOrdering::Acquire); 1692 break; 1693 case 3: // memory_order_release 1694 
Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1695 llvm::AtomicOrdering::Release); 1696 break; 1697 case 4: // memory_order_acq_rel 1698 1699 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1700 llvm::AtomicOrdering::AcquireRelease); 1701 break; 1702 case 5: // memory_order_seq_cst 1703 Result = Builder.CreateAtomicRMW( 1704 llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1705 llvm::AtomicOrdering::SequentiallyConsistent); 1706 break; 1707 } 1708 Result->setVolatile(Volatile); 1709 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 1710 } 1711 1712 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1713 1714 llvm::BasicBlock *BBs[5] = { 1715 createBasicBlock("monotonic", CurFn), 1716 createBasicBlock("acquire", CurFn), 1717 createBasicBlock("release", CurFn), 1718 createBasicBlock("acqrel", CurFn), 1719 createBasicBlock("seqcst", CurFn) 1720 }; 1721 llvm::AtomicOrdering Orders[5] = { 1722 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire, 1723 llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease, 1724 llvm::AtomicOrdering::SequentiallyConsistent}; 1725 1726 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1727 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 1728 1729 Builder.SetInsertPoint(ContBB); 1730 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set"); 1731 1732 for (unsigned i = 0; i < 5; ++i) { 1733 Builder.SetInsertPoint(BBs[i]); 1734 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1735 Ptr, NewVal, Orders[i]); 1736 RMW->setVolatile(Volatile); 1737 Result->addIncoming(RMW, BBs[i]); 1738 Builder.CreateBr(ContBB); 1739 } 1740 1741 SI->addCase(Builder.getInt32(0), BBs[0]); 1742 SI->addCase(Builder.getInt32(1), BBs[1]); 1743 SI->addCase(Builder.getInt32(2), BBs[1]); 1744 SI->addCase(Builder.getInt32(3), BBs[2]); 1745 SI->addCase(Builder.getInt32(4), BBs[3]); 1746 SI->addCase(Builder.getInt32(5), 
BBs[4]); 1747 1748 Builder.SetInsertPoint(ContBB); 1749 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 1750 } 1751 1752 case Builtin::BI__atomic_clear: { 1753 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 1754 bool Volatile = 1755 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 1756 1757 Address Ptr = EmitPointerWithAlignment(E->getArg(0)); 1758 unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace(); 1759 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 1760 Value *NewVal = Builder.getInt8(0); 1761 Value *Order = EmitScalarExpr(E->getArg(1)); 1762 if (isa<llvm::ConstantInt>(Order)) { 1763 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1764 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 1765 switch (ord) { 1766 case 0: // memory_order_relaxed 1767 default: // invalid order 1768 Store->setOrdering(llvm::AtomicOrdering::Monotonic); 1769 break; 1770 case 3: // memory_order_release 1771 Store->setOrdering(llvm::AtomicOrdering::Release); 1772 break; 1773 case 5: // memory_order_seq_cst 1774 Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent); 1775 break; 1776 } 1777 return RValue::get(nullptr); 1778 } 1779 1780 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1781 1782 llvm::BasicBlock *BBs[3] = { 1783 createBasicBlock("monotonic", CurFn), 1784 createBasicBlock("release", CurFn), 1785 createBasicBlock("seqcst", CurFn) 1786 }; 1787 llvm::AtomicOrdering Orders[3] = { 1788 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release, 1789 llvm::AtomicOrdering::SequentiallyConsistent}; 1790 1791 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1792 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 1793 1794 for (unsigned i = 0; i < 3; ++i) { 1795 Builder.SetInsertPoint(BBs[i]); 1796 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 1797 Store->setOrdering(Orders[i]); 1798 
Builder.CreateBr(ContBB); 1799 } 1800 1801 SI->addCase(Builder.getInt32(0), BBs[0]); 1802 SI->addCase(Builder.getInt32(3), BBs[1]); 1803 SI->addCase(Builder.getInt32(5), BBs[2]); 1804 1805 Builder.SetInsertPoint(ContBB); 1806 return RValue::get(nullptr); 1807 } 1808 1809 case Builtin::BI__atomic_thread_fence: 1810 case Builtin::BI__atomic_signal_fence: 1811 case Builtin::BI__c11_atomic_thread_fence: 1812 case Builtin::BI__c11_atomic_signal_fence: { 1813 llvm::SynchronizationScope Scope; 1814 if (BuiltinID == Builtin::BI__atomic_signal_fence || 1815 BuiltinID == Builtin::BI__c11_atomic_signal_fence) 1816 Scope = llvm::SingleThread; 1817 else 1818 Scope = llvm::CrossThread; 1819 Value *Order = EmitScalarExpr(E->getArg(0)); 1820 if (isa<llvm::ConstantInt>(Order)) { 1821 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1822 switch (ord) { 1823 case 0: // memory_order_relaxed 1824 default: // invalid order 1825 break; 1826 case 1: // memory_order_consume 1827 case 2: // memory_order_acquire 1828 Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope); 1829 break; 1830 case 3: // memory_order_release 1831 Builder.CreateFence(llvm::AtomicOrdering::Release, Scope); 1832 break; 1833 case 4: // memory_order_acq_rel 1834 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope); 1835 break; 1836 case 5: // memory_order_seq_cst 1837 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, 1838 Scope); 1839 break; 1840 } 1841 return RValue::get(nullptr); 1842 } 1843 1844 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB; 1845 AcquireBB = createBasicBlock("acquire", CurFn); 1846 ReleaseBB = createBasicBlock("release", CurFn); 1847 AcqRelBB = createBasicBlock("acqrel", CurFn); 1848 SeqCstBB = createBasicBlock("seqcst", CurFn); 1849 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1850 1851 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1852 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB); 
1853 1854 Builder.SetInsertPoint(AcquireBB); 1855 Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope); 1856 Builder.CreateBr(ContBB); 1857 SI->addCase(Builder.getInt32(1), AcquireBB); 1858 SI->addCase(Builder.getInt32(2), AcquireBB); 1859 1860 Builder.SetInsertPoint(ReleaseBB); 1861 Builder.CreateFence(llvm::AtomicOrdering::Release, Scope); 1862 Builder.CreateBr(ContBB); 1863 SI->addCase(Builder.getInt32(3), ReleaseBB); 1864 1865 Builder.SetInsertPoint(AcqRelBB); 1866 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope); 1867 Builder.CreateBr(ContBB); 1868 SI->addCase(Builder.getInt32(4), AcqRelBB); 1869 1870 Builder.SetInsertPoint(SeqCstBB); 1871 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Scope); 1872 Builder.CreateBr(ContBB); 1873 SI->addCase(Builder.getInt32(5), SeqCstBB); 1874 1875 Builder.SetInsertPoint(ContBB); 1876 return RValue::get(nullptr); 1877 } 1878 1879 // Library functions with special handling. 1880 case Builtin::BIsqrt: 1881 case Builtin::BIsqrtf: 1882 case Builtin::BIsqrtl: { 1883 // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only 1884 // in finite- or unsafe-math mode (the intrinsic has different semantics 1885 // for handling negative numbers compared to the library function, so 1886 // -fmath-errno=0 is not enough). 1887 if (!FD->hasAttr<ConstAttr>()) 1888 break; 1889 if (!(CGM.getCodeGenOpts().UnsafeFPMath || 1890 CGM.getCodeGenOpts().NoNaNsFPMath)) 1891 break; 1892 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 1893 llvm::Type *ArgType = Arg0->getType(); 1894 Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType); 1895 return RValue::get(Builder.CreateCall(F, Arg0)); 1896 } 1897 1898 case Builtin::BI__builtin_pow: 1899 case Builtin::BI__builtin_powf: 1900 case Builtin::BI__builtin_powl: 1901 case Builtin::BIpow: 1902 case Builtin::BIpowf: 1903 case Builtin::BIpowl: { 1904 // Transform a call to pow* into a @llvm.pow.* intrinsic call. 
1905 if (!FD->hasAttr<ConstAttr>()) 1906 break; 1907 Value *Base = EmitScalarExpr(E->getArg(0)); 1908 Value *Exponent = EmitScalarExpr(E->getArg(1)); 1909 llvm::Type *ArgType = Base->getType(); 1910 Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType); 1911 return RValue::get(Builder.CreateCall(F, {Base, Exponent})); 1912 } 1913 1914 case Builtin::BIfma: 1915 case Builtin::BIfmaf: 1916 case Builtin::BIfmal: 1917 case Builtin::BI__builtin_fma: 1918 case Builtin::BI__builtin_fmaf: 1919 case Builtin::BI__builtin_fmal: { 1920 // Rewrite fma to intrinsic. 1921 Value *FirstArg = EmitScalarExpr(E->getArg(0)); 1922 llvm::Type *ArgType = FirstArg->getType(); 1923 Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType); 1924 return RValue::get( 1925 Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)), 1926 EmitScalarExpr(E->getArg(2))})); 1927 } 1928 1929 case Builtin::BI__builtin_signbit: 1930 case Builtin::BI__builtin_signbitf: 1931 case Builtin::BI__builtin_signbitl: { 1932 return RValue::get( 1933 Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))), 1934 ConvertType(E->getType()))); 1935 } 1936 case Builtin::BI__builtin_annotation: { 1937 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0)); 1938 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation, 1939 AnnVal->getType()); 1940 1941 // Get the annotation string, go through casts. Sema requires this to be a 1942 // non-wide string literal, potentially casted, so the cast<> is safe. 
1943 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts(); 1944 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString(); 1945 return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc())); 1946 } 1947 case Builtin::BI__builtin_addcb: 1948 case Builtin::BI__builtin_addcs: 1949 case Builtin::BI__builtin_addc: 1950 case Builtin::BI__builtin_addcl: 1951 case Builtin::BI__builtin_addcll: 1952 case Builtin::BI__builtin_subcb: 1953 case Builtin::BI__builtin_subcs: 1954 case Builtin::BI__builtin_subc: 1955 case Builtin::BI__builtin_subcl: 1956 case Builtin::BI__builtin_subcll: { 1957 1958 // We translate all of these builtins from expressions of the form: 1959 // int x = ..., y = ..., carryin = ..., carryout, result; 1960 // result = __builtin_addc(x, y, carryin, &carryout); 1961 // 1962 // to LLVM IR of the form: 1963 // 1964 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) 1965 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0 1966 // %carry1 = extractvalue {i32, i1} %tmp1, 1 1967 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1, 1968 // i32 %carryin) 1969 // %result = extractvalue {i32, i1} %tmp2, 0 1970 // %carry2 = extractvalue {i32, i1} %tmp2, 1 1971 // %tmp3 = or i1 %carry1, %carry2 1972 // %tmp4 = zext i1 %tmp3 to i32 1973 // store i32 %tmp4, i32* %carryout 1974 1975 // Scalarize our inputs. 1976 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 1977 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 1978 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2)); 1979 Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3)); 1980 1981 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow. 
1982 llvm::Intrinsic::ID IntrinsicId; 1983 switch (BuiltinID) { 1984 default: llvm_unreachable("Unknown multiprecision builtin id."); 1985 case Builtin::BI__builtin_addcb: 1986 case Builtin::BI__builtin_addcs: 1987 case Builtin::BI__builtin_addc: 1988 case Builtin::BI__builtin_addcl: 1989 case Builtin::BI__builtin_addcll: 1990 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 1991 break; 1992 case Builtin::BI__builtin_subcb: 1993 case Builtin::BI__builtin_subcs: 1994 case Builtin::BI__builtin_subc: 1995 case Builtin::BI__builtin_subcl: 1996 case Builtin::BI__builtin_subcll: 1997 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 1998 break; 1999 } 2000 2001 // Construct our resulting LLVM IR expression. 2002 llvm::Value *Carry1; 2003 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId, 2004 X, Y, Carry1); 2005 llvm::Value *Carry2; 2006 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId, 2007 Sum1, Carryin, Carry2); 2008 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2), 2009 X->getType()); 2010 Builder.CreateStore(CarryOut, CarryOutPtr); 2011 return RValue::get(Sum2); 2012 } 2013 2014 case Builtin::BI__builtin_add_overflow: 2015 case Builtin::BI__builtin_sub_overflow: 2016 case Builtin::BI__builtin_mul_overflow: { 2017 const clang::Expr *LeftArg = E->getArg(0); 2018 const clang::Expr *RightArg = E->getArg(1); 2019 const clang::Expr *ResultArg = E->getArg(2); 2020 2021 clang::QualType ResultQTy = 2022 ResultArg->getType()->castAs<PointerType>()->getPointeeType(); 2023 2024 WidthAndSignedness LeftInfo = 2025 getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType()); 2026 WidthAndSignedness RightInfo = 2027 getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType()); 2028 WidthAndSignedness ResultInfo = 2029 getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy); 2030 WidthAndSignedness EncompassingInfo = 2031 EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo}); 2032 2033 llvm::Type 
*EncompassingLLVMTy = 2034 llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width); 2035 2036 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy); 2037 2038 llvm::Intrinsic::ID IntrinsicId; 2039 switch (BuiltinID) { 2040 default: 2041 llvm_unreachable("Unknown overflow builtin id."); 2042 case Builtin::BI__builtin_add_overflow: 2043 IntrinsicId = EncompassingInfo.Signed 2044 ? llvm::Intrinsic::sadd_with_overflow 2045 : llvm::Intrinsic::uadd_with_overflow; 2046 break; 2047 case Builtin::BI__builtin_sub_overflow: 2048 IntrinsicId = EncompassingInfo.Signed 2049 ? llvm::Intrinsic::ssub_with_overflow 2050 : llvm::Intrinsic::usub_with_overflow; 2051 break; 2052 case Builtin::BI__builtin_mul_overflow: 2053 IntrinsicId = EncompassingInfo.Signed 2054 ? llvm::Intrinsic::smul_with_overflow 2055 : llvm::Intrinsic::umul_with_overflow; 2056 break; 2057 } 2058 2059 llvm::Value *Left = EmitScalarExpr(LeftArg); 2060 llvm::Value *Right = EmitScalarExpr(RightArg); 2061 Address ResultPtr = EmitPointerWithAlignment(ResultArg); 2062 2063 // Extend each operand to the encompassing type. 2064 Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed); 2065 Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed); 2066 2067 // Perform the operation on the extended values. 2068 llvm::Value *Overflow, *Result; 2069 Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow); 2070 2071 if (EncompassingInfo.Width > ResultInfo.Width) { 2072 // The encompassing type is wider than the result type, so we need to 2073 // truncate it. 2074 llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy); 2075 2076 // To see if the truncation caused an overflow, we will extend 2077 // the result and then compare it to the original result. 
2078 llvm::Value *ResultTruncExt = Builder.CreateIntCast( 2079 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed); 2080 llvm::Value *TruncationOverflow = 2081 Builder.CreateICmpNE(Result, ResultTruncExt); 2082 2083 Overflow = Builder.CreateOr(Overflow, TruncationOverflow); 2084 Result = ResultTrunc; 2085 } 2086 2087 // Finally, store the result using the pointer. 2088 bool isVolatile = 2089 ResultArg->getType()->getPointeeType().isVolatileQualified(); 2090 Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile); 2091 2092 return RValue::get(Overflow); 2093 } 2094 2095 case Builtin::BI__builtin_uadd_overflow: 2096 case Builtin::BI__builtin_uaddl_overflow: 2097 case Builtin::BI__builtin_uaddll_overflow: 2098 case Builtin::BI__builtin_usub_overflow: 2099 case Builtin::BI__builtin_usubl_overflow: 2100 case Builtin::BI__builtin_usubll_overflow: 2101 case Builtin::BI__builtin_umul_overflow: 2102 case Builtin::BI__builtin_umull_overflow: 2103 case Builtin::BI__builtin_umulll_overflow: 2104 case Builtin::BI__builtin_sadd_overflow: 2105 case Builtin::BI__builtin_saddl_overflow: 2106 case Builtin::BI__builtin_saddll_overflow: 2107 case Builtin::BI__builtin_ssub_overflow: 2108 case Builtin::BI__builtin_ssubl_overflow: 2109 case Builtin::BI__builtin_ssubll_overflow: 2110 case Builtin::BI__builtin_smul_overflow: 2111 case Builtin::BI__builtin_smull_overflow: 2112 case Builtin::BI__builtin_smulll_overflow: { 2113 2114 // We translate all of these builtins directly to the relevant llvm IR node. 2115 2116 // Scalarize our inputs. 
2117 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 2118 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 2119 Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2)); 2120 2121 // Decide which of the overflow intrinsics we are lowering to: 2122 llvm::Intrinsic::ID IntrinsicId; 2123 switch (BuiltinID) { 2124 default: llvm_unreachable("Unknown overflow builtin id."); 2125 case Builtin::BI__builtin_uadd_overflow: 2126 case Builtin::BI__builtin_uaddl_overflow: 2127 case Builtin::BI__builtin_uaddll_overflow: 2128 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 2129 break; 2130 case Builtin::BI__builtin_usub_overflow: 2131 case Builtin::BI__builtin_usubl_overflow: 2132 case Builtin::BI__builtin_usubll_overflow: 2133 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 2134 break; 2135 case Builtin::BI__builtin_umul_overflow: 2136 case Builtin::BI__builtin_umull_overflow: 2137 case Builtin::BI__builtin_umulll_overflow: 2138 IntrinsicId = llvm::Intrinsic::umul_with_overflow; 2139 break; 2140 case Builtin::BI__builtin_sadd_overflow: 2141 case Builtin::BI__builtin_saddl_overflow: 2142 case Builtin::BI__builtin_saddll_overflow: 2143 IntrinsicId = llvm::Intrinsic::sadd_with_overflow; 2144 break; 2145 case Builtin::BI__builtin_ssub_overflow: 2146 case Builtin::BI__builtin_ssubl_overflow: 2147 case Builtin::BI__builtin_ssubll_overflow: 2148 IntrinsicId = llvm::Intrinsic::ssub_with_overflow; 2149 break; 2150 case Builtin::BI__builtin_smul_overflow: 2151 case Builtin::BI__builtin_smull_overflow: 2152 case Builtin::BI__builtin_smulll_overflow: 2153 IntrinsicId = llvm::Intrinsic::smul_with_overflow; 2154 break; 2155 } 2156 2157 2158 llvm::Value *Carry; 2159 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry); 2160 Builder.CreateStore(Sum, SumOutPtr); 2161 2162 return RValue::get(Carry); 2163 } 2164 case Builtin::BI__builtin_addressof: 2165 return RValue::get(EmitLValue(E->getArg(0)).getPointer()); 2166 case Builtin::BI__builtin_operator_new: 2167 return 
EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 2168 E->getArg(0), false); 2169 case Builtin::BI__builtin_operator_delete: 2170 return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 2171 E->getArg(0), true); 2172 case Builtin::BI__noop: 2173 // __noop always evaluates to an integer literal zero. 2174 return RValue::get(ConstantInt::get(IntTy, 0)); 2175 case Builtin::BI__builtin_call_with_static_chain: { 2176 const CallExpr *Call = cast<CallExpr>(E->getArg(0)); 2177 const Expr *Chain = E->getArg(1); 2178 return EmitCall(Call->getCallee()->getType(), 2179 EmitCallee(Call->getCallee()), Call, ReturnValue, 2180 EmitScalarExpr(Chain)); 2181 } 2182 case Builtin::BI_InterlockedExchange8: 2183 case Builtin::BI_InterlockedExchange16: 2184 case Builtin::BI_InterlockedExchange: 2185 case Builtin::BI_InterlockedExchangePointer: 2186 return RValue::get( 2187 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E)); 2188 case Builtin::BI_InterlockedCompareExchangePointer: { 2189 llvm::Type *RTy; 2190 llvm::IntegerType *IntType = 2191 IntegerType::get(getLLVMContext(), 2192 getContext().getTypeSize(E->getType())); 2193 llvm::Type *IntPtrType = IntType->getPointerTo(); 2194 2195 llvm::Value *Destination = 2196 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType); 2197 2198 llvm::Value *Exchange = EmitScalarExpr(E->getArg(1)); 2199 RTy = Exchange->getType(); 2200 Exchange = Builder.CreatePtrToInt(Exchange, IntType); 2201 2202 llvm::Value *Comparand = 2203 Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType); 2204 2205 auto Result = 2206 Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, 2207 AtomicOrdering::SequentiallyConsistent, 2208 AtomicOrdering::SequentiallyConsistent); 2209 Result->setVolatile(true); 2210 2211 return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result, 2212 0), 2213 RTy)); 2214 } 2215 case Builtin::BI_InterlockedCompareExchange8: 2216 case 
Builtin::BI_InterlockedCompareExchange16: 2217 case Builtin::BI_InterlockedCompareExchange: 2218 case Builtin::BI_InterlockedCompareExchange64: { 2219 AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg( 2220 EmitScalarExpr(E->getArg(0)), 2221 EmitScalarExpr(E->getArg(2)), 2222 EmitScalarExpr(E->getArg(1)), 2223 AtomicOrdering::SequentiallyConsistent, 2224 AtomicOrdering::SequentiallyConsistent); 2225 CXI->setVolatile(true); 2226 return RValue::get(Builder.CreateExtractValue(CXI, 0)); 2227 } 2228 case Builtin::BI_InterlockedIncrement16: 2229 case Builtin::BI_InterlockedIncrement: 2230 return RValue::get( 2231 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E)); 2232 case Builtin::BI_InterlockedDecrement16: 2233 case Builtin::BI_InterlockedDecrement: 2234 return RValue::get( 2235 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E)); 2236 case Builtin::BI_InterlockedAnd8: 2237 case Builtin::BI_InterlockedAnd16: 2238 case Builtin::BI_InterlockedAnd: 2239 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E)); 2240 case Builtin::BI_InterlockedExchangeAdd8: 2241 case Builtin::BI_InterlockedExchangeAdd16: 2242 case Builtin::BI_InterlockedExchangeAdd: 2243 return RValue::get( 2244 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E)); 2245 case Builtin::BI_InterlockedExchangeSub8: 2246 case Builtin::BI_InterlockedExchangeSub16: 2247 case Builtin::BI_InterlockedExchangeSub: 2248 return RValue::get( 2249 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E)); 2250 case Builtin::BI_InterlockedOr8: 2251 case Builtin::BI_InterlockedOr16: 2252 case Builtin::BI_InterlockedOr: 2253 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E)); 2254 case Builtin::BI_InterlockedXor8: 2255 case Builtin::BI_InterlockedXor16: 2256 case Builtin::BI_InterlockedXor: 2257 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E)); 2258 case Builtin::BI_interlockedbittestandset: 2259 return RValue::get( 2260 
EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E)); 2261 2262 case Builtin::BI__exception_code: 2263 case Builtin::BI_exception_code: 2264 return RValue::get(EmitSEHExceptionCode()); 2265 case Builtin::BI__exception_info: 2266 case Builtin::BI_exception_info: 2267 return RValue::get(EmitSEHExceptionInfo()); 2268 case Builtin::BI__abnormal_termination: 2269 case Builtin::BI_abnormal_termination: 2270 return RValue::get(EmitSEHAbnormalTermination()); 2271 case Builtin::BI_setjmpex: { 2272 if (getTarget().getTriple().isOSMSVCRT()) { 2273 llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; 2274 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get( 2275 getLLVMContext(), llvm::AttributeList::FunctionIndex, 2276 llvm::Attribute::ReturnsTwice); 2277 llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction( 2278 llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), 2279 "_setjmpex", ReturnsTwiceAttr, /*Local=*/true); 2280 llvm::Value *Buf = Builder.CreateBitOrPointerCast( 2281 EmitScalarExpr(E->getArg(0)), Int8PtrTy); 2282 llvm::Value *FrameAddr = 2283 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 2284 ConstantInt::get(Int32Ty, 0)); 2285 llvm::Value *Args[] = {Buf, FrameAddr}; 2286 llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args); 2287 CS.setAttributes(ReturnsTwiceAttr); 2288 return RValue::get(CS.getInstruction()); 2289 } 2290 break; 2291 } 2292 case Builtin::BI_setjmp: { 2293 if (getTarget().getTriple().isOSMSVCRT()) { 2294 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get( 2295 getLLVMContext(), llvm::AttributeList::FunctionIndex, 2296 llvm::Attribute::ReturnsTwice); 2297 llvm::Value *Buf = Builder.CreateBitOrPointerCast( 2298 EmitScalarExpr(E->getArg(0)), Int8PtrTy); 2299 llvm::CallSite CS; 2300 if (getTarget().getTriple().getArch() == llvm::Triple::x86) { 2301 llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy}; 2302 llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction( 2303 llvm::FunctionType::get(IntTy, 
ArgTypes, /*isVarArg=*/true), 2304 "_setjmp3", ReturnsTwiceAttr, /*Local=*/true); 2305 llvm::Value *Count = ConstantInt::get(IntTy, 0); 2306 llvm::Value *Args[] = {Buf, Count}; 2307 CS = EmitRuntimeCallOrInvoke(SetJmp3, Args); 2308 } else { 2309 llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; 2310 llvm::Constant *SetJmp = CGM.CreateRuntimeFunction( 2311 llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), 2312 "_setjmp", ReturnsTwiceAttr, /*Local=*/true); 2313 llvm::Value *FrameAddr = 2314 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 2315 ConstantInt::get(Int32Ty, 0)); 2316 llvm::Value *Args[] = {Buf, FrameAddr}; 2317 CS = EmitRuntimeCallOrInvoke(SetJmp, Args); 2318 } 2319 CS.setAttributes(ReturnsTwiceAttr); 2320 return RValue::get(CS.getInstruction()); 2321 } 2322 break; 2323 } 2324 2325 case Builtin::BI__GetExceptionInfo: { 2326 if (llvm::GlobalVariable *GV = 2327 CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType())) 2328 return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy)); 2329 break; 2330 } 2331 2332 case Builtin::BI__fastfail: 2333 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E)); 2334 2335 case Builtin::BI__builtin_coro_size: { 2336 auto & Context = getContext(); 2337 auto SizeTy = Context.getSizeType(); 2338 auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy)); 2339 Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T); 2340 return RValue::get(Builder.CreateCall(F)); 2341 } 2342 2343 case Builtin::BI__builtin_coro_id: 2344 return EmitCoroutineIntrinsic(E, Intrinsic::coro_id); 2345 case Builtin::BI__builtin_coro_promise: 2346 return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise); 2347 case Builtin::BI__builtin_coro_resume: 2348 return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume); 2349 case Builtin::BI__builtin_coro_frame: 2350 return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame); 2351 case Builtin::BI__builtin_coro_free: 2352 return EmitCoroutineIntrinsic(E, 
Intrinsic::coro_free); 2353 case Builtin::BI__builtin_coro_destroy: 2354 return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy); 2355 case Builtin::BI__builtin_coro_done: 2356 return EmitCoroutineIntrinsic(E, Intrinsic::coro_done); 2357 case Builtin::BI__builtin_coro_alloc: 2358 return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc); 2359 case Builtin::BI__builtin_coro_begin: 2360 return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin); 2361 case Builtin::BI__builtin_coro_end: 2362 return EmitCoroutineIntrinsic(E, Intrinsic::coro_end); 2363 case Builtin::BI__builtin_coro_suspend: 2364 return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend); 2365 case Builtin::BI__builtin_coro_param: 2366 return EmitCoroutineIntrinsic(E, Intrinsic::coro_param); 2367 2368 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions 2369 case Builtin::BIread_pipe: 2370 case Builtin::BIwrite_pipe: { 2371 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2372 *Arg1 = EmitScalarExpr(E->getArg(1)); 2373 CGOpenCLRuntime OpenCLRT(CGM); 2374 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2375 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2376 2377 // Type of the generic packet parameter. 2378 unsigned GenericAS = 2379 getContext().getTargetAddressSpace(LangAS::opencl_generic); 2380 llvm::Type *I8PTy = llvm::PointerType::get( 2381 llvm::Type::getInt8Ty(getLLVMContext()), GenericAS); 2382 2383 // Testing which overloaded version we should generate the call for. 2384 if (2U == E->getNumArgs()) { 2385 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2" 2386 : "__write_pipe_2"; 2387 // Creating a generic function type to be able to call with any builtin or 2388 // user defined type. 
2389 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty}; 2390 llvm::FunctionType *FTy = llvm::FunctionType::get( 2391 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2392 Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy); 2393 return RValue::get( 2394 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2395 {Arg0, BCast, PacketSize, PacketAlign})); 2396 } else { 2397 assert(4 == E->getNumArgs() && 2398 "Illegal number of parameters to pipe function"); 2399 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4" 2400 : "__write_pipe_4"; 2401 2402 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy, 2403 Int32Ty, Int32Ty}; 2404 Value *Arg2 = EmitScalarExpr(E->getArg(2)), 2405 *Arg3 = EmitScalarExpr(E->getArg(3)); 2406 llvm::FunctionType *FTy = llvm::FunctionType::get( 2407 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2408 Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy); 2409 // We know the third argument is an integer type, but we may need to cast 2410 // it to i32. 2411 if (Arg2->getType() != Int32Ty) 2412 Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty); 2413 return RValue::get(Builder.CreateCall( 2414 CGM.CreateRuntimeFunction(FTy, Name), 2415 {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign})); 2416 } 2417 } 2418 // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write 2419 // functions 2420 case Builtin::BIreserve_read_pipe: 2421 case Builtin::BIreserve_write_pipe: 2422 case Builtin::BIwork_group_reserve_read_pipe: 2423 case Builtin::BIwork_group_reserve_write_pipe: 2424 case Builtin::BIsub_group_reserve_read_pipe: 2425 case Builtin::BIsub_group_reserve_write_pipe: { 2426 // Composing the mangled name for the function. 
2427 const char *Name; 2428 if (BuiltinID == Builtin::BIreserve_read_pipe) 2429 Name = "__reserve_read_pipe"; 2430 else if (BuiltinID == Builtin::BIreserve_write_pipe) 2431 Name = "__reserve_write_pipe"; 2432 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe) 2433 Name = "__work_group_reserve_read_pipe"; 2434 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe) 2435 Name = "__work_group_reserve_write_pipe"; 2436 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe) 2437 Name = "__sub_group_reserve_read_pipe"; 2438 else 2439 Name = "__sub_group_reserve_write_pipe"; 2440 2441 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2442 *Arg1 = EmitScalarExpr(E->getArg(1)); 2443 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy); 2444 CGOpenCLRuntime OpenCLRT(CGM); 2445 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2446 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2447 2448 // Building the generic function prototype. 2449 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty}; 2450 llvm::FunctionType *FTy = llvm::FunctionType::get( 2451 ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2452 // We know the second argument is an integer type, but we may need to cast 2453 // it to i32. 
2454 if (Arg1->getType() != Int32Ty) 2455 Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty); 2456 return RValue::get( 2457 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2458 {Arg0, Arg1, PacketSize, PacketAlign})); 2459 } 2460 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write 2461 // functions 2462 case Builtin::BIcommit_read_pipe: 2463 case Builtin::BIcommit_write_pipe: 2464 case Builtin::BIwork_group_commit_read_pipe: 2465 case Builtin::BIwork_group_commit_write_pipe: 2466 case Builtin::BIsub_group_commit_read_pipe: 2467 case Builtin::BIsub_group_commit_write_pipe: { 2468 const char *Name; 2469 if (BuiltinID == Builtin::BIcommit_read_pipe) 2470 Name = "__commit_read_pipe"; 2471 else if (BuiltinID == Builtin::BIcommit_write_pipe) 2472 Name = "__commit_write_pipe"; 2473 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe) 2474 Name = "__work_group_commit_read_pipe"; 2475 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe) 2476 Name = "__work_group_commit_write_pipe"; 2477 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe) 2478 Name = "__sub_group_commit_read_pipe"; 2479 else 2480 Name = "__sub_group_commit_write_pipe"; 2481 2482 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2483 *Arg1 = EmitScalarExpr(E->getArg(1)); 2484 CGOpenCLRuntime OpenCLRT(CGM); 2485 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2486 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2487 2488 // Building the generic function prototype. 
2489 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty}; 2490 llvm::FunctionType *FTy = 2491 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()), 2492 llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2493 2494 return RValue::get( 2495 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2496 {Arg0, Arg1, PacketSize, PacketAlign})); 2497 } 2498 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions 2499 case Builtin::BIget_pipe_num_packets: 2500 case Builtin::BIget_pipe_max_packets: { 2501 const char *Name; 2502 if (BuiltinID == Builtin::BIget_pipe_num_packets) 2503 Name = "__get_pipe_num_packets"; 2504 else 2505 Name = "__get_pipe_max_packets"; 2506 2507 // Building the generic function prototype. 2508 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 2509 CGOpenCLRuntime OpenCLRT(CGM); 2510 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); 2511 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); 2512 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty}; 2513 llvm::FunctionType *FTy = llvm::FunctionType::get( 2514 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2515 2516 return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2517 {Arg0, PacketSize, PacketAlign})); 2518 } 2519 2520 // OpenCL v2.0 s6.13.9 - Address space qualifier functions. 
2521 case Builtin::BIto_global: 2522 case Builtin::BIto_local: 2523 case Builtin::BIto_private: { 2524 auto Arg0 = EmitScalarExpr(E->getArg(0)); 2525 auto NewArgT = llvm::PointerType::get(Int8Ty, 2526 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2527 auto NewRetT = llvm::PointerType::get(Int8Ty, 2528 CGM.getContext().getTargetAddressSpace( 2529 E->getType()->getPointeeType().getAddressSpace())); 2530 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false); 2531 llvm::Value *NewArg; 2532 if (Arg0->getType()->getPointerAddressSpace() != 2533 NewArgT->getPointerAddressSpace()) 2534 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT); 2535 else 2536 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT); 2537 auto NewName = std::string("__") + E->getDirectCallee()->getName().str(); 2538 auto NewCall = 2539 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg}); 2540 return RValue::get(Builder.CreateBitOrPointerCast(NewCall, 2541 ConvertType(E->getType()))); 2542 } 2543 2544 // OpenCL v2.0, s6.13.17 - Enqueue kernel function. 2545 // It contains four different overload formats specified in Table 6.13.17.1. 
2546 case Builtin::BIenqueue_kernel: { 2547 StringRef Name; // Generated function call name 2548 unsigned NumArgs = E->getNumArgs(); 2549 2550 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy); 2551 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 2552 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2553 2554 llvm::Value *Queue = EmitScalarExpr(E->getArg(0)); 2555 llvm::Value *Flags = EmitScalarExpr(E->getArg(1)); 2556 LValue NDRangeL = EmitAggExprToLValue(E->getArg(2)); 2557 llvm::Value *Range = NDRangeL.getAddress().getPointer(); 2558 llvm::Type *RangeTy = NDRangeL.getAddress().getType(); 2559 2560 if (NumArgs == 4) { 2561 // The most basic form of the call with parameters: 2562 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void) 2563 Name = "__enqueue_kernel_basic"; 2564 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy}; 2565 llvm::FunctionType *FTy = llvm::FunctionType::get( 2566 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false); 2567 2568 llvm::Value *Block = Builder.CreatePointerCast( 2569 EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy); 2570 2571 AttrBuilder B; 2572 B.addAttribute(Attribute::ByVal); 2573 llvm::AttributeList ByValAttrSet = 2574 llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B); 2575 2576 auto RTCall = 2577 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet), 2578 {Queue, Flags, Range, Block}); 2579 RTCall->setAttributes(ByValAttrSet); 2580 return RValue::get(RTCall); 2581 } 2582 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature"); 2583 2584 // Could have events and/or vaargs. 2585 if (E->getArg(3)->getType()->isBlockPointerType()) { 2586 // No events passed, but has variadic arguments. 
2587 Name = "__enqueue_kernel_vaargs"; 2588 llvm::Value *Block = Builder.CreatePointerCast( 2589 EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy); 2590 // Create a vector of the arguments, as well as a constant value to 2591 // express to the runtime the number of variadic arguments. 2592 std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block, 2593 ConstantInt::get(IntTy, NumArgs - 4)}; 2594 std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, 2595 GenericVoidPtrTy, IntTy}; 2596 2597 // Each of the following arguments specifies the size of the corresponding 2598 // argument passed to the enqueued block. 2599 for (unsigned I = 4/*Position of the first size arg*/; I < NumArgs; ++I) 2600 Args.push_back( 2601 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy)); 2602 2603 llvm::FunctionType *FTy = llvm::FunctionType::get( 2604 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true); 2605 return RValue::get( 2606 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2607 llvm::ArrayRef<llvm::Value *>(Args))); 2608 } 2609 // Any calls now have event arguments passed. 2610 if (NumArgs >= 7) { 2611 llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy); 2612 llvm::Type *EventPtrTy = EventTy->getPointerTo( 2613 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2614 2615 llvm::Value *NumEvents = 2616 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty); 2617 llvm::Value *EventList = 2618 E->getArg(4)->getType()->isArrayType() 2619 ? EmitArrayToPointerDecay(E->getArg(4)).getPointer() 2620 : EmitScalarExpr(E->getArg(4)); 2621 llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5)); 2622 // Convert to generic address space. 
2623 EventList = Builder.CreatePointerCast(EventList, EventPtrTy); 2624 ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy); 2625 llvm::Value *Block = Builder.CreatePointerCast( 2626 EmitScalarExpr(E->getArg(6)), GenericVoidPtrTy); 2627 2628 std::vector<llvm::Type *> ArgTys = { 2629 QueueTy, Int32Ty, RangeTy, Int32Ty, 2630 EventPtrTy, EventPtrTy, GenericVoidPtrTy}; 2631 2632 std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents, 2633 EventList, ClkEvent, Block}; 2634 2635 if (NumArgs == 7) { 2636 // Has events but no variadics. 2637 Name = "__enqueue_kernel_basic_events"; 2638 llvm::FunctionType *FTy = llvm::FunctionType::get( 2639 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2640 return RValue::get( 2641 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2642 llvm::ArrayRef<llvm::Value *>(Args))); 2643 } 2644 // Has event info and variadics 2645 // Pass the number of variadics to the runtime function too. 2646 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7)); 2647 ArgTys.push_back(Int32Ty); 2648 Name = "__enqueue_kernel_events_vaargs"; 2649 2650 // Each of the following arguments specifies the size of the corresponding 2651 // argument passed to the enqueued block. 2652 for (unsigned I = 7/*Position of the first size arg*/; I < NumArgs; ++I) 2653 Args.push_back( 2654 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy)); 2655 2656 llvm::FunctionType *FTy = llvm::FunctionType::get( 2657 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true); 2658 return RValue::get( 2659 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2660 llvm::ArrayRef<llvm::Value *>(Args))); 2661 } 2662 } 2663 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block 2664 // parameter. 
2665 case Builtin::BIget_kernel_work_group_size: { 2666 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 2667 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2668 Value *Arg = EmitScalarExpr(E->getArg(0)); 2669 Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy); 2670 return RValue::get(Builder.CreateCall( 2671 CGM.CreateRuntimeFunction( 2672 llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false), 2673 "__get_kernel_work_group_size_impl"), 2674 Arg)); 2675 } 2676 case Builtin::BIget_kernel_preferred_work_group_size_multiple: { 2677 llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( 2678 getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2679 Value *Arg = EmitScalarExpr(E->getArg(0)); 2680 Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy); 2681 return RValue::get(Builder.CreateCall( 2682 CGM.CreateRuntimeFunction( 2683 llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false), 2684 "__get_kernel_preferred_work_group_multiple_impl"), 2685 Arg)); 2686 } 2687 case Builtin::BIprintf: 2688 if (getTarget().getTriple().isNVPTX()) 2689 return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue); 2690 break; 2691 case Builtin::BI__builtin_canonicalize: 2692 case Builtin::BI__builtin_canonicalizef: 2693 case Builtin::BI__builtin_canonicalizel: 2694 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize)); 2695 2696 case Builtin::BI__builtin_thread_pointer: { 2697 if (!getContext().getTargetInfo().isTLSSupported()) 2698 CGM.ErrorUnsupported(E, "__builtin_thread_pointer"); 2699 // Fall through - it's already mapped to the intrinsic by GCCBuiltin. 2700 break; 2701 } 2702 case Builtin::BI__builtin_os_log_format: { 2703 assert(E->getNumArgs() >= 2 && 2704 "__builtin_os_log_format takes at least 2 arguments"); 2705 analyze_os_log::OSLogBufferLayout Layout; 2706 analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout); 2707 Address BufAddr = EmitPointerWithAlignment(E->getArg(0)); 2708 // Ignore argument 1, the format string. 
It is not currently used. 2709 CharUnits Offset; 2710 Builder.CreateStore( 2711 Builder.getInt8(Layout.getSummaryByte()), 2712 Builder.CreateConstByteGEP(BufAddr, Offset++, "summary")); 2713 Builder.CreateStore( 2714 Builder.getInt8(Layout.getNumArgsByte()), 2715 Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs")); 2716 2717 llvm::SmallVector<llvm::Value *, 4> RetainableOperands; 2718 for (const auto &Item : Layout.Items) { 2719 Builder.CreateStore( 2720 Builder.getInt8(Item.getDescriptorByte()), 2721 Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor")); 2722 Builder.CreateStore( 2723 Builder.getInt8(Item.getSizeByte()), 2724 Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize")); 2725 Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset); 2726 if (const Expr *TheExpr = Item.getExpr()) { 2727 Addr = Builder.CreateElementBitCast( 2728 Addr, ConvertTypeForMem(TheExpr->getType())); 2729 // Check if this is a retainable type. 2730 if (TheExpr->getType()->isObjCRetainableType()) { 2731 assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar && 2732 "Only scalar can be a ObjC retainable type"); 2733 llvm::Value *SV = EmitScalarExpr(TheExpr, /*Ignore*/ false); 2734 RValue RV = RValue::get(SV); 2735 LValue LV = MakeAddrLValue(Addr, TheExpr->getType()); 2736 EmitStoreThroughLValue(RV, LV); 2737 // Check if the object is constant, if not, save it in 2738 // RetainableOperands. 2739 if (!isa<Constant>(SV)) 2740 RetainableOperands.push_back(SV); 2741 } else { 2742 EmitAnyExprToMem(TheExpr, Addr, Qualifiers(), /*isInit*/ true); 2743 } 2744 } else { 2745 Addr = Builder.CreateElementBitCast(Addr, Int32Ty); 2746 Builder.CreateStore( 2747 Builder.getInt32(Item.getConstValue().getQuantity()), Addr); 2748 } 2749 Offset += Item.size(); 2750 } 2751 2752 // Push a clang.arc.use cleanup for each object in RetainableOperands. 
The 2753 // cleanup will cause the use to appear after the final log call, keeping 2754 // the object valid while it's held in the log buffer. Note that if there's 2755 // a release cleanup on the object, it will already be active; since 2756 // cleanups are emitted in reverse order, the use will occur before the 2757 // object is released. 2758 if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount && 2759 CGM.getCodeGenOpts().OptimizationLevel != 0) 2760 for (llvm::Value *object : RetainableOperands) 2761 pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), object); 2762 2763 return RValue::get(BufAddr.getPointer()); 2764 } 2765 2766 case Builtin::BI__builtin_os_log_format_buffer_size: { 2767 analyze_os_log::OSLogBufferLayout Layout; 2768 analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout); 2769 return RValue::get(ConstantInt::get(ConvertType(E->getType()), 2770 Layout.size().getQuantity())); 2771 } 2772 2773 case Builtin::BI__xray_customevent: { 2774 if (!ShouldXRayInstrumentFunction()) 2775 return RValue::getIgnored(); 2776 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) { 2777 if (XRayAttr->neverXRayInstrument()) 2778 return RValue::getIgnored(); 2779 } 2780 Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent); 2781 auto FTy = F->getFunctionType(); 2782 auto Arg0 = E->getArg(0); 2783 auto Arg0Val = EmitScalarExpr(Arg0); 2784 auto Arg0Ty = Arg0->getType(); 2785 auto PTy0 = FTy->getParamType(0); 2786 if (PTy0 != Arg0Val->getType()) { 2787 if (Arg0Ty->isArrayType()) 2788 Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer(); 2789 else 2790 Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0); 2791 } 2792 auto Arg1 = EmitScalarExpr(E->getArg(1)); 2793 auto PTy1 = FTy->getParamType(1); 2794 if (PTy1 != Arg1->getType()) 2795 Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1); 2796 return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1})); 2797 } 2798 } 2799 2800 // If this is an alias for a lib function (e.g. 
__builtin_sin), emit 2801 // the call using the normal call path, but using the unmangled 2802 // version of the function name. 2803 if (getContext().BuiltinInfo.isLibFunction(BuiltinID)) 2804 return emitLibraryCall(*this, FD, E, 2805 CGM.getBuiltinLibFunction(FD, BuiltinID)); 2806 2807 // If this is a predefined lib function (e.g. malloc), emit the call 2808 // using exactly the normal call path. 2809 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID)) 2810 return emitLibraryCall(*this, FD, E, 2811 cast<llvm::Constant>(EmitScalarExpr(E->getCallee()))); 2812 2813 // Check that a call to a target specific builtin has the correct target 2814 // features. 2815 // This is down here to avoid non-target specific builtins, however, if 2816 // generic builtins start to require generic target features then we 2817 // can move this up to the beginning of the function. 2818 checkTargetFeatures(E, FD); 2819 2820 // See if we have a target specific intrinsic. 2821 const char *Name = getContext().BuiltinInfo.getName(BuiltinID); 2822 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic; 2823 StringRef Prefix = 2824 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch()); 2825 if (!Prefix.empty()) { 2826 IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name); 2827 // NOTE we dont need to perform a compatibility flag check here since the 2828 // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the 2829 // MS builtins via ALL_MS_LANGUAGES and are filtered earlier. 2830 if (IntrinsicID == Intrinsic::not_intrinsic) 2831 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name); 2832 } 2833 2834 if (IntrinsicID != Intrinsic::not_intrinsic) { 2835 SmallVector<Value*, 16> Args; 2836 2837 // Find out if any arguments are required to be integer constant 2838 // expressions. 
2839 unsigned ICEArguments = 0; 2840 ASTContext::GetBuiltinTypeError Error; 2841 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 2842 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 2843 2844 Function *F = CGM.getIntrinsic(IntrinsicID); 2845 llvm::FunctionType *FTy = F->getFunctionType(); 2846 2847 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { 2848 Value *ArgValue; 2849 // If this is a normal argument, just emit it as a scalar. 2850 if ((ICEArguments & (1 << i)) == 0) { 2851 ArgValue = EmitScalarExpr(E->getArg(i)); 2852 } else { 2853 // If this is required to be a constant, constant fold it so that we 2854 // know that the generated intrinsic gets a ConstantInt. 2855 llvm::APSInt Result; 2856 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext()); 2857 assert(IsConst && "Constant arg isn't actually constant?"); 2858 (void)IsConst; 2859 ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result); 2860 } 2861 2862 // If the intrinsic arg type is different from the builtin arg type 2863 // we need to do a bit cast. 2864 llvm::Type *PTy = FTy->getParamType(i); 2865 if (PTy != ArgValue->getType()) { 2866 assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) && 2867 "Must be able to losslessly bit cast to param"); 2868 ArgValue = Builder.CreateBitCast(ArgValue, PTy); 2869 } 2870 2871 Args.push_back(ArgValue); 2872 } 2873 2874 Value *V = Builder.CreateCall(F, Args); 2875 QualType BuiltinRetType = E->getType(); 2876 2877 llvm::Type *RetTy = VoidTy; 2878 if (!BuiltinRetType->isVoidType()) 2879 RetTy = ConvertType(BuiltinRetType); 2880 2881 if (RetTy != V->getType()) { 2882 assert(V->getType()->canLosslesslyBitCastTo(RetTy) && 2883 "Must be able to losslessly bit cast result type"); 2884 V = Builder.CreateBitCast(V, RetTy); 2885 } 2886 2887 return RValue::get(V); 2888 } 2889 2890 // See if we have a target specific builtin that needs to be lowered. 
2891 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E)) 2892 return RValue::get(V); 2893 2894 ErrorUnsupported(E, "builtin function"); 2895 2896 // Unknown builtin, for now just dump it out and return undef. 2897 return GetUndefRValue(E->getType()); 2898 } 2899 2900 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, 2901 unsigned BuiltinID, const CallExpr *E, 2902 llvm::Triple::ArchType Arch) { 2903 switch (Arch) { 2904 case llvm::Triple::arm: 2905 case llvm::Triple::armeb: 2906 case llvm::Triple::thumb: 2907 case llvm::Triple::thumbeb: 2908 return CGF->EmitARMBuiltinExpr(BuiltinID, E); 2909 case llvm::Triple::aarch64: 2910 case llvm::Triple::aarch64_be: 2911 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E); 2912 case llvm::Triple::x86: 2913 case llvm::Triple::x86_64: 2914 return CGF->EmitX86BuiltinExpr(BuiltinID, E); 2915 case llvm::Triple::ppc: 2916 case llvm::Triple::ppc64: 2917 case llvm::Triple::ppc64le: 2918 return CGF->EmitPPCBuiltinExpr(BuiltinID, E); 2919 case llvm::Triple::r600: 2920 case llvm::Triple::amdgcn: 2921 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E); 2922 case llvm::Triple::systemz: 2923 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E); 2924 case llvm::Triple::nvptx: 2925 case llvm::Triple::nvptx64: 2926 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E); 2927 case llvm::Triple::wasm32: 2928 case llvm::Triple::wasm64: 2929 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E); 2930 default: 2931 return nullptr; 2932 } 2933 } 2934 2935 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, 2936 const CallExpr *E) { 2937 if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) { 2938 assert(getContext().getAuxTargetInfo() && "Missing aux target info"); 2939 return EmitTargetArchBuiltinExpr( 2940 this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E, 2941 getContext().getAuxTargetInfo()->getTriple().getArch()); 2942 } 2943 2944 return EmitTargetArchBuiltinExpr(this, BuiltinID, E, 2945 getTarget().getTriple().getArch()); 
2946 } 2947 2948 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF, 2949 NeonTypeFlags TypeFlags, 2950 bool V1Ty=false) { 2951 int IsQuad = TypeFlags.isQuad(); 2952 switch (TypeFlags.getEltType()) { 2953 case NeonTypeFlags::Int8: 2954 case NeonTypeFlags::Poly8: 2955 return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad)); 2956 case NeonTypeFlags::Int16: 2957 case NeonTypeFlags::Poly16: 2958 case NeonTypeFlags::Float16: 2959 return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad)); 2960 case NeonTypeFlags::Int32: 2961 return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad)); 2962 case NeonTypeFlags::Int64: 2963 case NeonTypeFlags::Poly64: 2964 return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad)); 2965 case NeonTypeFlags::Poly128: 2966 // FIXME: i128 and f128 doesn't get fully support in Clang and llvm. 2967 // There is a lot of i128 and f128 API missing. 2968 // so we use v16i8 to represent poly128 and get pattern matched. 2969 return llvm::VectorType::get(CGF->Int8Ty, 16); 2970 case NeonTypeFlags::Float32: 2971 return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad)); 2972 case NeonTypeFlags::Float64: 2973 return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 
1 : (1 << IsQuad)); 2974 } 2975 llvm_unreachable("Unknown vector element type!"); 2976 } 2977 2978 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF, 2979 NeonTypeFlags IntTypeFlags) { 2980 int IsQuad = IntTypeFlags.isQuad(); 2981 switch (IntTypeFlags.getEltType()) { 2982 case NeonTypeFlags::Int32: 2983 return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad)); 2984 case NeonTypeFlags::Int64: 2985 return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad)); 2986 default: 2987 llvm_unreachable("Type can't be converted to floating-point!"); 2988 } 2989 } 2990 2991 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) { 2992 unsigned nElts = V->getType()->getVectorNumElements(); 2993 Value* SV = llvm::ConstantVector::getSplat(nElts, C); 2994 return Builder.CreateShuffleVector(V, V, SV, "lane"); 2995 } 2996 2997 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops, 2998 const char *name, 2999 unsigned shift, bool rightshift) { 3000 unsigned j = 0; 3001 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); 3002 ai != ae; ++ai, ++j) 3003 if (shift > 0 && shift == j) 3004 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift); 3005 else 3006 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name); 3007 3008 return Builder.CreateCall(F, Ops, name); 3009 } 3010 3011 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty, 3012 bool neg) { 3013 int SV = cast<ConstantInt>(V)->getSExtValue(); 3014 return ConstantInt::get(Ty, neg ? -SV : SV); 3015 } 3016 3017 // \brief Right-shift a vector by a constant. 
3018 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift, 3019 llvm::Type *Ty, bool usgn, 3020 const char *name) { 3021 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 3022 3023 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue(); 3024 int EltSize = VTy->getScalarSizeInBits(); 3025 3026 Vec = Builder.CreateBitCast(Vec, Ty); 3027 3028 // lshr/ashr are undefined when the shift amount is equal to the vector 3029 // element size. 3030 if (ShiftAmt == EltSize) { 3031 if (usgn) { 3032 // Right-shifting an unsigned value by its size yields 0. 3033 return llvm::ConstantAggregateZero::get(VTy); 3034 } else { 3035 // Right-shifting a signed value by its size is equivalent 3036 // to a shift of size-1. 3037 --ShiftAmt; 3038 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt); 3039 } 3040 } 3041 3042 Shift = EmitNeonShiftVector(Shift, Ty, false); 3043 if (usgn) 3044 return Builder.CreateLShr(Vec, Shift, name); 3045 else 3046 return Builder.CreateAShr(Vec, Shift, name); 3047 } 3048 3049 enum { 3050 AddRetType = (1 << 0), 3051 Add1ArgType = (1 << 1), 3052 Add2ArgTypes = (1 << 2), 3053 3054 VectorizeRetType = (1 << 3), 3055 VectorizeArgTypes = (1 << 4), 3056 3057 InventFloatType = (1 << 5), 3058 UnsignedAlts = (1 << 6), 3059 3060 Use64BitVectors = (1 << 7), 3061 Use128BitVectors = (1 << 8), 3062 3063 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes, 3064 VectorRet = AddRetType | VectorizeRetType, 3065 VectorRetGetArgs01 = 3066 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes, 3067 FpCmpzModifiers = 3068 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType 3069 }; 3070 3071 namespace { 3072 struct NeonIntrinsicInfo { 3073 const char *NameHint; 3074 unsigned BuiltinID; 3075 unsigned LLVMIntrinsic; 3076 unsigned AltLLVMIntrinsic; 3077 unsigned TypeModifier; 3078 3079 bool operator<(unsigned RHSBuiltinID) const { 3080 return BuiltinID < RHSBuiltinID; 3081 } 3082 bool operator<(const NeonIntrinsicInfo &TE) const { 3083 return 
BuiltinID < TE.BuiltinID; 3084 } 3085 }; 3086 } // end anonymous namespace 3087 3088 #define NEONMAP0(NameBase) \ 3089 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 } 3090 3091 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ 3092 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \ 3093 Intrinsic::LLVMIntrinsic, 0, TypeModifier } 3094 3095 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \ 3096 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \ 3097 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \ 3098 TypeModifier } 3099 3100 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { 3101 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), 3102 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), 3103 NEONMAP1(vabs_v, arm_neon_vabs, 0), 3104 NEONMAP1(vabsq_v, arm_neon_vabs, 0), 3105 NEONMAP0(vaddhn_v), 3106 NEONMAP1(vaesdq_v, arm_neon_aesd, 0), 3107 NEONMAP1(vaeseq_v, arm_neon_aese, 0), 3108 NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0), 3109 NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0), 3110 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType), 3111 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType), 3112 NEONMAP1(vcage_v, arm_neon_vacge, 0), 3113 NEONMAP1(vcageq_v, arm_neon_vacge, 0), 3114 NEONMAP1(vcagt_v, arm_neon_vacgt, 0), 3115 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0), 3116 NEONMAP1(vcale_v, arm_neon_vacge, 0), 3117 NEONMAP1(vcaleq_v, arm_neon_vacge, 0), 3118 NEONMAP1(vcalt_v, arm_neon_vacgt, 0), 3119 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0), 3120 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType), 3121 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType), 3122 NEONMAP1(vclz_v, ctlz, Add1ArgType), 3123 NEONMAP1(vclzq_v, ctlz, Add1ArgType), 3124 NEONMAP1(vcnt_v, ctpop, Add1ArgType), 3125 NEONMAP1(vcntq_v, ctpop, Add1ArgType), 3126 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0), 3127 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0), 3128 NEONMAP0(vcvt_f32_v), 3129 NEONMAP2(vcvt_n_f32_v, 
arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), 3130 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0), 3131 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0), 3132 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0), 3133 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0), 3134 NEONMAP0(vcvt_s32_v), 3135 NEONMAP0(vcvt_s64_v), 3136 NEONMAP0(vcvt_u32_v), 3137 NEONMAP0(vcvt_u64_v), 3138 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0), 3139 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0), 3140 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0), 3141 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0), 3142 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0), 3143 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0), 3144 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0), 3145 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0), 3146 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0), 3147 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0), 3148 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0), 3149 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0), 3150 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0), 3151 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0), 3152 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0), 3153 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0), 3154 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0), 3155 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0), 3156 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0), 3157 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0), 3158 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0), 3159 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0), 3160 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0), 3161 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0), 3162 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0), 3163 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0), 3164 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0), 3165 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0), 3166 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0), 3167 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0), 3168 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0), 3169 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0), 3170 NEONMAP0(vcvtq_f32_v), 3171 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, 
arm_neon_vcvtfxs2fp, 0), 3172 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0), 3173 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0), 3174 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0), 3175 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0), 3176 NEONMAP0(vcvtq_s32_v), 3177 NEONMAP0(vcvtq_s64_v), 3178 NEONMAP0(vcvtq_u32_v), 3179 NEONMAP0(vcvtq_u64_v), 3180 NEONMAP0(vext_v), 3181 NEONMAP0(vextq_v), 3182 NEONMAP0(vfma_v), 3183 NEONMAP0(vfmaq_v), 3184 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), 3185 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), 3186 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), 3187 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), 3188 NEONMAP0(vld1_dup_v), 3189 NEONMAP1(vld1_v, arm_neon_vld1, 0), 3190 NEONMAP0(vld1q_dup_v), 3191 NEONMAP1(vld1q_v, arm_neon_vld1, 0), 3192 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0), 3193 NEONMAP1(vld2_v, arm_neon_vld2, 0), 3194 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0), 3195 NEONMAP1(vld2q_v, arm_neon_vld2, 0), 3196 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0), 3197 NEONMAP1(vld3_v, arm_neon_vld3, 0), 3198 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0), 3199 NEONMAP1(vld3q_v, arm_neon_vld3, 0), 3200 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0), 3201 NEONMAP1(vld4_v, arm_neon_vld4, 0), 3202 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0), 3203 NEONMAP1(vld4q_v, arm_neon_vld4, 0), 3204 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), 3205 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType), 3206 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType), 3207 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), 3208 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), 3209 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType), 3210 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType), 3211 NEONMAP2(vminq_v, arm_neon_vminu, 
arm_neon_vmins, Add1ArgType | UnsignedAlts), 3212 NEONMAP0(vmovl_v), 3213 NEONMAP0(vmovn_v), 3214 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType), 3215 NEONMAP0(vmull_v), 3216 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType), 3217 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), 3218 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), 3219 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType), 3220 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), 3221 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), 3222 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType), 3223 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts), 3224 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts), 3225 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType), 3226 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType), 3227 NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), 3228 NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), 3229 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0), 3230 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0), 3231 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType), 3232 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType), 3233 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType), 3234 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts), 3235 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType), 3236 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType), 3237 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType), 3238 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType), 3239 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType), 3240 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), 3241 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), 3242 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, 
arm_neon_vqshifts, UnsignedAlts), 3243 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), 3244 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts), 3245 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), 3246 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0), 3247 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0), 3248 NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), 3249 NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), 3250 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType), 3251 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), 3252 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), 3253 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType), 3254 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType), 3255 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), 3256 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), 3257 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType), 3258 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType), 3259 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType), 3260 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType), 3261 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType), 3262 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType), 3263 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType), 3264 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType), 3265 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType), 3266 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType), 3267 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType), 3268 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType), 3269 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), 3270 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), 3271 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts), 3272 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, 
arm_neon_vrshifts, UnsignedAlts),
  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
  NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
  NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
  NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
  NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts,
           Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts,
           Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP1(vst1_v, arm_neon_vst1, 0),
  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2_v, arm_neon_vst2, 0),
  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3_v, arm_neon_vst3, 0),
  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4_v, arm_neon_vst4, 0),
  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtrn_v),
  NEONMAP0(vtrnq_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
  NEONMAP0(vuzp_v),
  NEONMAP0(vuzpq_v),
  NEONMAP0(vzip_v),
  NEONMAP0(vzipq_v)
};

// Table of AArch64 vector NEON builtins (the "_v" / "q_v" forms) and the LLVM
// intrinsic(s) plus type-modifier flags used to emit them.
//
// Ordering matters: findNeonIntrinsicInMap() locates entries with
// std::lower_bound on the builtin ID and, in builds with assertions, checks
// std::is_sorted on the table it is given. New entries must therefore be
// inserted at their sorted position, not appended.
//
// NOTE(review): the NEONMAP0/1/2 macros are defined earlier in the file. From
// their use here, NEONMAP0 names a builtin with no intrinsic, NEONMAP1 adds
// one intrinsic and a modifier mask, and NEONMAP2 adds an alternate intrinsic
// as well -- confirm against the macro definitions.
static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
  NEONMAP0(vaddhn_v),
  NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd,
           Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd,
           Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub,
           Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub,
           Add1ArgType | UnsignedAlts),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd,
           Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd,
           Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn,
           Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl,
           Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl,
           Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl,
           Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl,
           Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub,
           Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub,
           Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd,
           Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd,
           Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl,
           Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl,
           Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
  NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
  NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl,
           Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl,
           Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
};

// Table of AArch64 scalar ("SISD") NEON builtins -- the entries are the
// b/h/s/d-suffixed and across-vector forms -- with the LLVM intrinsic and
// type-modifier flags for each.
//
// As with the vector table, entries must remain in sorted order because
// findNeonIntrinsicInMap() binary-searches the table it is handed.
// NOTE(review): presumably this is the table whose entries reach
// EmitCommonNeonSISDBuiltinExpr(); the call site is outside this excerpt.
static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu,
           Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType), 3615 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), 3616 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType), 3617 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), 3618 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType), 3619 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType), 3620 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType), 3621 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), 3622 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType), 3623 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), 3624 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType), 3625 }; 3626 3627 #undef NEONMAP0 3628 #undef NEONMAP1 3629 #undef NEONMAP2 3630 3631 static bool NEONSIMDIntrinsicsProvenSorted = false; 3632 3633 static bool AArch64SIMDIntrinsicsProvenSorted = false; 3634 static bool AArch64SISDIntrinsicsProvenSorted = false; 3635 3636 3637 static const NeonIntrinsicInfo * 3638 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap, 3639 unsigned BuiltinID, bool &MapProvenSorted) { 3640 3641 #ifndef NDEBUG 3642 if (!MapProvenSorted) { 3643 assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap))); 3644 MapProvenSorted = true; 3645 } 3646 #endif 3647 3648 const NeonIntrinsicInfo *Builtin = 3649 std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID); 3650 3651 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID) 3652 return Builtin; 3653 3654 return nullptr; 3655 } 3656 3657 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID, 3658 unsigned Modifier, 3659 llvm::Type *ArgType, 3660 const CallExpr *E) { 3661 int VectorSize = 0; 3662 if (Modifier & Use64BitVectors) 3663 VectorSize = 64; 3664 else if (Modifier & Use128BitVectors) 3665 VectorSize = 128; 3666 3667 // Return type. 
3668 SmallVector<llvm::Type *, 3> Tys; 3669 if (Modifier & AddRetType) { 3670 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); 3671 if (Modifier & VectorizeRetType) 3672 Ty = llvm::VectorType::get( 3673 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1); 3674 3675 Tys.push_back(Ty); 3676 } 3677 3678 // Arguments. 3679 if (Modifier & VectorizeArgTypes) { 3680 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1; 3681 ArgType = llvm::VectorType::get(ArgType, Elts); 3682 } 3683 3684 if (Modifier & (Add1ArgType | Add2ArgTypes)) 3685 Tys.push_back(ArgType); 3686 3687 if (Modifier & Add2ArgTypes) 3688 Tys.push_back(ArgType); 3689 3690 if (Modifier & InventFloatType) 3691 Tys.push_back(FloatTy); 3692 3693 return CGM.getIntrinsic(IntrinsicID, Tys); 3694 } 3695 3696 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, 3697 const NeonIntrinsicInfo &SISDInfo, 3698 SmallVectorImpl<Value *> &Ops, 3699 const CallExpr *E) { 3700 unsigned BuiltinID = SISDInfo.BuiltinID; 3701 unsigned int Int = SISDInfo.LLVMIntrinsic; 3702 unsigned Modifier = SISDInfo.TypeModifier; 3703 const char *s = SISDInfo.NameHint; 3704 3705 switch (BuiltinID) { 3706 case NEON::BI__builtin_neon_vcled_s64: 3707 case NEON::BI__builtin_neon_vcled_u64: 3708 case NEON::BI__builtin_neon_vcles_f32: 3709 case NEON::BI__builtin_neon_vcled_f64: 3710 case NEON::BI__builtin_neon_vcltd_s64: 3711 case NEON::BI__builtin_neon_vcltd_u64: 3712 case NEON::BI__builtin_neon_vclts_f32: 3713 case NEON::BI__builtin_neon_vcltd_f64: 3714 case NEON::BI__builtin_neon_vcales_f32: 3715 case NEON::BI__builtin_neon_vcaled_f64: 3716 case NEON::BI__builtin_neon_vcalts_f32: 3717 case NEON::BI__builtin_neon_vcaltd_f64: 3718 // Only one direction of comparisons actually exist, cmle is actually a cmge 3719 // with swapped operands. The table gives us the right intrinsic but we 3720 // still need to do the swap. 
std::swap(Ops[0], Ops[1]);
    break;
  }

  assert(Int && "Generic code assumes a valid intrinsic");

  // Determine the type(s) of this overloaded AArch64 intrinsic.
  const Expr *Arg = E->getArg(0);
  llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
  Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);

  // Walk the intrinsic's formal parameters; any operand whose bit width does
  // not match its parameter is a scalar that must be placed into lane 0 of an
  // otherwise-undef vector of the parameter type.
  int j = 0;
  ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j) {
    llvm::Type *ArgTy = ai->getType();
    if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
        ArgTy->getPrimitiveSizeInBits())
      continue;

    assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
    // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
    // it before inserting.
    Ops[j] =
        CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
    Ops[j] =
        CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
  }

  Value *Result = CGF.EmitNeonCall(F, Ops, s);
  llvm::Type *ResultType = CGF.ConvertType(E->getType());
  // When the builtin's result is narrower than what the intrinsic produced,
  // the value of interest is lane 0 of the returned vector.
  if (ResultType->getPrimitiveSizeInBits() <
      Result->getType()->getPrimitiveSizeInBits())
    return CGF.Builder.CreateExtractElement(Result, C0);

  return CGF.Builder.CreateBitCast(Result, ResultType, s);
}

/// Emit IR for a NEON builtin handled by the common lowering code.
///
/// The last call argument of \p E must be an integer constant encoding the
/// NEON vector type (NeonTypeFlags); nullptr is returned when it is not a
/// constant or when GetNeonType yields no vector type. Builtins listed in the
/// switch get bespoke lowering; every other builtin is emitted as a call to
/// the intrinsic chosen by LookupNeonLLVMIntrinsic, bitcast to the builtin's
/// result type.
///
/// \param LLVMIntrinsic    primary intrinsic ID.
/// \param AltLLVMIntrinsic alternate intrinsic ID; selected up front when
///                         \p Modifier has UnsignedAlts set and the type is
///                         not unsigned, and explicitly by some cases.
/// \param NameHint         name given to values emitted on the generic paths.
/// \param Ops              already-emitted operands; modified in place.
/// \param PtrOp0           address whose alignment is passed to vld1 / vst*.
/// \param PtrOp1           address whose alignment is passed to vld2-4 and
///                         their lane variants.
Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
    unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
    const char *NameHint, unsigned Modifier, const CallExpr *E,
    SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
  // Get the last argument, which specifies the vector type.
  llvm::APSInt NeonTypeConst;
  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
    return nullptr;

  // Determine the type of this overloaded NEON intrinsic.
  NeonTypeFlags Type(NeonTypeConst.getZExtValue());
  bool Usgn = Type.isUnsigned();
  bool Quad = Type.isQuad();

  llvm::VectorType *VTy = GetNeonType(this, Type);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  // Helper: the alignment of an address as an i32 constant operand.
  auto getAlignmentValue32 = [&](Address addr) -> Value* {
    return Builder.getInt32(addr.getAlignment().getQuantity());
  };

  // Default intrinsic for the cases below and for the generic path.
  unsigned Int = LLVMIntrinsic;
  if ((Modifier & UnsignedAlts) && !Usgn)
    Int = AltLLVMIntrinsic;

  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_vabs_v:
  case NEON::BI__builtin_neon_vabsq_v:
    // Floating-point vectors use the generic llvm.fabs intrinsic.
    if (VTy->getElementType()->isFloatingPointTy())
      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
  case NEON::BI__builtin_neon_vaddhn_v: {
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %sum = add <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");

    // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
  }
  case NEON::BI__builtin_neon_vcale_v:
  case NEON::BI__builtin_neon_vcaleq_v:
  case NEON::BI__builtin_neon_vcalt_v:
  case NEON::BI__builtin_neon_vcaltq_v:
    std::swap(Ops[0], Ops[1]);
    // Intentional fall-through: after swapping the operands these share the
    // vcage/vcagt lowering below.
  case NEON::BI__builtin_neon_vcage_v:
  case NEON::BI__builtin_neon_vcageq_v:
  case NEON::BI__builtin_neon_vcagt_v:
  case NEON::BI__builtin_neon_vcagtq_v: {
    // The intrinsic is overloaded on both the result vector type and a
    // floating-point vector type with the same lane width and lane count.
    llvm::Type *VecFlt = llvm::VectorType::get(
        VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
        VTy->getNumElements());
    llvm::Type *Tys[] = { VTy, VecFlt };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vclz_v:
  case NEON::BI__builtin_neon_vclzq_v:
    // We generate target-independent intrinsic, which needs a second argument
    // for whether or not clz of zero is undefined; on ARM it isn't.
    Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
    break;
  case NEON::BI__builtin_neon_vcvt_f32_v:
  case NEON::BI__builtin_neon_vcvtq_f32_v:
    // Integer -> float conversion: view the operand as the integer vector
    // type, then convert to a Float32 vector with the same Quad-ness.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_n_f32_v:
  case NEON::BI__builtin_neon_vcvt_n_f64_v:
  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
    // Overloaded on {float vector type, Ty}. Unlike the default chosen above,
    // an unsigned type picks LLVMIntrinsic and a signed one AltLLVMIntrinsic.
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
    Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_n_s32_v:
  case NEON::BI__builtin_neon_vcvt_n_u32_v:
  case NEON::BI__builtin_neon_vcvt_n_s64_v:
  case NEON::BI__builtin_neon_vcvt_n_u64_v:
  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
    // Overloaded on {Ty, float vector type}.
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_s32_v:
  case NEON::BI__builtin_neon_vcvt_u32_v:
  case NEON::BI__builtin_neon_vcvt_s64_v:
  case NEON::BI__builtin_neon_vcvt_u64_v:
  case NEON::BI__builtin_neon_vcvtq_s32_v:
  case NEON::BI__builtin_neon_vcvtq_u32_v:
  case NEON::BI__builtin_neon_vcvtq_s64_v:
  case NEON::BI__builtin_neon_vcvtq_u64_v: {
    // Float -> integer conversion emitted as plain fptoui / fptosi.
    Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
    return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v:
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v:
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v:
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    // Overloaded on {Ty, float vector type}.
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vext_v:
  case NEON::BI__builtin_neon_vextq_v: {
    // Shuffle mask CV, CV+1, ..., CV+N-1 over the two source vectors, where
    // CV is the constant third operand and N the lane count.
    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
    SmallVector<uint32_t, 16> Indices;
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
      Indices.push_back(i+CV);

    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
  }
  case NEON::BI__builtin_neon_vfma_v:
  case NEON::BI__builtin_neon_vfmaq_v: {
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);

    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
  }
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v: {
    // The alignment of PtrOp0 is appended as a trailing i32 operand.
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
  }
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v:
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v:
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v: {
    // Call the load intrinsic on (Ops[1], alignment of PtrOp1), then store
    // its result through Ops[0] cast to a pointer to the result type.
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    Value *Align = getAlignmentValue32(PtrOp1);
    Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    // Load a single element from PtrOp0, insert it into lane 0 of an undef
    // vector, and splat that lane.
    Value *V = UndefValue::get(Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
    LoadInst *Ld = Builder.CreateLoad(PtrOp0);
    llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v:
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v:
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    // Cast operands 2 .. size-2 to the vector type; the last operand is left
    // as is.
    for (unsigned I = 2; I < Ops.size() - 1; ++I)
      Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
    Ops.push_back(getAlignmentValue32(PtrOp1));
    // Ops[0] is the destination pointer and is not passed to the intrinsic.
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vmovl_v: {
    // Widen each lane: zero-extend for unsigned types, sign-extend otherwise.
    llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
    if (Usgn)
      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
  }
  case NEON::BI__builtin_neon_vmovn_v: {
    // Narrow each lane by truncation from the extended-element vector type.
    llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
  }
  case NEON::BI__builtin_neon_vmull_v:
    // FIXME: the integer vmull operations could be emitted in terms of pure
    // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
    // hoisting the exts outside loops. Until global ISel comes along that can
    // see through such movement this leads to bad CodeGen. So we need an
    // intrinsic for now.
    Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
    Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  case NEON::BI__builtin_neon_vpadal_v:
  case NEON::BI__builtin_neon_vpadalq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy =
      llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vpaddl_v:
  case NEON::BI__builtin_neon_vpaddlq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
  }
  case NEON::BI__builtin_neon_vqdmlal_v:
  case NEON::BI__builtin_neon_vqdmlsl_v: {
    // Two steps: call LLVMIntrinsic on Ops[1..], then call AltLLVMIntrinsic
    // on {Ops[0], result of the first call}.
    SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
    Ops.resize(2);
    return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vqshl_n_v:
  case NEON::BI__builtin_neon_vqshlq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
                        1, false);
  case NEON::BI__builtin_neon_vqshlu_n_v:
  case NEON::BI__builtin_neon_vqshluq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
                        1, false);
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:
  case NEON::BI__builtin_neon_vrsqrte_v:
  case NEON::BI__builtin_neon_vrsqrteq_v:
    // Floating-point types use LLVMIntrinsic, all others AltLLVMIntrinsic.
    Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);

  case NEON::BI__builtin_neon_vrshr_n_v:
  case NEON::BI__builtin_neon_vrshrq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
                        1, true);
  case NEON::BI__builtin_neon_vshl_n_v:
  case NEON::BI__builtin_neon_vshlq_n_v:
    // Immediate left shift emitted as a plain shl by a shift vector.
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
    return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
                             "vshl_n");
  case NEON::BI__builtin_neon_vshll_n_v: {
    // Extend the narrow source to VTy (zext if unsigned, sext otherwise),
    // then shift left.
    llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    if (Usgn)
      Ops[0] = Builder.CreateZExt(Ops[0], VTy);
    else
      Ops[0] = Builder.CreateSExt(Ops[0], VTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
    return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
  }
  case NEON::BI__builtin_neon_vshrn_n_v: {
    // Shift the wide source right (lshr if unsigned, ashr otherwise), then
    // truncate to the narrow result type.
    llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
    if (Usgn)
      Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
    return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
  }
  case NEON::BI__builtin_neon_vshr_n_v:
  case NEON::BI__builtin_neon_vshrq_n_v:
    return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v:
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v:
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v:
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v:
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v:
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v: {
    // All stores: append the alignment of PtrOp0 and call the intrinsic.
    llvm::Type *Tys[] = {Int8PtrTy, Ty};
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
  }
  case NEON::BI__builtin_neon_vsubhn_v: {
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %diff = sub <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");

    // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
  }
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    // Ops[0] is the destination pointer; two shuffles of Ops[1]/Ops[2] are
    // stored to consecutive vector slots 0 and 1 behind it.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      // Mask for result vi: (vi, e+vi, 2+vi, e+2+vi, ...).
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(i+vi);
        Indices.push_back(i+e+vi);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vtst_v:
  case NEON::BI__builtin_neon_vtstq_v: {
    // (a & b) != 0 per lane, sign-extended back to the vector type.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                ConstantAggregateZero::get(Ty));
    return Builder.CreateSExt(Ops[0], Ty, "vtst");
  }
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    // Same store-two-results scheme as vtrn, with a different mask.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      // Mask for result vi: (vi, 2+vi, 4+vi, ...), i.e. every other lane.
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(2*i+vi);

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    // Same store-two-results scheme as vtrn, with a different mask.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      // Mask for result vi pairs lane k of the first source with lane k of
      // the second, where k = (i + vi*e) / 2.
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back((i + vi*e) >> 1);
        Indices.push_back(((i + vi*e) >> 1)+e);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  }

  assert(Int && "Expected valid intrinsic number");

  // Determine the type(s) of this overloaded AArch64 intrinsic.
4176 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E); 4177 4178 Value *Result = EmitNeonCall(F, Ops, NameHint); 4179 llvm::Type *ResultType = ConvertType(E->getType()); 4180 // AArch64 intrinsic one-element vector type cast to 4181 // scalar type expected by the builtin 4182 return Builder.CreateBitCast(Result, ResultType, NameHint); 4183 } 4184 4185 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr( 4186 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp, 4187 const CmpInst::Predicate Ip, const Twine &Name) { 4188 llvm::Type *OTy = Op->getType(); 4189 4190 // FIXME: this is utterly horrific. We should not be looking at previous 4191 // codegen context to find out what needs doing. Unfortunately TableGen 4192 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32 4193 // (etc). 4194 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op)) 4195 OTy = BI->getOperand(0)->getType(); 4196 4197 Op = Builder.CreateBitCast(Op, OTy); 4198 if (OTy->getScalarType()->isFloatingPointTy()) { 4199 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy)); 4200 } else { 4201 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy)); 4202 } 4203 return Builder.CreateSExt(Op, Ty, Name); 4204 } 4205 4206 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, 4207 Value *ExtOp, Value *IndexOp, 4208 llvm::Type *ResTy, unsigned IntID, 4209 const char *Name) { 4210 SmallVector<Value *, 2> TblOps; 4211 if (ExtOp) 4212 TblOps.push_back(ExtOp); 4213 4214 // Build a vector containing sequential number like (0, 1, 2, ..., 15) 4215 SmallVector<uint32_t, 16> Indices; 4216 llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType()); 4217 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) { 4218 Indices.push_back(2*i); 4219 Indices.push_back(2*i+1); 4220 } 4221 4222 int PairPos = 0, End = Ops.size() - 1; 4223 while (PairPos < End) { 4224 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 4225 
Ops[PairPos+1], Indices, 4226 Name)); 4227 PairPos += 2; 4228 } 4229 4230 // If there's an odd number of 64-bit lookup table, fill the high 64-bit 4231 // of the 128-bit lookup table with zero. 4232 if (PairPos == End) { 4233 Value *ZeroTbl = ConstantAggregateZero::get(TblTy); 4234 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], 4235 ZeroTbl, Indices, Name)); 4236 } 4237 4238 Function *TblF; 4239 TblOps.push_back(IndexOp); 4240 TblF = CGF.CGM.getIntrinsic(IntID, ResTy); 4241 4242 return CGF.EmitNeonCall(TblF, TblOps, Name); 4243 } 4244 4245 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) { 4246 unsigned Value; 4247 switch (BuiltinID) { 4248 default: 4249 return nullptr; 4250 case ARM::BI__builtin_arm_nop: 4251 Value = 0; 4252 break; 4253 case ARM::BI__builtin_arm_yield: 4254 case ARM::BI__yield: 4255 Value = 1; 4256 break; 4257 case ARM::BI__builtin_arm_wfe: 4258 case ARM::BI__wfe: 4259 Value = 2; 4260 break; 4261 case ARM::BI__builtin_arm_wfi: 4262 case ARM::BI__wfi: 4263 Value = 3; 4264 break; 4265 case ARM::BI__builtin_arm_sev: 4266 case ARM::BI__sev: 4267 Value = 4; 4268 break; 4269 case ARM::BI__builtin_arm_sevl: 4270 case ARM::BI__sevl: 4271 Value = 5; 4272 break; 4273 } 4274 4275 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), 4276 llvm::ConstantInt::get(Int32Ty, Value)); 4277 } 4278 4279 // Generates the IR for the read/write special register builtin, 4280 // ValueType is the type of the value that is to be written or read, 4281 // RegisterType is the type of the register being written to or read from. 4282 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, 4283 const CallExpr *E, 4284 llvm::Type *RegisterType, 4285 llvm::Type *ValueType, 4286 bool IsRead, 4287 StringRef SysReg = "") { 4288 // write and register intrinsics only support 32 and 64 bit operations. 
4289 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64)) 4290 && "Unsupported size for register."); 4291 4292 CodeGen::CGBuilderTy &Builder = CGF.Builder; 4293 CodeGen::CodeGenModule &CGM = CGF.CGM; 4294 LLVMContext &Context = CGM.getLLVMContext(); 4295 4296 if (SysReg.empty()) { 4297 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts(); 4298 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString(); 4299 } 4300 4301 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) }; 4302 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); 4303 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); 4304 4305 llvm::Type *Types[] = { RegisterType }; 4306 4307 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32); 4308 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64)) 4309 && "Can't fit 64-bit value in 32-bit register"); 4310 4311 if (IsRead) { 4312 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); 4313 llvm::Value *Call = Builder.CreateCall(F, Metadata); 4314 4315 if (MixedTypes) 4316 // Read into 64 bit register and then truncate result to 32 bit. 4317 return Builder.CreateTrunc(Call, ValueType); 4318 4319 if (ValueType->isPointerTy()) 4320 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*). 4321 return Builder.CreateIntToPtr(Call, ValueType); 4322 4323 return Call; 4324 } 4325 4326 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); 4327 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1)); 4328 if (MixedTypes) { 4329 // Extend 32 bit write value to 64 bit to pass to write. 4330 ArgValue = Builder.CreateZExt(ArgValue, RegisterType); 4331 return Builder.CreateCall(F, { Metadata, ArgValue }); 4332 } 4333 4334 if (ValueType->isPointerTy()) { 4335 // Have VoidPtrTy ArgValue but want to return an i32/i64. 
4336 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType); 4337 return Builder.CreateCall(F, { Metadata, ArgValue }); 4338 } 4339 4340 return Builder.CreateCall(F, { Metadata, ArgValue }); 4341 } 4342 4343 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra 4344 /// argument that specifies the vector type. 4345 static bool HasExtraNeonArgument(unsigned BuiltinID) { 4346 switch (BuiltinID) { 4347 default: break; 4348 case NEON::BI__builtin_neon_vget_lane_i8: 4349 case NEON::BI__builtin_neon_vget_lane_i16: 4350 case NEON::BI__builtin_neon_vget_lane_i32: 4351 case NEON::BI__builtin_neon_vget_lane_i64: 4352 case NEON::BI__builtin_neon_vget_lane_f32: 4353 case NEON::BI__builtin_neon_vgetq_lane_i8: 4354 case NEON::BI__builtin_neon_vgetq_lane_i16: 4355 case NEON::BI__builtin_neon_vgetq_lane_i32: 4356 case NEON::BI__builtin_neon_vgetq_lane_i64: 4357 case NEON::BI__builtin_neon_vgetq_lane_f32: 4358 case NEON::BI__builtin_neon_vset_lane_i8: 4359 case NEON::BI__builtin_neon_vset_lane_i16: 4360 case NEON::BI__builtin_neon_vset_lane_i32: 4361 case NEON::BI__builtin_neon_vset_lane_i64: 4362 case NEON::BI__builtin_neon_vset_lane_f32: 4363 case NEON::BI__builtin_neon_vsetq_lane_i8: 4364 case NEON::BI__builtin_neon_vsetq_lane_i16: 4365 case NEON::BI__builtin_neon_vsetq_lane_i32: 4366 case NEON::BI__builtin_neon_vsetq_lane_i64: 4367 case NEON::BI__builtin_neon_vsetq_lane_f32: 4368 case NEON::BI__builtin_neon_vsha1h_u32: 4369 case NEON::BI__builtin_neon_vsha1cq_u32: 4370 case NEON::BI__builtin_neon_vsha1pq_u32: 4371 case NEON::BI__builtin_neon_vsha1mq_u32: 4372 case ARM::BI_MoveToCoprocessor: 4373 case ARM::BI_MoveToCoprocessor2: 4374 return false; 4375 } 4376 return true; 4377 } 4378 4379 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, 4380 const CallExpr *E) { 4381 if (auto Hint = GetValueForARMHint(BuiltinID)) 4382 return Hint; 4383 4384 if (BuiltinID == ARM::BI__emit) { 4385 bool IsThumb = getTarget().getTriple().getArch() == 
llvm::Triple::thumb; 4386 llvm::FunctionType *FTy = 4387 llvm::FunctionType::get(VoidTy, /*Variadic=*/false); 4388 4389 APSInt Value; 4390 if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext())) 4391 llvm_unreachable("Sema will ensure that the parameter is constant"); 4392 4393 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue(); 4394 4395 llvm::InlineAsm *Emit = 4396 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "", 4397 /*SideEffects=*/true) 4398 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "", 4399 /*SideEffects=*/true); 4400 4401 return Builder.CreateCall(Emit); 4402 } 4403 4404 if (BuiltinID == ARM::BI__builtin_arm_dbg) { 4405 Value *Option = EmitScalarExpr(E->getArg(0)); 4406 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option); 4407 } 4408 4409 if (BuiltinID == ARM::BI__builtin_arm_prefetch) { 4410 Value *Address = EmitScalarExpr(E->getArg(0)); 4411 Value *RW = EmitScalarExpr(E->getArg(1)); 4412 Value *IsData = EmitScalarExpr(E->getArg(2)); 4413 4414 // Locality is not supported on ARM target 4415 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3); 4416 4417 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 4418 return Builder.CreateCall(F, {Address, RW, Locality, IsData}); 4419 } 4420 4421 if (BuiltinID == ARM::BI__builtin_arm_rbit) { 4422 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 4423 return Builder.CreateCall( 4424 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); 4425 } 4426 4427 if (BuiltinID == ARM::BI__clear_cache) { 4428 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 4429 const FunctionDecl *FD = E->getDirectCallee(); 4430 Value *Ops[2]; 4431 for (unsigned i = 0; i < 2; i++) 4432 Ops[i] = EmitScalarExpr(E->getArg(i)); 4433 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 4434 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 4435 StringRef Name = FD->getName(); 4436 return 
EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 4437 } 4438 4439 if (BuiltinID == ARM::BI__builtin_arm_mcrr || 4440 BuiltinID == ARM::BI__builtin_arm_mcrr2) { 4441 Function *F; 4442 4443 switch (BuiltinID) { 4444 default: llvm_unreachable("unexpected builtin"); 4445 case ARM::BI__builtin_arm_mcrr: 4446 F = CGM.getIntrinsic(Intrinsic::arm_mcrr); 4447 break; 4448 case ARM::BI__builtin_arm_mcrr2: 4449 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2); 4450 break; 4451 } 4452 4453 // MCRR{2} instruction has 5 operands but 4454 // the intrinsic has 4 because Rt and Rt2 4455 // are represented as a single unsigned 64 4456 // bit integer in the intrinsic definition 4457 // but internally it's represented as 2 32 4458 // bit integers. 4459 4460 Value *Coproc = EmitScalarExpr(E->getArg(0)); 4461 Value *Opc1 = EmitScalarExpr(E->getArg(1)); 4462 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2)); 4463 Value *CRm = EmitScalarExpr(E->getArg(3)); 4464 4465 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); 4466 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty); 4467 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1); 4468 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty); 4469 4470 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm}); 4471 } 4472 4473 if (BuiltinID == ARM::BI__builtin_arm_mrrc || 4474 BuiltinID == ARM::BI__builtin_arm_mrrc2) { 4475 Function *F; 4476 4477 switch (BuiltinID) { 4478 default: llvm_unreachable("unexpected builtin"); 4479 case ARM::BI__builtin_arm_mrrc: 4480 F = CGM.getIntrinsic(Intrinsic::arm_mrrc); 4481 break; 4482 case ARM::BI__builtin_arm_mrrc2: 4483 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2); 4484 break; 4485 } 4486 4487 Value *Coproc = EmitScalarExpr(E->getArg(0)); 4488 Value *Opc1 = EmitScalarExpr(E->getArg(1)); 4489 Value *CRm = EmitScalarExpr(E->getArg(2)); 4490 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm}); 4491 4492 // Returns an unsigned 64 bit integer, represented 4493 // as two 32 bit integers. 
4494 4495 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1); 4496 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0); 4497 Rt = Builder.CreateZExt(Rt, Int64Ty); 4498 Rt1 = Builder.CreateZExt(Rt1, Int64Ty); 4499 4500 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32); 4501 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true); 4502 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1); 4503 4504 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType())); 4505 } 4506 4507 if (BuiltinID == ARM::BI__builtin_arm_ldrexd || 4508 ((BuiltinID == ARM::BI__builtin_arm_ldrex || 4509 BuiltinID == ARM::BI__builtin_arm_ldaex) && 4510 getContext().getTypeSize(E->getType()) == 64) || 4511 BuiltinID == ARM::BI__ldrexd) { 4512 Function *F; 4513 4514 switch (BuiltinID) { 4515 default: llvm_unreachable("unexpected builtin"); 4516 case ARM::BI__builtin_arm_ldaex: 4517 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd); 4518 break; 4519 case ARM::BI__builtin_arm_ldrexd: 4520 case ARM::BI__builtin_arm_ldrex: 4521 case ARM::BI__ldrexd: 4522 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd); 4523 break; 4524 } 4525 4526 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 4527 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 4528 "ldrexd"); 4529 4530 Value *Val0 = Builder.CreateExtractValue(Val, 1); 4531 Value *Val1 = Builder.CreateExtractValue(Val, 0); 4532 Val0 = Builder.CreateZExt(Val0, Int64Ty); 4533 Val1 = Builder.CreateZExt(Val1, Int64Ty); 4534 4535 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32); 4536 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 4537 Val = Builder.CreateOr(Val, Val1); 4538 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 4539 } 4540 4541 if (BuiltinID == ARM::BI__builtin_arm_ldrex || 4542 BuiltinID == ARM::BI__builtin_arm_ldaex) { 4543 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 4544 4545 QualType Ty = E->getType(); 4546 llvm::Type *RealResTy = ConvertType(Ty); 4547 llvm::Type *PtrTy = llvm::IntegerType::get( 
4548 getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo(); 4549 LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy); 4550 4551 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex 4552 ? Intrinsic::arm_ldaex 4553 : Intrinsic::arm_ldrex, 4554 PtrTy); 4555 Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex"); 4556 4557 if (RealResTy->isPointerTy()) 4558 return Builder.CreateIntToPtr(Val, RealResTy); 4559 else { 4560 llvm::Type *IntResTy = llvm::IntegerType::get( 4561 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy)); 4562 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 4563 return Builder.CreateBitCast(Val, RealResTy); 4564 } 4565 } 4566 4567 if (BuiltinID == ARM::BI__builtin_arm_strexd || 4568 ((BuiltinID == ARM::BI__builtin_arm_stlex || 4569 BuiltinID == ARM::BI__builtin_arm_strex) && 4570 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) { 4571 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex 4572 ? Intrinsic::arm_stlexd 4573 : Intrinsic::arm_strexd); 4574 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty); 4575 4576 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 4577 Value *Val = EmitScalarExpr(E->getArg(0)); 4578 Builder.CreateStore(Val, Tmp); 4579 4580 Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy)); 4581 Val = Builder.CreateLoad(LdPtr); 4582 4583 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 4584 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 4585 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy); 4586 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd"); 4587 } 4588 4589 if (BuiltinID == ARM::BI__builtin_arm_strex || 4590 BuiltinID == ARM::BI__builtin_arm_stlex) { 4591 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 4592 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 4593 4594 QualType Ty = E->getArg(0)->getType(); 4595 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 4596 
getContext().getTypeSize(Ty)); 4597 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 4598 4599 if (StoreVal->getType()->isPointerTy()) 4600 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty); 4601 else { 4602 llvm::Type *IntTy = llvm::IntegerType::get( 4603 getLLVMContext(), 4604 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType())); 4605 StoreVal = Builder.CreateBitCast(StoreVal, IntTy); 4606 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty); 4607 } 4608 4609 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex 4610 ? Intrinsic::arm_stlex 4611 : Intrinsic::arm_strex, 4612 StoreAddr->getType()); 4613 return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex"); 4614 } 4615 4616 switch (BuiltinID) { 4617 case ARM::BI__iso_volatile_load8: 4618 case ARM::BI__iso_volatile_load16: 4619 case ARM::BI__iso_volatile_load32: 4620 case ARM::BI__iso_volatile_load64: { 4621 Value *Ptr = EmitScalarExpr(E->getArg(0)); 4622 QualType ElTy = E->getArg(0)->getType()->getPointeeType(); 4623 CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy); 4624 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 4625 LoadSize.getQuantity() * 8); 4626 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 4627 llvm::LoadInst *Load = 4628 Builder.CreateAlignedLoad(Ptr, LoadSize); 4629 Load->setVolatile(true); 4630 return Load; 4631 } 4632 case ARM::BI__iso_volatile_store8: 4633 case ARM::BI__iso_volatile_store16: 4634 case ARM::BI__iso_volatile_store32: 4635 case ARM::BI__iso_volatile_store64: { 4636 Value *Ptr = EmitScalarExpr(E->getArg(0)); 4637 Value *Value = EmitScalarExpr(E->getArg(1)); 4638 QualType ElTy = E->getArg(0)->getType()->getPointeeType(); 4639 CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); 4640 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 4641 StoreSize.getQuantity() * 8); 4642 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 4643 llvm::StoreInst *Store = 4644 
Builder.CreateAlignedStore(Value, Ptr, 4645 StoreSize); 4646 Store->setVolatile(true); 4647 return Store; 4648 } 4649 } 4650 4651 if (BuiltinID == ARM::BI__builtin_arm_clrex) { 4652 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex); 4653 return Builder.CreateCall(F); 4654 } 4655 4656 // CRC32 4657 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 4658 switch (BuiltinID) { 4659 case ARM::BI__builtin_arm_crc32b: 4660 CRCIntrinsicID = Intrinsic::arm_crc32b; break; 4661 case ARM::BI__builtin_arm_crc32cb: 4662 CRCIntrinsicID = Intrinsic::arm_crc32cb; break; 4663 case ARM::BI__builtin_arm_crc32h: 4664 CRCIntrinsicID = Intrinsic::arm_crc32h; break; 4665 case ARM::BI__builtin_arm_crc32ch: 4666 CRCIntrinsicID = Intrinsic::arm_crc32ch; break; 4667 case ARM::BI__builtin_arm_crc32w: 4668 case ARM::BI__builtin_arm_crc32d: 4669 CRCIntrinsicID = Intrinsic::arm_crc32w; break; 4670 case ARM::BI__builtin_arm_crc32cw: 4671 case ARM::BI__builtin_arm_crc32cd: 4672 CRCIntrinsicID = Intrinsic::arm_crc32cw; break; 4673 } 4674 4675 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 4676 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 4677 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 4678 4679 // crc32{c,}d intrinsics are implemnted as two calls to crc32{c,}w 4680 // intrinsics, hence we need different codegen for these cases. 
4681 if (BuiltinID == ARM::BI__builtin_arm_crc32d || 4682 BuiltinID == ARM::BI__builtin_arm_crc32cd) { 4683 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); 4684 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty); 4685 Value *Arg1b = Builder.CreateLShr(Arg1, C1); 4686 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty); 4687 4688 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 4689 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a}); 4690 return Builder.CreateCall(F, {Res, Arg1b}); 4691 } else { 4692 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty); 4693 4694 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 4695 return Builder.CreateCall(F, {Arg0, Arg1}); 4696 } 4697 } 4698 4699 if (BuiltinID == ARM::BI__builtin_arm_rsr || 4700 BuiltinID == ARM::BI__builtin_arm_rsr64 || 4701 BuiltinID == ARM::BI__builtin_arm_rsrp || 4702 BuiltinID == ARM::BI__builtin_arm_wsr || 4703 BuiltinID == ARM::BI__builtin_arm_wsr64 || 4704 BuiltinID == ARM::BI__builtin_arm_wsrp) { 4705 4706 bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr || 4707 BuiltinID == ARM::BI__builtin_arm_rsr64 || 4708 BuiltinID == ARM::BI__builtin_arm_rsrp; 4709 4710 bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp || 4711 BuiltinID == ARM::BI__builtin_arm_wsrp; 4712 4713 bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 || 4714 BuiltinID == ARM::BI__builtin_arm_wsr64; 4715 4716 llvm::Type *ValueType; 4717 llvm::Type *RegisterType; 4718 if (IsPointerBuiltin) { 4719 ValueType = VoidPtrTy; 4720 RegisterType = Int32Ty; 4721 } else if (Is64Bit) { 4722 ValueType = RegisterType = Int64Ty; 4723 } else { 4724 ValueType = RegisterType = Int32Ty; 4725 } 4726 4727 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead); 4728 } 4729 4730 // Find out if any arguments are required to be integer constant 4731 // expressions. 
4732 unsigned ICEArguments = 0; 4733 ASTContext::GetBuiltinTypeError Error; 4734 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 4735 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 4736 4737 auto getAlignmentValue32 = [&](Address addr) -> Value* { 4738 return Builder.getInt32(addr.getAlignment().getQuantity()); 4739 }; 4740 4741 Address PtrOp0 = Address::invalid(); 4742 Address PtrOp1 = Address::invalid(); 4743 SmallVector<Value*, 4> Ops; 4744 bool HasExtraArg = HasExtraNeonArgument(BuiltinID); 4745 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0); 4746 for (unsigned i = 0, e = NumArgs; i != e; i++) { 4747 if (i == 0) { 4748 switch (BuiltinID) { 4749 case NEON::BI__builtin_neon_vld1_v: 4750 case NEON::BI__builtin_neon_vld1q_v: 4751 case NEON::BI__builtin_neon_vld1q_lane_v: 4752 case NEON::BI__builtin_neon_vld1_lane_v: 4753 case NEON::BI__builtin_neon_vld1_dup_v: 4754 case NEON::BI__builtin_neon_vld1q_dup_v: 4755 case NEON::BI__builtin_neon_vst1_v: 4756 case NEON::BI__builtin_neon_vst1q_v: 4757 case NEON::BI__builtin_neon_vst1q_lane_v: 4758 case NEON::BI__builtin_neon_vst1_lane_v: 4759 case NEON::BI__builtin_neon_vst2_v: 4760 case NEON::BI__builtin_neon_vst2q_v: 4761 case NEON::BI__builtin_neon_vst2_lane_v: 4762 case NEON::BI__builtin_neon_vst2q_lane_v: 4763 case NEON::BI__builtin_neon_vst3_v: 4764 case NEON::BI__builtin_neon_vst3q_v: 4765 case NEON::BI__builtin_neon_vst3_lane_v: 4766 case NEON::BI__builtin_neon_vst3q_lane_v: 4767 case NEON::BI__builtin_neon_vst4_v: 4768 case NEON::BI__builtin_neon_vst4q_v: 4769 case NEON::BI__builtin_neon_vst4_lane_v: 4770 case NEON::BI__builtin_neon_vst4q_lane_v: 4771 // Get the alignment for the argument in addition to the value; 4772 // we'll use it later. 
4773 PtrOp0 = EmitPointerWithAlignment(E->getArg(0)); 4774 Ops.push_back(PtrOp0.getPointer()); 4775 continue; 4776 } 4777 } 4778 if (i == 1) { 4779 switch (BuiltinID) { 4780 case NEON::BI__builtin_neon_vld2_v: 4781 case NEON::BI__builtin_neon_vld2q_v: 4782 case NEON::BI__builtin_neon_vld3_v: 4783 case NEON::BI__builtin_neon_vld3q_v: 4784 case NEON::BI__builtin_neon_vld4_v: 4785 case NEON::BI__builtin_neon_vld4q_v: 4786 case NEON::BI__builtin_neon_vld2_lane_v: 4787 case NEON::BI__builtin_neon_vld2q_lane_v: 4788 case NEON::BI__builtin_neon_vld3_lane_v: 4789 case NEON::BI__builtin_neon_vld3q_lane_v: 4790 case NEON::BI__builtin_neon_vld4_lane_v: 4791 case NEON::BI__builtin_neon_vld4q_lane_v: 4792 case NEON::BI__builtin_neon_vld2_dup_v: 4793 case NEON::BI__builtin_neon_vld3_dup_v: 4794 case NEON::BI__builtin_neon_vld4_dup_v: 4795 // Get the alignment for the argument in addition to the value; 4796 // we'll use it later. 4797 PtrOp1 = EmitPointerWithAlignment(E->getArg(1)); 4798 Ops.push_back(PtrOp1.getPointer()); 4799 continue; 4800 } 4801 } 4802 4803 if ((ICEArguments & (1 << i)) == 0) { 4804 Ops.push_back(EmitScalarExpr(E->getArg(i))); 4805 } else { 4806 // If this is required to be a constant, constant fold it so that we know 4807 // that the generated intrinsic gets a ConstantInt. 
4808 llvm::APSInt Result; 4809 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 4810 assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; 4811 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 4812 } 4813 } 4814 4815 switch (BuiltinID) { 4816 default: break; 4817 4818 case NEON::BI__builtin_neon_vget_lane_i8: 4819 case NEON::BI__builtin_neon_vget_lane_i16: 4820 case NEON::BI__builtin_neon_vget_lane_i32: 4821 case NEON::BI__builtin_neon_vget_lane_i64: 4822 case NEON::BI__builtin_neon_vget_lane_f32: 4823 case NEON::BI__builtin_neon_vgetq_lane_i8: 4824 case NEON::BI__builtin_neon_vgetq_lane_i16: 4825 case NEON::BI__builtin_neon_vgetq_lane_i32: 4826 case NEON::BI__builtin_neon_vgetq_lane_i64: 4827 case NEON::BI__builtin_neon_vgetq_lane_f32: 4828 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane"); 4829 4830 case NEON::BI__builtin_neon_vset_lane_i8: 4831 case NEON::BI__builtin_neon_vset_lane_i16: 4832 case NEON::BI__builtin_neon_vset_lane_i32: 4833 case NEON::BI__builtin_neon_vset_lane_i64: 4834 case NEON::BI__builtin_neon_vset_lane_f32: 4835 case NEON::BI__builtin_neon_vsetq_lane_i8: 4836 case NEON::BI__builtin_neon_vsetq_lane_i16: 4837 case NEON::BI__builtin_neon_vsetq_lane_i32: 4838 case NEON::BI__builtin_neon_vsetq_lane_i64: 4839 case NEON::BI__builtin_neon_vsetq_lane_f32: 4840 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 4841 4842 case NEON::BI__builtin_neon_vsha1h_u32: 4843 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops, 4844 "vsha1h"); 4845 case NEON::BI__builtin_neon_vsha1cq_u32: 4846 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops, 4847 "vsha1h"); 4848 case NEON::BI__builtin_neon_vsha1pq_u32: 4849 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops, 4850 "vsha1h"); 4851 case NEON::BI__builtin_neon_vsha1mq_u32: 4852 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops, 4853 "vsha1h"); 
4854 4855 // The ARM _MoveToCoprocessor builtins put the input register value as 4856 // the first argument, but the LLVM intrinsic expects it as the third one. 4857 case ARM::BI_MoveToCoprocessor: 4858 case ARM::BI_MoveToCoprocessor2: { 4859 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ? 4860 Intrinsic::arm_mcr : Intrinsic::arm_mcr2); 4861 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0], 4862 Ops[3], Ops[4], Ops[5]}); 4863 } 4864 case ARM::BI_BitScanForward: 4865 case ARM::BI_BitScanForward64: 4866 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); 4867 case ARM::BI_BitScanReverse: 4868 case ARM::BI_BitScanReverse64: 4869 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E); 4870 4871 case ARM::BI_InterlockedAnd64: 4872 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E); 4873 case ARM::BI_InterlockedExchange64: 4874 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E); 4875 case ARM::BI_InterlockedExchangeAdd64: 4876 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E); 4877 case ARM::BI_InterlockedExchangeSub64: 4878 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E); 4879 case ARM::BI_InterlockedOr64: 4880 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E); 4881 case ARM::BI_InterlockedXor64: 4882 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E); 4883 case ARM::BI_InterlockedDecrement64: 4884 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); 4885 case ARM::BI_InterlockedIncrement64: 4886 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); 4887 } 4888 4889 // Get the last argument, which specifies the vector type. 
4890 assert(HasExtraArg); 4891 llvm::APSInt Result; 4892 const Expr *Arg = E->getArg(E->getNumArgs()-1); 4893 if (!Arg->isIntegerConstantExpr(Result, getContext())) 4894 return nullptr; 4895 4896 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f || 4897 BuiltinID == ARM::BI__builtin_arm_vcvtr_d) { 4898 // Determine the overloaded type of this builtin. 4899 llvm::Type *Ty; 4900 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f) 4901 Ty = FloatTy; 4902 else 4903 Ty = DoubleTy; 4904 4905 // Determine whether this is an unsigned conversion or not. 4906 bool usgn = Result.getZExtValue() == 1; 4907 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr; 4908 4909 // Call the appropriate intrinsic. 4910 Function *F = CGM.getIntrinsic(Int, Ty); 4911 return Builder.CreateCall(F, Ops, "vcvtr"); 4912 } 4913 4914 // Determine the type of this overloaded NEON intrinsic. 4915 NeonTypeFlags Type(Result.getZExtValue()); 4916 bool usgn = Type.isUnsigned(); 4917 bool rightShift = false; 4918 4919 llvm::VectorType *VTy = GetNeonType(this, Type); 4920 llvm::Type *Ty = VTy; 4921 if (!Ty) 4922 return nullptr; 4923 4924 // Many NEON builtins have identical semantics and uses in ARM and 4925 // AArch64. Emit these in a single function. 4926 auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap); 4927 const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( 4928 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted); 4929 if (Builtin) 4930 return EmitCommonNeonBuiltinExpr( 4931 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 4932 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1); 4933 4934 unsigned Int; 4935 switch (BuiltinID) { 4936 default: return nullptr; 4937 case NEON::BI__builtin_neon_vld1q_lane_v: 4938 // Handle 64-bit integer elements as a special case. Use shuffles of 4939 // one-element vectors to avoid poor code for i64 in the backend. 4940 if (VTy->getElementType()->isIntegerTy(64)) { 4941 // Extract the other lane. 
4942 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4943 uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue(); 4944 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane)); 4945 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 4946 // Load the value as a one-element vector. 4947 Ty = llvm::VectorType::get(VTy->getElementType(), 1); 4948 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 4949 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys); 4950 Value *Align = getAlignmentValue32(PtrOp0); 4951 Value *Ld = Builder.CreateCall(F, {Ops[0], Align}); 4952 // Combine them. 4953 uint32_t Indices[] = {1 - Lane, Lane}; 4954 SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices); 4955 return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane"); 4956 } 4957 // fall through 4958 case NEON::BI__builtin_neon_vld1_lane_v: { 4959 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4960 PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType()); 4961 Value *Ld = Builder.CreateLoad(PtrOp0); 4962 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); 4963 } 4964 case NEON::BI__builtin_neon_vld2_dup_v: 4965 case NEON::BI__builtin_neon_vld3_dup_v: 4966 case NEON::BI__builtin_neon_vld4_dup_v: { 4967 // Handle 64-bit elements as a special-case. There is no "dup" needed. 
4968 if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) { 4969 switch (BuiltinID) { 4970 case NEON::BI__builtin_neon_vld2_dup_v: 4971 Int = Intrinsic::arm_neon_vld2; 4972 break; 4973 case NEON::BI__builtin_neon_vld3_dup_v: 4974 Int = Intrinsic::arm_neon_vld3; 4975 break; 4976 case NEON::BI__builtin_neon_vld4_dup_v: 4977 Int = Intrinsic::arm_neon_vld4; 4978 break; 4979 default: llvm_unreachable("unknown vld_dup intrinsic?"); 4980 } 4981 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 4982 Function *F = CGM.getIntrinsic(Int, Tys); 4983 llvm::Value *Align = getAlignmentValue32(PtrOp1); 4984 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup"); 4985 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 4986 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4987 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 4988 } 4989 switch (BuiltinID) { 4990 case NEON::BI__builtin_neon_vld2_dup_v: 4991 Int = Intrinsic::arm_neon_vld2lane; 4992 break; 4993 case NEON::BI__builtin_neon_vld3_dup_v: 4994 Int = Intrinsic::arm_neon_vld3lane; 4995 break; 4996 case NEON::BI__builtin_neon_vld4_dup_v: 4997 Int = Intrinsic::arm_neon_vld4lane; 4998 break; 4999 default: llvm_unreachable("unknown vld_dup intrinsic?"); 5000 } 5001 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 5002 Function *F = CGM.getIntrinsic(Int, Tys); 5003 llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType()); 5004 5005 SmallVector<Value*, 6> Args; 5006 Args.push_back(Ops[1]); 5007 Args.append(STy->getNumElements(), UndefValue::get(Ty)); 5008 5009 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 5010 Args.push_back(CI); 5011 Args.push_back(getAlignmentValue32(PtrOp1)); 5012 5013 Ops[1] = Builder.CreateCall(F, Args, "vld_dup"); 5014 // splat lane 0 to all elts in each vector of the result. 
5015 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 5016 Value *Val = Builder.CreateExtractValue(Ops[1], i); 5017 Value *Elt = Builder.CreateBitCast(Val, Ty); 5018 Elt = EmitNeonSplat(Elt, CI); 5019 Elt = Builder.CreateBitCast(Elt, Val->getType()); 5020 Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i); 5021 } 5022 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5023 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5024 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 5025 } 5026 case NEON::BI__builtin_neon_vqrshrn_n_v: 5027 Int = 5028 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns; 5029 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n", 5030 1, true); 5031 case NEON::BI__builtin_neon_vqrshrun_n_v: 5032 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty), 5033 Ops, "vqrshrun_n", 1, true); 5034 case NEON::BI__builtin_neon_vqshrn_n_v: 5035 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns; 5036 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n", 5037 1, true); 5038 case NEON::BI__builtin_neon_vqshrun_n_v: 5039 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty), 5040 Ops, "vqshrun_n", 1, true); 5041 case NEON::BI__builtin_neon_vrecpe_v: 5042 case NEON::BI__builtin_neon_vrecpeq_v: 5043 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty), 5044 Ops, "vrecpe"); 5045 case NEON::BI__builtin_neon_vrshrn_n_v: 5046 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty), 5047 Ops, "vrshrn_n", 1, true); 5048 case NEON::BI__builtin_neon_vrsra_n_v: 5049 case NEON::BI__builtin_neon_vrsraq_n_v: 5050 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5051 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5052 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true); 5053 Int = usgn ? 
Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts; 5054 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]}); 5055 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n"); 5056 case NEON::BI__builtin_neon_vsri_n_v: 5057 case NEON::BI__builtin_neon_vsriq_n_v: 5058 rightShift = true; 5059 case NEON::BI__builtin_neon_vsli_n_v: 5060 case NEON::BI__builtin_neon_vsliq_n_v: 5061 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift); 5062 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty), 5063 Ops, "vsli_n"); 5064 case NEON::BI__builtin_neon_vsra_n_v: 5065 case NEON::BI__builtin_neon_vsraq_n_v: 5066 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5067 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 5068 return Builder.CreateAdd(Ops[0], Ops[1]); 5069 case NEON::BI__builtin_neon_vst1q_lane_v: 5070 // Handle 64-bit integer elements as a special case. Use a shuffle to get 5071 // a one-element vector and avoid poor code for i64 in the backend. 
5072 if (VTy->getElementType()->isIntegerTy(64)) { 5073 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5074 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2])); 5075 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 5076 Ops[2] = getAlignmentValue32(PtrOp0); 5077 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()}; 5078 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, 5079 Tys), Ops); 5080 } 5081 // fall through 5082 case NEON::BI__builtin_neon_vst1_lane_v: { 5083 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5084 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 5085 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 5086 auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty)); 5087 return St; 5088 } 5089 case NEON::BI__builtin_neon_vtbl1_v: 5090 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1), 5091 Ops, "vtbl1"); 5092 case NEON::BI__builtin_neon_vtbl2_v: 5093 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2), 5094 Ops, "vtbl2"); 5095 case NEON::BI__builtin_neon_vtbl3_v: 5096 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3), 5097 Ops, "vtbl3"); 5098 case NEON::BI__builtin_neon_vtbl4_v: 5099 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4), 5100 Ops, "vtbl4"); 5101 case NEON::BI__builtin_neon_vtbx1_v: 5102 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1), 5103 Ops, "vtbx1"); 5104 case NEON::BI__builtin_neon_vtbx2_v: 5105 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2), 5106 Ops, "vtbx2"); 5107 case NEON::BI__builtin_neon_vtbx3_v: 5108 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3), 5109 Ops, "vtbx3"); 5110 case NEON::BI__builtin_neon_vtbx4_v: 5111 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4), 5112 Ops, "vtbx4"); 5113 } 5114 } 5115 5116 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, 5117 const CallExpr *E, 5118 SmallVectorImpl<Value *> &Ops) { 
5119 unsigned int Int = 0; 5120 const char *s = nullptr; 5121 5122 switch (BuiltinID) { 5123 default: 5124 return nullptr; 5125 case NEON::BI__builtin_neon_vtbl1_v: 5126 case NEON::BI__builtin_neon_vqtbl1_v: 5127 case NEON::BI__builtin_neon_vqtbl1q_v: 5128 case NEON::BI__builtin_neon_vtbl2_v: 5129 case NEON::BI__builtin_neon_vqtbl2_v: 5130 case NEON::BI__builtin_neon_vqtbl2q_v: 5131 case NEON::BI__builtin_neon_vtbl3_v: 5132 case NEON::BI__builtin_neon_vqtbl3_v: 5133 case NEON::BI__builtin_neon_vqtbl3q_v: 5134 case NEON::BI__builtin_neon_vtbl4_v: 5135 case NEON::BI__builtin_neon_vqtbl4_v: 5136 case NEON::BI__builtin_neon_vqtbl4q_v: 5137 break; 5138 case NEON::BI__builtin_neon_vtbx1_v: 5139 case NEON::BI__builtin_neon_vqtbx1_v: 5140 case NEON::BI__builtin_neon_vqtbx1q_v: 5141 case NEON::BI__builtin_neon_vtbx2_v: 5142 case NEON::BI__builtin_neon_vqtbx2_v: 5143 case NEON::BI__builtin_neon_vqtbx2q_v: 5144 case NEON::BI__builtin_neon_vtbx3_v: 5145 case NEON::BI__builtin_neon_vqtbx3_v: 5146 case NEON::BI__builtin_neon_vqtbx3q_v: 5147 case NEON::BI__builtin_neon_vtbx4_v: 5148 case NEON::BI__builtin_neon_vqtbx4_v: 5149 case NEON::BI__builtin_neon_vqtbx4q_v: 5150 break; 5151 } 5152 5153 assert(E->getNumArgs() >= 3); 5154 5155 // Get the last argument, which specifies the vector type. 5156 llvm::APSInt Result; 5157 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 5158 if (!Arg->isIntegerConstantExpr(Result, CGF.getContext())) 5159 return nullptr; 5160 5161 // Determine the type of this overloaded NEON intrinsic. 5162 NeonTypeFlags Type(Result.getZExtValue()); 5163 llvm::VectorType *Ty = GetNeonType(&CGF, Type); 5164 if (!Ty) 5165 return nullptr; 5166 5167 CodeGen::CGBuilderTy &Builder = CGF.Builder; 5168 5169 // AArch64 scalar builtins are not overloaded, they do not have an extra 5170 // argument that specifies the vector type, need to handle each case. 
5171 switch (BuiltinID) { 5172 case NEON::BI__builtin_neon_vtbl1_v: { 5173 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr, 5174 Ops[1], Ty, Intrinsic::aarch64_neon_tbl1, 5175 "vtbl1"); 5176 } 5177 case NEON::BI__builtin_neon_vtbl2_v: { 5178 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr, 5179 Ops[2], Ty, Intrinsic::aarch64_neon_tbl1, 5180 "vtbl1"); 5181 } 5182 case NEON::BI__builtin_neon_vtbl3_v: { 5183 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr, 5184 Ops[3], Ty, Intrinsic::aarch64_neon_tbl2, 5185 "vtbl2"); 5186 } 5187 case NEON::BI__builtin_neon_vtbl4_v: { 5188 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr, 5189 Ops[4], Ty, Intrinsic::aarch64_neon_tbl2, 5190 "vtbl2"); 5191 } 5192 case NEON::BI__builtin_neon_vtbx1_v: { 5193 Value *TblRes = 5194 packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2], 5195 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1"); 5196 5197 llvm::Constant *EightV = ConstantInt::get(Ty, 8); 5198 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV); 5199 CmpRes = Builder.CreateSExt(CmpRes, Ty); 5200 5201 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]); 5202 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 5203 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 5204 } 5205 case NEON::BI__builtin_neon_vtbx2_v: { 5206 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0], 5207 Ops[3], Ty, Intrinsic::aarch64_neon_tbx1, 5208 "vtbx1"); 5209 } 5210 case NEON::BI__builtin_neon_vtbx3_v: { 5211 Value *TblRes = 5212 packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4], 5213 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2"); 5214 5215 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24); 5216 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4], 5217 TwentyFourV); 5218 CmpRes = Builder.CreateSExt(CmpRes, Ty); 5219 5220 Value *EltsFromInput = 
Builder.CreateAnd(CmpRes, Ops[0]); 5221 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 5222 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 5223 } 5224 case NEON::BI__builtin_neon_vtbx4_v: { 5225 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0], 5226 Ops[5], Ty, Intrinsic::aarch64_neon_tbx2, 5227 "vtbx2"); 5228 } 5229 case NEON::BI__builtin_neon_vqtbl1_v: 5230 case NEON::BI__builtin_neon_vqtbl1q_v: 5231 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break; 5232 case NEON::BI__builtin_neon_vqtbl2_v: 5233 case NEON::BI__builtin_neon_vqtbl2q_v: { 5234 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break; 5235 case NEON::BI__builtin_neon_vqtbl3_v: 5236 case NEON::BI__builtin_neon_vqtbl3q_v: 5237 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break; 5238 case NEON::BI__builtin_neon_vqtbl4_v: 5239 case NEON::BI__builtin_neon_vqtbl4q_v: 5240 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break; 5241 case NEON::BI__builtin_neon_vqtbx1_v: 5242 case NEON::BI__builtin_neon_vqtbx1q_v: 5243 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break; 5244 case NEON::BI__builtin_neon_vqtbx2_v: 5245 case NEON::BI__builtin_neon_vqtbx2q_v: 5246 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break; 5247 case NEON::BI__builtin_neon_vqtbx3_v: 5248 case NEON::BI__builtin_neon_vqtbx3q_v: 5249 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break; 5250 case NEON::BI__builtin_neon_vqtbx4_v: 5251 case NEON::BI__builtin_neon_vqtbx4q_v: 5252 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break; 5253 } 5254 } 5255 5256 if (!Int) 5257 return nullptr; 5258 5259 Function *F = CGF.CGM.getIntrinsic(Int, Ty); 5260 return CGF.EmitNeonCall(F, Ops, s); 5261 } 5262 5263 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) { 5264 llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4); 5265 Op = Builder.CreateBitCast(Op, Int16Ty); 5266 Value *V = UndefValue::get(VTy); 5267 llvm::Constant *CI = ConstantInt::get(SizeTy, 0); 5268 Op = 
Builder.CreateInsertElement(V, Op, CI); 5269 return Op; 5270 } 5271 5272 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, 5273 const CallExpr *E) { 5274 unsigned HintID = static_cast<unsigned>(-1); 5275 switch (BuiltinID) { 5276 default: break; 5277 case AArch64::BI__builtin_arm_nop: 5278 HintID = 0; 5279 break; 5280 case AArch64::BI__builtin_arm_yield: 5281 HintID = 1; 5282 break; 5283 case AArch64::BI__builtin_arm_wfe: 5284 HintID = 2; 5285 break; 5286 case AArch64::BI__builtin_arm_wfi: 5287 HintID = 3; 5288 break; 5289 case AArch64::BI__builtin_arm_sev: 5290 HintID = 4; 5291 break; 5292 case AArch64::BI__builtin_arm_sevl: 5293 HintID = 5; 5294 break; 5295 } 5296 5297 if (HintID != static_cast<unsigned>(-1)) { 5298 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint); 5299 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID)); 5300 } 5301 5302 if (BuiltinID == AArch64::BI__builtin_arm_prefetch) { 5303 Value *Address = EmitScalarExpr(E->getArg(0)); 5304 Value *RW = EmitScalarExpr(E->getArg(1)); 5305 Value *CacheLevel = EmitScalarExpr(E->getArg(2)); 5306 Value *RetentionPolicy = EmitScalarExpr(E->getArg(3)); 5307 Value *IsData = EmitScalarExpr(E->getArg(4)); 5308 5309 Value *Locality = nullptr; 5310 if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) { 5311 // Temporal fetch, needs to convert cache level to locality. 5312 Locality = llvm::ConstantInt::get(Int32Ty, 5313 -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3); 5314 } else { 5315 // Streaming fetch. 5316 Locality = llvm::ConstantInt::get(Int32Ty, 0); 5317 } 5318 5319 // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify 5320 // PLDL3STRM or PLDL2STRM. 
5321 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 5322 return Builder.CreateCall(F, {Address, RW, Locality, IsData}); 5323 } 5324 5325 if (BuiltinID == AArch64::BI__builtin_arm_rbit) { 5326 assert((getContext().getTypeSize(E->getType()) == 32) && 5327 "rbit of unusual size!"); 5328 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 5329 return Builder.CreateCall( 5330 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); 5331 } 5332 if (BuiltinID == AArch64::BI__builtin_arm_rbit64) { 5333 assert((getContext().getTypeSize(E->getType()) == 64) && 5334 "rbit of unusual size!"); 5335 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 5336 return Builder.CreateCall( 5337 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); 5338 } 5339 5340 if (BuiltinID == AArch64::BI__clear_cache) { 5341 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 5342 const FunctionDecl *FD = E->getDirectCallee(); 5343 Value *Ops[2]; 5344 for (unsigned i = 0; i < 2; i++) 5345 Ops[i] = EmitScalarExpr(E->getArg(i)); 5346 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 5347 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 5348 StringRef Name = FD->getName(); 5349 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 5350 } 5351 5352 if ((BuiltinID == AArch64::BI__builtin_arm_ldrex || 5353 BuiltinID == AArch64::BI__builtin_arm_ldaex) && 5354 getContext().getTypeSize(E->getType()) == 128) { 5355 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 5356 ? 
Intrinsic::aarch64_ldaxp 5357 : Intrinsic::aarch64_ldxp); 5358 5359 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 5360 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 5361 "ldxp"); 5362 5363 Value *Val0 = Builder.CreateExtractValue(Val, 1); 5364 Value *Val1 = Builder.CreateExtractValue(Val, 0); 5365 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); 5366 Val0 = Builder.CreateZExt(Val0, Int128Ty); 5367 Val1 = Builder.CreateZExt(Val1, Int128Ty); 5368 5369 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64); 5370 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 5371 Val = Builder.CreateOr(Val, Val1); 5372 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 5373 } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex || 5374 BuiltinID == AArch64::BI__builtin_arm_ldaex) { 5375 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 5376 5377 QualType Ty = E->getType(); 5378 llvm::Type *RealResTy = ConvertType(Ty); 5379 llvm::Type *PtrTy = llvm::IntegerType::get( 5380 getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo(); 5381 LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy); 5382 5383 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 5384 ? Intrinsic::aarch64_ldaxr 5385 : Intrinsic::aarch64_ldxr, 5386 PtrTy); 5387 Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr"); 5388 5389 if (RealResTy->isPointerTy()) 5390 return Builder.CreateIntToPtr(Val, RealResTy); 5391 5392 llvm::Type *IntResTy = llvm::IntegerType::get( 5393 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy)); 5394 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 5395 return Builder.CreateBitCast(Val, RealResTy); 5396 } 5397 5398 if ((BuiltinID == AArch64::BI__builtin_arm_strex || 5399 BuiltinID == AArch64::BI__builtin_arm_stlex) && 5400 getContext().getTypeSize(E->getArg(0)->getType()) == 128) { 5401 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 5402 ? 
Intrinsic::aarch64_stlxp 5403 : Intrinsic::aarch64_stxp); 5404 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty); 5405 5406 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 5407 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true); 5408 5409 Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy)); 5410 llvm::Value *Val = Builder.CreateLoad(Tmp); 5411 5412 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 5413 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 5414 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), 5415 Int8PtrTy); 5416 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp"); 5417 } 5418 5419 if (BuiltinID == AArch64::BI__builtin_arm_strex || 5420 BuiltinID == AArch64::BI__builtin_arm_stlex) { 5421 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 5422 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 5423 5424 QualType Ty = E->getArg(0)->getType(); 5425 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 5426 getContext().getTypeSize(Ty)); 5427 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 5428 5429 if (StoreVal->getType()->isPointerTy()) 5430 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty); 5431 else { 5432 llvm::Type *IntTy = llvm::IntegerType::get( 5433 getLLVMContext(), 5434 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType())); 5435 StoreVal = Builder.CreateBitCast(StoreVal, IntTy); 5436 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty); 5437 } 5438 5439 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 5440 ? 
Intrinsic::aarch64_stlxr 5441 : Intrinsic::aarch64_stxr, 5442 StoreAddr->getType()); 5443 return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr"); 5444 } 5445 5446 if (BuiltinID == AArch64::BI__builtin_arm_clrex) { 5447 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex); 5448 return Builder.CreateCall(F); 5449 } 5450 5451 // CRC32 5452 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 5453 switch (BuiltinID) { 5454 case AArch64::BI__builtin_arm_crc32b: 5455 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break; 5456 case AArch64::BI__builtin_arm_crc32cb: 5457 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break; 5458 case AArch64::BI__builtin_arm_crc32h: 5459 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break; 5460 case AArch64::BI__builtin_arm_crc32ch: 5461 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break; 5462 case AArch64::BI__builtin_arm_crc32w: 5463 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break; 5464 case AArch64::BI__builtin_arm_crc32cw: 5465 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break; 5466 case AArch64::BI__builtin_arm_crc32d: 5467 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break; 5468 case AArch64::BI__builtin_arm_crc32cd: 5469 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break; 5470 } 5471 5472 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 5473 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 5474 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 5475 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 5476 5477 llvm::Type *DataTy = F->getFunctionType()->getParamType(1); 5478 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy); 5479 5480 return Builder.CreateCall(F, {Arg0, Arg1}); 5481 } 5482 5483 if (BuiltinID == AArch64::BI__builtin_arm_rsr || 5484 BuiltinID == AArch64::BI__builtin_arm_rsr64 || 5485 BuiltinID == AArch64::BI__builtin_arm_rsrp || 5486 BuiltinID == AArch64::BI__builtin_arm_wsr || 5487 BuiltinID == AArch64::BI__builtin_arm_wsr64 || 5488 BuiltinID == AArch64::BI__builtin_arm_wsrp) { 5489 5490 bool IsRead = BuiltinID == 
AArch64::BI__builtin_arm_rsr || 5491 BuiltinID == AArch64::BI__builtin_arm_rsr64 || 5492 BuiltinID == AArch64::BI__builtin_arm_rsrp; 5493 5494 bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp || 5495 BuiltinID == AArch64::BI__builtin_arm_wsrp; 5496 5497 bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr && 5498 BuiltinID != AArch64::BI__builtin_arm_wsr; 5499 5500 llvm::Type *ValueType; 5501 llvm::Type *RegisterType = Int64Ty; 5502 if (IsPointerBuiltin) { 5503 ValueType = VoidPtrTy; 5504 } else if (Is64Bit) { 5505 ValueType = Int64Ty; 5506 } else { 5507 ValueType = Int32Ty; 5508 } 5509 5510 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead); 5511 } 5512 5513 // Find out if any arguments are required to be integer constant 5514 // expressions. 5515 unsigned ICEArguments = 0; 5516 ASTContext::GetBuiltinTypeError Error; 5517 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 5518 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 5519 5520 llvm::SmallVector<Value*, 4> Ops; 5521 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { 5522 if ((ICEArguments & (1 << i)) == 0) { 5523 Ops.push_back(EmitScalarExpr(E->getArg(i))); 5524 } else { 5525 // If this is required to be a constant, constant fold it so that we know 5526 // that the generated intrinsic gets a ConstantInt. 
5527 llvm::APSInt Result; 5528 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 5529 assert(IsConst && "Constant arg isn't actually constant?"); 5530 (void)IsConst; 5531 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 5532 } 5533 } 5534 5535 auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap); 5536 const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( 5537 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted); 5538 5539 if (Builtin) { 5540 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1))); 5541 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E); 5542 assert(Result && "SISD intrinsic should have been handled"); 5543 return Result; 5544 } 5545 5546 llvm::APSInt Result; 5547 const Expr *Arg = E->getArg(E->getNumArgs()-1); 5548 NeonTypeFlags Type(0); 5549 if (Arg->isIntegerConstantExpr(Result, getContext())) 5550 // Determine the type of this overloaded NEON intrinsic. 5551 Type = NeonTypeFlags(Result.getZExtValue()); 5552 5553 bool usgn = Type.isUnsigned(); 5554 bool quad = Type.isQuad(); 5555 5556 // Handle non-overloaded intrinsics first. 
5557 switch (BuiltinID) { 5558 default: break; 5559 case NEON::BI__builtin_neon_vldrq_p128: { 5560 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128); 5561 llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0); 5562 Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy); 5563 return Builder.CreateAlignedLoad(Int128Ty, Ptr, 5564 CharUnits::fromQuantity(16)); 5565 } 5566 case NEON::BI__builtin_neon_vstrq_p128: { 5567 llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128); 5568 Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy); 5569 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr); 5570 } 5571 case NEON::BI__builtin_neon_vcvts_u32_f32: 5572 case NEON::BI__builtin_neon_vcvtd_u64_f64: 5573 usgn = true; 5574 // FALL THROUGH 5575 case NEON::BI__builtin_neon_vcvts_s32_f32: 5576 case NEON::BI__builtin_neon_vcvtd_s64_f64: { 5577 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5578 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 5579 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 5580 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy; 5581 Ops[0] = Builder.CreateBitCast(Ops[0], FTy); 5582 if (usgn) 5583 return Builder.CreateFPToUI(Ops[0], InTy); 5584 return Builder.CreateFPToSI(Ops[0], InTy); 5585 } 5586 case NEON::BI__builtin_neon_vcvts_f32_u32: 5587 case NEON::BI__builtin_neon_vcvtd_f64_u64: 5588 usgn = true; 5589 // FALL THROUGH 5590 case NEON::BI__builtin_neon_vcvts_f32_s32: 5591 case NEON::BI__builtin_neon_vcvtd_f64_s64: { 5592 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5593 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 5594 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 5595 llvm::Type *FTy = Is64 ? 
DoubleTy : FloatTy; 5596 Ops[0] = Builder.CreateBitCast(Ops[0], InTy); 5597 if (usgn) 5598 return Builder.CreateUIToFP(Ops[0], FTy); 5599 return Builder.CreateSIToFP(Ops[0], FTy); 5600 } 5601 case NEON::BI__builtin_neon_vpaddd_s64: { 5602 llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2); 5603 Value *Vec = EmitScalarExpr(E->getArg(0)); 5604 // The vector is v2f64, so make sure it's bitcast to that. 5605 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64"); 5606 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 5607 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 5608 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 5609 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 5610 // Pairwise addition of a v2f64 into a scalar f64. 5611 return Builder.CreateAdd(Op0, Op1, "vpaddd"); 5612 } 5613 case NEON::BI__builtin_neon_vpaddd_f64: { 5614 llvm::Type *Ty = 5615 llvm::VectorType::get(DoubleTy, 2); 5616 Value *Vec = EmitScalarExpr(E->getArg(0)); 5617 // The vector is v2f64, so make sure it's bitcast to that. 5618 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64"); 5619 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 5620 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 5621 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 5622 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 5623 // Pairwise addition of a v2f64 into a scalar f64. 5624 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 5625 } 5626 case NEON::BI__builtin_neon_vpadds_f32: { 5627 llvm::Type *Ty = 5628 llvm::VectorType::get(FloatTy, 2); 5629 Value *Vec = EmitScalarExpr(E->getArg(0)); 5630 // The vector is v2f32, so make sure it's bitcast to that. 
5631 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32"); 5632 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 5633 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 5634 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 5635 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 5636 // Pairwise addition of a v2f32 into a scalar f32. 5637 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 5638 } 5639 case NEON::BI__builtin_neon_vceqzd_s64: 5640 case NEON::BI__builtin_neon_vceqzd_f64: 5641 case NEON::BI__builtin_neon_vceqzs_f32: 5642 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5643 return EmitAArch64CompareBuiltinExpr( 5644 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5645 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz"); 5646 case NEON::BI__builtin_neon_vcgezd_s64: 5647 case NEON::BI__builtin_neon_vcgezd_f64: 5648 case NEON::BI__builtin_neon_vcgezs_f32: 5649 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5650 return EmitAArch64CompareBuiltinExpr( 5651 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5652 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez"); 5653 case NEON::BI__builtin_neon_vclezd_s64: 5654 case NEON::BI__builtin_neon_vclezd_f64: 5655 case NEON::BI__builtin_neon_vclezs_f32: 5656 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5657 return EmitAArch64CompareBuiltinExpr( 5658 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5659 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez"); 5660 case NEON::BI__builtin_neon_vcgtzd_s64: 5661 case NEON::BI__builtin_neon_vcgtzd_f64: 5662 case NEON::BI__builtin_neon_vcgtzs_f32: 5663 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5664 return EmitAArch64CompareBuiltinExpr( 5665 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5666 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz"); 5667 case NEON::BI__builtin_neon_vcltzd_s64: 5668 case NEON::BI__builtin_neon_vcltzd_f64: 5669 case NEON::BI__builtin_neon_vcltzs_f32: 5670 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5671 return 
EmitAArch64CompareBuiltinExpr( 5672 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5673 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz"); 5674 5675 case NEON::BI__builtin_neon_vceqzd_u64: { 5676 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5677 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 5678 Ops[0] = 5679 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty)); 5680 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd"); 5681 } 5682 case NEON::BI__builtin_neon_vceqd_f64: 5683 case NEON::BI__builtin_neon_vcled_f64: 5684 case NEON::BI__builtin_neon_vcltd_f64: 5685 case NEON::BI__builtin_neon_vcged_f64: 5686 case NEON::BI__builtin_neon_vcgtd_f64: { 5687 llvm::CmpInst::Predicate P; 5688 switch (BuiltinID) { 5689 default: llvm_unreachable("missing builtin ID in switch!"); 5690 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break; 5691 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break; 5692 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break; 5693 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break; 5694 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break; 5695 } 5696 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5697 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 5698 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 5699 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 5700 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd"); 5701 } 5702 case NEON::BI__builtin_neon_vceqs_f32: 5703 case NEON::BI__builtin_neon_vcles_f32: 5704 case NEON::BI__builtin_neon_vclts_f32: 5705 case NEON::BI__builtin_neon_vcges_f32: 5706 case NEON::BI__builtin_neon_vcgts_f32: { 5707 llvm::CmpInst::Predicate P; 5708 switch (BuiltinID) { 5709 default: llvm_unreachable("missing builtin ID in switch!"); 5710 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break; 5711 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; 
break; 5712 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break; 5713 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break; 5714 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break; 5715 } 5716 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5717 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy); 5718 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy); 5719 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 5720 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd"); 5721 } 5722 case NEON::BI__builtin_neon_vceqd_s64: 5723 case NEON::BI__builtin_neon_vceqd_u64: 5724 case NEON::BI__builtin_neon_vcgtd_s64: 5725 case NEON::BI__builtin_neon_vcgtd_u64: 5726 case NEON::BI__builtin_neon_vcltd_s64: 5727 case NEON::BI__builtin_neon_vcltd_u64: 5728 case NEON::BI__builtin_neon_vcged_u64: 5729 case NEON::BI__builtin_neon_vcged_s64: 5730 case NEON::BI__builtin_neon_vcled_u64: 5731 case NEON::BI__builtin_neon_vcled_s64: { 5732 llvm::CmpInst::Predicate P; 5733 switch (BuiltinID) { 5734 default: llvm_unreachable("missing builtin ID in switch!"); 5735 case NEON::BI__builtin_neon_vceqd_s64: 5736 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break; 5737 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break; 5738 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break; 5739 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break; 5740 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break; 5741 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break; 5742 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break; 5743 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break; 5744 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break; 5745 } 5746 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5747 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 5748 Ops[1] = 
Builder.CreateBitCast(Ops[1], Int64Ty); 5749 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]); 5750 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd"); 5751 } 5752 case NEON::BI__builtin_neon_vtstd_s64: 5753 case NEON::BI__builtin_neon_vtstd_u64: { 5754 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5755 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 5756 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 5757 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); 5758 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], 5759 llvm::Constant::getNullValue(Int64Ty)); 5760 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd"); 5761 } 5762 case NEON::BI__builtin_neon_vset_lane_i8: 5763 case NEON::BI__builtin_neon_vset_lane_i16: 5764 case NEON::BI__builtin_neon_vset_lane_i32: 5765 case NEON::BI__builtin_neon_vset_lane_i64: 5766 case NEON::BI__builtin_neon_vset_lane_f32: 5767 case NEON::BI__builtin_neon_vsetq_lane_i8: 5768 case NEON::BI__builtin_neon_vsetq_lane_i16: 5769 case NEON::BI__builtin_neon_vsetq_lane_i32: 5770 case NEON::BI__builtin_neon_vsetq_lane_i64: 5771 case NEON::BI__builtin_neon_vsetq_lane_f32: 5772 Ops.push_back(EmitScalarExpr(E->getArg(2))); 5773 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 5774 case NEON::BI__builtin_neon_vset_lane_f64: 5775 // The vector type needs a cast for the v1f64 variant. 5776 Ops[1] = Builder.CreateBitCast(Ops[1], 5777 llvm::VectorType::get(DoubleTy, 1)); 5778 Ops.push_back(EmitScalarExpr(E->getArg(2))); 5779 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 5780 case NEON::BI__builtin_neon_vsetq_lane_f64: 5781 // The vector type needs a cast for the v2f64 variant. 
5782 Ops[1] = Builder.CreateBitCast(Ops[1], 5783 llvm::VectorType::get(DoubleTy, 2)); 5784 Ops.push_back(EmitScalarExpr(E->getArg(2))); 5785 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 5786 5787 case NEON::BI__builtin_neon_vget_lane_i8: 5788 case NEON::BI__builtin_neon_vdupb_lane_i8: 5789 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8)); 5790 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5791 "vget_lane"); 5792 case NEON::BI__builtin_neon_vgetq_lane_i8: 5793 case NEON::BI__builtin_neon_vdupb_laneq_i8: 5794 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16)); 5795 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5796 "vgetq_lane"); 5797 case NEON::BI__builtin_neon_vget_lane_i16: 5798 case NEON::BI__builtin_neon_vduph_lane_i16: 5799 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4)); 5800 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5801 "vget_lane"); 5802 case NEON::BI__builtin_neon_vgetq_lane_i16: 5803 case NEON::BI__builtin_neon_vduph_laneq_i16: 5804 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8)); 5805 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5806 "vgetq_lane"); 5807 case NEON::BI__builtin_neon_vget_lane_i32: 5808 case NEON::BI__builtin_neon_vdups_lane_i32: 5809 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2)); 5810 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5811 "vget_lane"); 5812 case NEON::BI__builtin_neon_vdups_lane_f32: 5813 Ops[0] = Builder.CreateBitCast(Ops[0], 5814 llvm::VectorType::get(FloatTy, 2)); 5815 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5816 "vdups_lane"); 5817 case NEON::BI__builtin_neon_vgetq_lane_i32: 5818 case NEON::BI__builtin_neon_vdups_laneq_i32: 5819 Ops[0] = Builder.CreateBitCast(Ops[0], 
llvm::VectorType::get(Int32Ty, 4)); 5820 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5821 "vgetq_lane"); 5822 case NEON::BI__builtin_neon_vget_lane_i64: 5823 case NEON::BI__builtin_neon_vdupd_lane_i64: 5824 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1)); 5825 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5826 "vget_lane"); 5827 case NEON::BI__builtin_neon_vdupd_lane_f64: 5828 Ops[0] = Builder.CreateBitCast(Ops[0], 5829 llvm::VectorType::get(DoubleTy, 1)); 5830 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5831 "vdupd_lane"); 5832 case NEON::BI__builtin_neon_vgetq_lane_i64: 5833 case NEON::BI__builtin_neon_vdupd_laneq_i64: 5834 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 5835 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5836 "vgetq_lane"); 5837 case NEON::BI__builtin_neon_vget_lane_f32: 5838 Ops[0] = Builder.CreateBitCast(Ops[0], 5839 llvm::VectorType::get(FloatTy, 2)); 5840 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5841 "vget_lane"); 5842 case NEON::BI__builtin_neon_vget_lane_f64: 5843 Ops[0] = Builder.CreateBitCast(Ops[0], 5844 llvm::VectorType::get(DoubleTy, 1)); 5845 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5846 "vget_lane"); 5847 case NEON::BI__builtin_neon_vgetq_lane_f32: 5848 case NEON::BI__builtin_neon_vdups_laneq_f32: 5849 Ops[0] = Builder.CreateBitCast(Ops[0], 5850 llvm::VectorType::get(FloatTy, 4)); 5851 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5852 "vgetq_lane"); 5853 case NEON::BI__builtin_neon_vgetq_lane_f64: 5854 case NEON::BI__builtin_neon_vdupd_laneq_f64: 5855 Ops[0] = Builder.CreateBitCast(Ops[0], 5856 llvm::VectorType::get(DoubleTy, 2)); 5857 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5858 "vgetq_lane"); 5859 case NEON::BI__builtin_neon_vaddd_s64: 
5860 case NEON::BI__builtin_neon_vaddd_u64: 5861 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd"); 5862 case NEON::BI__builtin_neon_vsubd_s64: 5863 case NEON::BI__builtin_neon_vsubd_u64: 5864 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd"); 5865 case NEON::BI__builtin_neon_vqdmlalh_s16: 5866 case NEON::BI__builtin_neon_vqdmlslh_s16: { 5867 SmallVector<Value *, 2> ProductOps; 5868 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 5869 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2)))); 5870 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 5871 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 5872 ProductOps, "vqdmlXl"); 5873 Constant *CI = ConstantInt::get(SizeTy, 0); 5874 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 5875 5876 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16 5877 ? Intrinsic::aarch64_neon_sqadd 5878 : Intrinsic::aarch64_neon_sqsub; 5879 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl"); 5880 } 5881 case NEON::BI__builtin_neon_vqshlud_n_s64: { 5882 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5883 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 5884 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty), 5885 Ops, "vqshlu_n"); 5886 } 5887 case NEON::BI__builtin_neon_vqshld_n_u64: 5888 case NEON::BI__builtin_neon_vqshld_n_s64: { 5889 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64 5890 ? Intrinsic::aarch64_neon_uqshl 5891 : Intrinsic::aarch64_neon_sqshl; 5892 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5893 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 5894 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n"); 5895 } 5896 case NEON::BI__builtin_neon_vrshrd_n_u64: 5897 case NEON::BI__builtin_neon_vrshrd_n_s64: { 5898 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64 5899 ? 
Intrinsic::aarch64_neon_urshl 5900 : Intrinsic::aarch64_neon_srshl; 5901 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5902 int SV = cast<ConstantInt>(Ops[1])->getSExtValue(); 5903 Ops[1] = ConstantInt::get(Int64Ty, -SV); 5904 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n"); 5905 } 5906 case NEON::BI__builtin_neon_vrsrad_n_u64: 5907 case NEON::BI__builtin_neon_vrsrad_n_s64: { 5908 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64 5909 ? Intrinsic::aarch64_neon_urshl 5910 : Intrinsic::aarch64_neon_srshl; 5911 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 5912 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2)))); 5913 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty), 5914 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)}); 5915 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty)); 5916 } 5917 case NEON::BI__builtin_neon_vshld_n_s64: 5918 case NEON::BI__builtin_neon_vshld_n_u64: { 5919 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 5920 return Builder.CreateShl( 5921 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n"); 5922 } 5923 case NEON::BI__builtin_neon_vshrd_n_s64: { 5924 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 5925 return Builder.CreateAShr( 5926 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 5927 Amt->getZExtValue())), 5928 "shrd_n"); 5929 } 5930 case NEON::BI__builtin_neon_vshrd_n_u64: { 5931 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 5932 uint64_t ShiftAmt = Amt->getZExtValue(); 5933 // Right-shifting an unsigned value by its size yields 0. 
5934 if (ShiftAmt == 64) 5935 return ConstantInt::get(Int64Ty, 0); 5936 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt), 5937 "shrd_n"); 5938 } 5939 case NEON::BI__builtin_neon_vsrad_n_s64: { 5940 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 5941 Ops[1] = Builder.CreateAShr( 5942 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 5943 Amt->getZExtValue())), 5944 "shrd_n"); 5945 return Builder.CreateAdd(Ops[0], Ops[1]); 5946 } 5947 case NEON::BI__builtin_neon_vsrad_n_u64: { 5948 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 5949 uint64_t ShiftAmt = Amt->getZExtValue(); 5950 // Right-shifting an unsigned value by its size yields 0. 5951 // As Op + 0 = Op, return Ops[0] directly. 5952 if (ShiftAmt == 64) 5953 return Ops[0]; 5954 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt), 5955 "shrd_n"); 5956 return Builder.CreateAdd(Ops[0], Ops[1]); 5957 } 5958 case NEON::BI__builtin_neon_vqdmlalh_lane_s16: 5959 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16: 5960 case NEON::BI__builtin_neon_vqdmlslh_lane_s16: 5961 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: { 5962 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 5963 "lane"); 5964 SmallVector<Value *, 2> ProductOps; 5965 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 5966 ProductOps.push_back(vectorWrapScalar16(Ops[2])); 5967 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 5968 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 5969 ProductOps, "vqdmlXl"); 5970 Constant *CI = ConstantInt::get(SizeTy, 0); 5971 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 5972 Ops.pop_back(); 5973 5974 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 || 5975 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16) 5976 ? 
Intrinsic::aarch64_neon_sqadd 5977 : Intrinsic::aarch64_neon_sqsub; 5978 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl"); 5979 } 5980 case NEON::BI__builtin_neon_vqdmlals_s32: 5981 case NEON::BI__builtin_neon_vqdmlsls_s32: { 5982 SmallVector<Value *, 2> ProductOps; 5983 ProductOps.push_back(Ops[1]); 5984 ProductOps.push_back(EmitScalarExpr(E->getArg(2))); 5985 Ops[1] = 5986 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 5987 ProductOps, "vqdmlXl"); 5988 5989 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32 5990 ? Intrinsic::aarch64_neon_sqadd 5991 : Intrinsic::aarch64_neon_sqsub; 5992 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl"); 5993 } 5994 case NEON::BI__builtin_neon_vqdmlals_lane_s32: 5995 case NEON::BI__builtin_neon_vqdmlals_laneq_s32: 5996 case NEON::BI__builtin_neon_vqdmlsls_lane_s32: 5997 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: { 5998 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 5999 "lane"); 6000 SmallVector<Value *, 2> ProductOps; 6001 ProductOps.push_back(Ops[1]); 6002 ProductOps.push_back(Ops[2]); 6003 Ops[1] = 6004 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 6005 ProductOps, "vqdmlXl"); 6006 Ops.pop_back(); 6007 6008 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 || 6009 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32) 6010 ? Intrinsic::aarch64_neon_sqadd 6011 : Intrinsic::aarch64_neon_sqsub; 6012 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl"); 6013 } 6014 } 6015 6016 llvm::VectorType *VTy = GetNeonType(this, Type); 6017 llvm::Type *Ty = VTy; 6018 if (!Ty) 6019 return nullptr; 6020 6021 // Not all intrinsics handled by the common case work for AArch64 yet, so only 6022 // defer to common code if it's been added to our special map. 
6023 Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID, 6024 AArch64SIMDIntrinsicsProvenSorted); 6025 6026 if (Builtin) 6027 return EmitCommonNeonBuiltinExpr( 6028 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 6029 Builtin->NameHint, Builtin->TypeModifier, E, Ops, 6030 /*never use addresses*/ Address::invalid(), Address::invalid()); 6031 6032 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops)) 6033 return V; 6034 6035 unsigned Int; 6036 switch (BuiltinID) { 6037 default: return nullptr; 6038 case NEON::BI__builtin_neon_vbsl_v: 6039 case NEON::BI__builtin_neon_vbslq_v: { 6040 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy); 6041 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl"); 6042 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl"); 6043 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl"); 6044 6045 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl"); 6046 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl"); 6047 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl"); 6048 return Builder.CreateBitCast(Ops[0], Ty); 6049 } 6050 case NEON::BI__builtin_neon_vfma_lane_v: 6051 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types 6052 // The ARM builtins (and instructions) have the addend as the first 6053 // operand, but the 'fma' intrinsics have it last. Swap it around here. 6054 Value *Addend = Ops[0]; 6055 Value *Multiplicand = Ops[1]; 6056 Value *LaneSource = Ops[2]; 6057 Ops[0] = Multiplicand; 6058 Ops[1] = LaneSource; 6059 Ops[2] = Addend; 6060 6061 // Now adjust things to handle the lane access. 6062 llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ? 
6063 llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) : 6064 VTy; 6065 llvm::Constant *cst = cast<Constant>(Ops[3]); 6066 Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst); 6067 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy); 6068 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane"); 6069 6070 Ops.pop_back(); 6071 Int = Intrinsic::fma; 6072 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla"); 6073 } 6074 case NEON::BI__builtin_neon_vfma_laneq_v: { 6075 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 6076 // v1f64 fma should be mapped to Neon scalar f64 fma 6077 if (VTy && VTy->getElementType() == DoubleTy) { 6078 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 6079 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 6080 llvm::Type *VTy = GetNeonType(this, 6081 NeonTypeFlags(NeonTypeFlags::Float64, false, true)); 6082 Ops[2] = Builder.CreateBitCast(Ops[2], VTy); 6083 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 6084 Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); 6085 Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); 6086 return Builder.CreateBitCast(Result, Ty); 6087 } 6088 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 6089 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6090 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6091 6092 llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(), 6093 VTy->getNumElements() * 2); 6094 Ops[2] = Builder.CreateBitCast(Ops[2], STy); 6095 Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), 6096 cast<ConstantInt>(Ops[3])); 6097 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); 6098 6099 return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); 6100 } 6101 case NEON::BI__builtin_neon_vfmaq_laneq_v: { 6102 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 6103 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6104 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6105 6106 Ops[2] = 
Builder.CreateBitCast(Ops[2], Ty); 6107 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3])); 6108 return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); 6109 } 6110 case NEON::BI__builtin_neon_vfmas_lane_f32: 6111 case NEON::BI__builtin_neon_vfmas_laneq_f32: 6112 case NEON::BI__builtin_neon_vfmad_lane_f64: 6113 case NEON::BI__builtin_neon_vfmad_laneq_f64: { 6114 Ops.push_back(EmitScalarExpr(E->getArg(3))); 6115 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); 6116 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 6117 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 6118 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); 6119 } 6120 case NEON::BI__builtin_neon_vmull_v: 6121 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6122 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull; 6123 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull; 6124 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); 6125 case NEON::BI__builtin_neon_vmax_v: 6126 case NEON::BI__builtin_neon_vmaxq_v: 6127 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6128 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax; 6129 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax; 6130 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax"); 6131 case NEON::BI__builtin_neon_vmin_v: 6132 case NEON::BI__builtin_neon_vminq_v: 6133 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6134 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin; 6135 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin; 6136 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin"); 6137 case NEON::BI__builtin_neon_vabd_v: 6138 case NEON::BI__builtin_neon_vabdq_v: 6139 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6140 Int = usgn ? 
Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd; 6141 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd; 6142 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd"); 6143 case NEON::BI__builtin_neon_vpadal_v: 6144 case NEON::BI__builtin_neon_vpadalq_v: { 6145 unsigned ArgElts = VTy->getNumElements(); 6146 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType()); 6147 unsigned BitWidth = EltTy->getBitWidth(); 6148 llvm::Type *ArgTy = llvm::VectorType::get( 6149 llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts); 6150 llvm::Type* Tys[2] = { VTy, ArgTy }; 6151 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp; 6152 SmallVector<llvm::Value*, 1> TmpOps; 6153 TmpOps.push_back(Ops[1]); 6154 Function *F = CGM.getIntrinsic(Int, Tys); 6155 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal"); 6156 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType()); 6157 return Builder.CreateAdd(tmp, addend); 6158 } 6159 case NEON::BI__builtin_neon_vpmin_v: 6160 case NEON::BI__builtin_neon_vpminq_v: 6161 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6162 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp; 6163 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp; 6164 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin"); 6165 case NEON::BI__builtin_neon_vpmax_v: 6166 case NEON::BI__builtin_neon_vpmaxq_v: 6167 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 6168 Int = usgn ? 
Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp; 6169 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp; 6170 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); 6171 case NEON::BI__builtin_neon_vminnm_v: 6172 case NEON::BI__builtin_neon_vminnmq_v: 6173 Int = Intrinsic::aarch64_neon_fminnm; 6174 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm"); 6175 case NEON::BI__builtin_neon_vmaxnm_v: 6176 case NEON::BI__builtin_neon_vmaxnmq_v: 6177 Int = Intrinsic::aarch64_neon_fmaxnm; 6178 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm"); 6179 case NEON::BI__builtin_neon_vrecpss_f32: { 6180 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6181 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy), 6182 Ops, "vrecps"); 6183 } 6184 case NEON::BI__builtin_neon_vrecpsd_f64: { 6185 Ops.push_back(EmitScalarExpr(E->getArg(1))); 6186 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy), 6187 Ops, "vrecps"); 6188 } 6189 case NEON::BI__builtin_neon_vqshrun_n_v: 6190 Int = Intrinsic::aarch64_neon_sqshrun; 6191 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); 6192 case NEON::BI__builtin_neon_vqrshrun_n_v: 6193 Int = Intrinsic::aarch64_neon_sqrshrun; 6194 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n"); 6195 case NEON::BI__builtin_neon_vqshrn_n_v: 6196 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn; 6197 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n"); 6198 case NEON::BI__builtin_neon_vrshrn_n_v: 6199 Int = Intrinsic::aarch64_neon_rshrn; 6200 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n"); 6201 case NEON::BI__builtin_neon_vqrshrn_n_v: 6202 Int = usgn ? 
Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn; 6203 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); 6204 case NEON::BI__builtin_neon_vrnda_v: 6205 case NEON::BI__builtin_neon_vrndaq_v: { 6206 Int = Intrinsic::round; 6207 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda"); 6208 } 6209 case NEON::BI__builtin_neon_vrndi_v: 6210 case NEON::BI__builtin_neon_vrndiq_v: { 6211 Int = Intrinsic::nearbyint; 6212 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi"); 6213 } 6214 case NEON::BI__builtin_neon_vrndm_v: 6215 case NEON::BI__builtin_neon_vrndmq_v: { 6216 Int = Intrinsic::floor; 6217 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm"); 6218 } 6219 case NEON::BI__builtin_neon_vrndn_v: 6220 case NEON::BI__builtin_neon_vrndnq_v: { 6221 Int = Intrinsic::aarch64_neon_frintn; 6222 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn"); 6223 } 6224 case NEON::BI__builtin_neon_vrndp_v: 6225 case NEON::BI__builtin_neon_vrndpq_v: { 6226 Int = Intrinsic::ceil; 6227 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp"); 6228 } 6229 case NEON::BI__builtin_neon_vrndx_v: 6230 case NEON::BI__builtin_neon_vrndxq_v: { 6231 Int = Intrinsic::rint; 6232 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx"); 6233 } 6234 case NEON::BI__builtin_neon_vrnd_v: 6235 case NEON::BI__builtin_neon_vrndq_v: { 6236 Int = Intrinsic::trunc; 6237 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz"); 6238 } 6239 case NEON::BI__builtin_neon_vceqz_v: 6240 case NEON::BI__builtin_neon_vceqzq_v: 6241 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ, 6242 ICmpInst::ICMP_EQ, "vceqz"); 6243 case NEON::BI__builtin_neon_vcgez_v: 6244 case NEON::BI__builtin_neon_vcgezq_v: 6245 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE, 6246 ICmpInst::ICMP_SGE, "vcgez"); 6247 case NEON::BI__builtin_neon_vclez_v: 6248 case NEON::BI__builtin_neon_vclezq_v: 6249 return 
EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE, 6250 ICmpInst::ICMP_SLE, "vclez"); 6251 case NEON::BI__builtin_neon_vcgtz_v: 6252 case NEON::BI__builtin_neon_vcgtzq_v: 6253 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT, 6254 ICmpInst::ICMP_SGT, "vcgtz"); 6255 case NEON::BI__builtin_neon_vcltz_v: 6256 case NEON::BI__builtin_neon_vcltzq_v: 6257 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT, 6258 ICmpInst::ICMP_SLT, "vcltz"); 6259 case NEON::BI__builtin_neon_vcvt_f64_v: 6260 case NEON::BI__builtin_neon_vcvtq_f64_v: 6261 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6262 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); 6263 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 6264 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 6265 case NEON::BI__builtin_neon_vcvt_f64_f32: { 6266 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad && 6267 "unexpected vcvt_f64_f32 builtin"); 6268 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false); 6269 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 6270 6271 return Builder.CreateFPExt(Ops[0], Ty, "vcvt"); 6272 } 6273 case NEON::BI__builtin_neon_vcvt_f32_f64: { 6274 assert(Type.getEltType() == NeonTypeFlags::Float32 && 6275 "unexpected vcvt_f32_f64 builtin"); 6276 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true); 6277 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 6278 6279 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt"); 6280 } 6281 case NEON::BI__builtin_neon_vcvt_s32_v: 6282 case NEON::BI__builtin_neon_vcvt_u32_v: 6283 case NEON::BI__builtin_neon_vcvt_s64_v: 6284 case NEON::BI__builtin_neon_vcvt_u64_v: 6285 case NEON::BI__builtin_neon_vcvtq_s32_v: 6286 case NEON::BI__builtin_neon_vcvtq_u32_v: 6287 case NEON::BI__builtin_neon_vcvtq_s64_v: 6288 case NEON::BI__builtin_neon_vcvtq_u64_v: { 6289 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, 
Type)); 6290 if (usgn) 6291 return Builder.CreateFPToUI(Ops[0], Ty); 6292 return Builder.CreateFPToSI(Ops[0], Ty); 6293 } 6294 case NEON::BI__builtin_neon_vcvta_s32_v: 6295 case NEON::BI__builtin_neon_vcvtaq_s32_v: 6296 case NEON::BI__builtin_neon_vcvta_u32_v: 6297 case NEON::BI__builtin_neon_vcvtaq_u32_v: 6298 case NEON::BI__builtin_neon_vcvta_s64_v: 6299 case NEON::BI__builtin_neon_vcvtaq_s64_v: 6300 case NEON::BI__builtin_neon_vcvta_u64_v: 6301 case NEON::BI__builtin_neon_vcvtaq_u64_v: { 6302 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas; 6303 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 6304 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta"); 6305 } 6306 case NEON::BI__builtin_neon_vcvtm_s32_v: 6307 case NEON::BI__builtin_neon_vcvtmq_s32_v: 6308 case NEON::BI__builtin_neon_vcvtm_u32_v: 6309 case NEON::BI__builtin_neon_vcvtmq_u32_v: 6310 case NEON::BI__builtin_neon_vcvtm_s64_v: 6311 case NEON::BI__builtin_neon_vcvtmq_s64_v: 6312 case NEON::BI__builtin_neon_vcvtm_u64_v: 6313 case NEON::BI__builtin_neon_vcvtmq_u64_v: { 6314 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms; 6315 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 6316 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm"); 6317 } 6318 case NEON::BI__builtin_neon_vcvtn_s32_v: 6319 case NEON::BI__builtin_neon_vcvtnq_s32_v: 6320 case NEON::BI__builtin_neon_vcvtn_u32_v: 6321 case NEON::BI__builtin_neon_vcvtnq_u32_v: 6322 case NEON::BI__builtin_neon_vcvtn_s64_v: 6323 case NEON::BI__builtin_neon_vcvtnq_s64_v: 6324 case NEON::BI__builtin_neon_vcvtn_u64_v: 6325 case NEON::BI__builtin_neon_vcvtnq_u64_v: { 6326 Int = usgn ? 
Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns; 6327 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 6328 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn"); 6329 } 6330 case NEON::BI__builtin_neon_vcvtp_s32_v: 6331 case NEON::BI__builtin_neon_vcvtpq_s32_v: 6332 case NEON::BI__builtin_neon_vcvtp_u32_v: 6333 case NEON::BI__builtin_neon_vcvtpq_u32_v: 6334 case NEON::BI__builtin_neon_vcvtp_s64_v: 6335 case NEON::BI__builtin_neon_vcvtpq_s64_v: 6336 case NEON::BI__builtin_neon_vcvtp_u64_v: 6337 case NEON::BI__builtin_neon_vcvtpq_u64_v: { 6338 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps; 6339 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 6340 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp"); 6341 } 6342 case NEON::BI__builtin_neon_vmulx_v: 6343 case NEON::BI__builtin_neon_vmulxq_v: { 6344 Int = Intrinsic::aarch64_neon_fmulx; 6345 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); 6346 } 6347 case NEON::BI__builtin_neon_vmul_lane_v: 6348 case NEON::BI__builtin_neon_vmul_laneq_v: { 6349 // v1f64 vmul_lane should be mapped to Neon scalar mul lane 6350 bool Quad = false; 6351 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v) 6352 Quad = true; 6353 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 6354 llvm::Type *VTy = GetNeonType(this, 6355 NeonTypeFlags(NeonTypeFlags::Float64, false, Quad)); 6356 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 6357 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); 6358 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]); 6359 return Builder.CreateBitCast(Result, Ty); 6360 } 6361 case NEON::BI__builtin_neon_vnegd_s64: 6362 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd"); 6363 case NEON::BI__builtin_neon_vpmaxnm_v: 6364 case NEON::BI__builtin_neon_vpmaxnmq_v: { 6365 Int = Intrinsic::aarch64_neon_fmaxnmp; 6366 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm"); 6367 } 6368 case 
NEON::BI__builtin_neon_vpminnm_v: 6369 case NEON::BI__builtin_neon_vpminnmq_v: { 6370 Int = Intrinsic::aarch64_neon_fminnmp; 6371 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm"); 6372 } 6373 case NEON::BI__builtin_neon_vsqrt_v: 6374 case NEON::BI__builtin_neon_vsqrtq_v: { 6375 Int = Intrinsic::sqrt; 6376 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6377 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt"); 6378 } 6379 case NEON::BI__builtin_neon_vrbit_v: 6380 case NEON::BI__builtin_neon_vrbitq_v: { 6381 Int = Intrinsic::aarch64_neon_rbit; 6382 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit"); 6383 } 6384 case NEON::BI__builtin_neon_vaddv_u8: 6385 // FIXME: These are handled by the AArch64 scalar code. 6386 usgn = true; 6387 // FALLTHROUGH 6388 case NEON::BI__builtin_neon_vaddv_s8: { 6389 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 6390 Ty = Int32Ty; 6391 VTy = llvm::VectorType::get(Int8Ty, 8); 6392 llvm::Type *Tys[2] = { Ty, VTy }; 6393 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6394 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 6395 return Builder.CreateTrunc(Ops[0], Int8Ty); 6396 } 6397 case NEON::BI__builtin_neon_vaddv_u16: 6398 usgn = true; 6399 // FALLTHROUGH 6400 case NEON::BI__builtin_neon_vaddv_s16: { 6401 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 6402 Ty = Int32Ty; 6403 VTy = llvm::VectorType::get(Int16Ty, 4); 6404 llvm::Type *Tys[2] = { Ty, VTy }; 6405 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6406 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 6407 return Builder.CreateTrunc(Ops[0], Int16Ty); 6408 } 6409 case NEON::BI__builtin_neon_vaddvq_u8: 6410 usgn = true; 6411 // FALLTHROUGH 6412 case NEON::BI__builtin_neon_vaddvq_s8: { 6413 Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 6414 Ty = Int32Ty; 6415 VTy = llvm::VectorType::get(Int8Ty, 16); 6416 llvm::Type *Tys[2] = { Ty, VTy }; 6417 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6418 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 6419 return Builder.CreateTrunc(Ops[0], Int8Ty); 6420 } 6421 case NEON::BI__builtin_neon_vaddvq_u16: 6422 usgn = true; 6423 // FALLTHROUGH 6424 case NEON::BI__builtin_neon_vaddvq_s16: { 6425 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 6426 Ty = Int32Ty; 6427 VTy = llvm::VectorType::get(Int16Ty, 8); 6428 llvm::Type *Tys[2] = { Ty, VTy }; 6429 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6430 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 6431 return Builder.CreateTrunc(Ops[0], Int16Ty); 6432 } 6433 case NEON::BI__builtin_neon_vmaxv_u8: { 6434 Int = Intrinsic::aarch64_neon_umaxv; 6435 Ty = Int32Ty; 6436 VTy = llvm::VectorType::get(Int8Ty, 8); 6437 llvm::Type *Tys[2] = { Ty, VTy }; 6438 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6439 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6440 return Builder.CreateTrunc(Ops[0], Int8Ty); 6441 } 6442 case NEON::BI__builtin_neon_vmaxv_u16: { 6443 Int = Intrinsic::aarch64_neon_umaxv; 6444 Ty = Int32Ty; 6445 VTy = llvm::VectorType::get(Int16Ty, 4); 6446 llvm::Type *Tys[2] = { Ty, VTy }; 6447 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6448 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6449 return Builder.CreateTrunc(Ops[0], Int16Ty); 6450 } 6451 case NEON::BI__builtin_neon_vmaxvq_u8: { 6452 Int = Intrinsic::aarch64_neon_umaxv; 6453 Ty = Int32Ty; 6454 VTy = llvm::VectorType::get(Int8Ty, 16); 6455 llvm::Type *Tys[2] = { Ty, VTy }; 6456 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6457 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6458 return Builder.CreateTrunc(Ops[0], Int8Ty); 6459 } 6460 case NEON::BI__builtin_neon_vmaxvq_u16: { 6461 Int = 
Intrinsic::aarch64_neon_umaxv; 6462 Ty = Int32Ty; 6463 VTy = llvm::VectorType::get(Int16Ty, 8); 6464 llvm::Type *Tys[2] = { Ty, VTy }; 6465 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6466 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6467 return Builder.CreateTrunc(Ops[0], Int16Ty); 6468 } 6469 case NEON::BI__builtin_neon_vmaxv_s8: { 6470 Int = Intrinsic::aarch64_neon_smaxv; 6471 Ty = Int32Ty; 6472 VTy = llvm::VectorType::get(Int8Ty, 8); 6473 llvm::Type *Tys[2] = { Ty, VTy }; 6474 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6475 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6476 return Builder.CreateTrunc(Ops[0], Int8Ty); 6477 } 6478 case NEON::BI__builtin_neon_vmaxv_s16: { 6479 Int = Intrinsic::aarch64_neon_smaxv; 6480 Ty = Int32Ty; 6481 VTy = llvm::VectorType::get(Int16Ty, 4); 6482 llvm::Type *Tys[2] = { Ty, VTy }; 6483 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6484 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6485 return Builder.CreateTrunc(Ops[0], Int16Ty); 6486 } 6487 case NEON::BI__builtin_neon_vmaxvq_s8: { 6488 Int = Intrinsic::aarch64_neon_smaxv; 6489 Ty = Int32Ty; 6490 VTy = llvm::VectorType::get(Int8Ty, 16); 6491 llvm::Type *Tys[2] = { Ty, VTy }; 6492 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6493 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6494 return Builder.CreateTrunc(Ops[0], Int8Ty); 6495 } 6496 case NEON::BI__builtin_neon_vmaxvq_s16: { 6497 Int = Intrinsic::aarch64_neon_smaxv; 6498 Ty = Int32Ty; 6499 VTy = llvm::VectorType::get(Int16Ty, 8); 6500 llvm::Type *Tys[2] = { Ty, VTy }; 6501 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6502 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6503 return Builder.CreateTrunc(Ops[0], Int16Ty); 6504 } 6505 case NEON::BI__builtin_neon_vminv_u8: { 6506 Int = Intrinsic::aarch64_neon_uminv; 6507 Ty = Int32Ty; 6508 VTy = llvm::VectorType::get(Int8Ty, 8); 6509 llvm::Type *Tys[2] = { Ty, VTy }; 6510 
Ops.push_back(EmitScalarExpr(E->getArg(0))); 6511 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6512 return Builder.CreateTrunc(Ops[0], Int8Ty); 6513 } 6514 case NEON::BI__builtin_neon_vminv_u16: { 6515 Int = Intrinsic::aarch64_neon_uminv; 6516 Ty = Int32Ty; 6517 VTy = llvm::VectorType::get(Int16Ty, 4); 6518 llvm::Type *Tys[2] = { Ty, VTy }; 6519 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6520 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6521 return Builder.CreateTrunc(Ops[0], Int16Ty); 6522 } 6523 case NEON::BI__builtin_neon_vminvq_u8: { 6524 Int = Intrinsic::aarch64_neon_uminv; 6525 Ty = Int32Ty; 6526 VTy = llvm::VectorType::get(Int8Ty, 16); 6527 llvm::Type *Tys[2] = { Ty, VTy }; 6528 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6529 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6530 return Builder.CreateTrunc(Ops[0], Int8Ty); 6531 } 6532 case NEON::BI__builtin_neon_vminvq_u16: { 6533 Int = Intrinsic::aarch64_neon_uminv; 6534 Ty = Int32Ty; 6535 VTy = llvm::VectorType::get(Int16Ty, 8); 6536 llvm::Type *Tys[2] = { Ty, VTy }; 6537 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6538 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6539 return Builder.CreateTrunc(Ops[0], Int16Ty); 6540 } 6541 case NEON::BI__builtin_neon_vminv_s8: { 6542 Int = Intrinsic::aarch64_neon_sminv; 6543 Ty = Int32Ty; 6544 VTy = llvm::VectorType::get(Int8Ty, 8); 6545 llvm::Type *Tys[2] = { Ty, VTy }; 6546 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6547 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6548 return Builder.CreateTrunc(Ops[0], Int8Ty); 6549 } 6550 case NEON::BI__builtin_neon_vminv_s16: { 6551 Int = Intrinsic::aarch64_neon_sminv; 6552 Ty = Int32Ty; 6553 VTy = llvm::VectorType::get(Int16Ty, 4); 6554 llvm::Type *Tys[2] = { Ty, VTy }; 6555 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6556 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6557 return Builder.CreateTrunc(Ops[0], 
Int16Ty); 6558 } 6559 case NEON::BI__builtin_neon_vminvq_s8: { 6560 Int = Intrinsic::aarch64_neon_sminv; 6561 Ty = Int32Ty; 6562 VTy = llvm::VectorType::get(Int8Ty, 16); 6563 llvm::Type *Tys[2] = { Ty, VTy }; 6564 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6565 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6566 return Builder.CreateTrunc(Ops[0], Int8Ty); 6567 } 6568 case NEON::BI__builtin_neon_vminvq_s16: { 6569 Int = Intrinsic::aarch64_neon_sminv; 6570 Ty = Int32Ty; 6571 VTy = llvm::VectorType::get(Int16Ty, 8); 6572 llvm::Type *Tys[2] = { Ty, VTy }; 6573 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6574 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6575 return Builder.CreateTrunc(Ops[0], Int16Ty); 6576 } 6577 case NEON::BI__builtin_neon_vmul_n_f64: { 6578 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 6579 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy); 6580 return Builder.CreateFMul(Ops[0], RHS); 6581 } 6582 case NEON::BI__builtin_neon_vaddlv_u8: { 6583 Int = Intrinsic::aarch64_neon_uaddlv; 6584 Ty = Int32Ty; 6585 VTy = llvm::VectorType::get(Int8Ty, 8); 6586 llvm::Type *Tys[2] = { Ty, VTy }; 6587 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6588 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6589 return Builder.CreateTrunc(Ops[0], Int16Ty); 6590 } 6591 case NEON::BI__builtin_neon_vaddlv_u16: { 6592 Int = Intrinsic::aarch64_neon_uaddlv; 6593 Ty = Int32Ty; 6594 VTy = llvm::VectorType::get(Int16Ty, 4); 6595 llvm::Type *Tys[2] = { Ty, VTy }; 6596 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6597 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6598 } 6599 case NEON::BI__builtin_neon_vaddlvq_u8: { 6600 Int = Intrinsic::aarch64_neon_uaddlv; 6601 Ty = Int32Ty; 6602 VTy = llvm::VectorType::get(Int8Ty, 16); 6603 llvm::Type *Tys[2] = { Ty, VTy }; 6604 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6605 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6606 
return Builder.CreateTrunc(Ops[0], Int16Ty); 6607 } 6608 case NEON::BI__builtin_neon_vaddlvq_u16: { 6609 Int = Intrinsic::aarch64_neon_uaddlv; 6610 Ty = Int32Ty; 6611 VTy = llvm::VectorType::get(Int16Ty, 8); 6612 llvm::Type *Tys[2] = { Ty, VTy }; 6613 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6614 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6615 } 6616 case NEON::BI__builtin_neon_vaddlv_s8: { 6617 Int = Intrinsic::aarch64_neon_saddlv; 6618 Ty = Int32Ty; 6619 VTy = llvm::VectorType::get(Int8Ty, 8); 6620 llvm::Type *Tys[2] = { Ty, VTy }; 6621 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6622 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6623 return Builder.CreateTrunc(Ops[0], Int16Ty); 6624 } 6625 case NEON::BI__builtin_neon_vaddlv_s16: { 6626 Int = Intrinsic::aarch64_neon_saddlv; 6627 Ty = Int32Ty; 6628 VTy = llvm::VectorType::get(Int16Ty, 4); 6629 llvm::Type *Tys[2] = { Ty, VTy }; 6630 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6631 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6632 } 6633 case NEON::BI__builtin_neon_vaddlvq_s8: { 6634 Int = Intrinsic::aarch64_neon_saddlv; 6635 Ty = Int32Ty; 6636 VTy = llvm::VectorType::get(Int8Ty, 16); 6637 llvm::Type *Tys[2] = { Ty, VTy }; 6638 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6639 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6640 return Builder.CreateTrunc(Ops[0], Int16Ty); 6641 } 6642 case NEON::BI__builtin_neon_vaddlvq_s16: { 6643 Int = Intrinsic::aarch64_neon_saddlv; 6644 Ty = Int32Ty; 6645 VTy = llvm::VectorType::get(Int16Ty, 8); 6646 llvm::Type *Tys[2] = { Ty, VTy }; 6647 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6648 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6649 } 6650 case NEON::BI__builtin_neon_vsri_n_v: 6651 case NEON::BI__builtin_neon_vsriq_n_v: { 6652 Int = Intrinsic::aarch64_neon_vsri; 6653 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 6654 return EmitNeonCall(Intrin, Ops, "vsri_n"); 6655 } 
6656 case NEON::BI__builtin_neon_vsli_n_v: 6657 case NEON::BI__builtin_neon_vsliq_n_v: { 6658 Int = Intrinsic::aarch64_neon_vsli; 6659 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 6660 return EmitNeonCall(Intrin, Ops, "vsli_n"); 6661 } 6662 case NEON::BI__builtin_neon_vsra_n_v: 6663 case NEON::BI__builtin_neon_vsraq_n_v: 6664 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6665 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 6666 return Builder.CreateAdd(Ops[0], Ops[1]); 6667 case NEON::BI__builtin_neon_vrsra_n_v: 6668 case NEON::BI__builtin_neon_vrsraq_n_v: { 6669 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl; 6670 SmallVector<llvm::Value*,2> TmpOps; 6671 TmpOps.push_back(Ops[1]); 6672 TmpOps.push_back(Ops[2]); 6673 Function* F = CGM.getIntrinsic(Int, Ty); 6674 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true); 6675 Ops[0] = Builder.CreateBitCast(Ops[0], VTy); 6676 return Builder.CreateAdd(Ops[0], tmp); 6677 } 6678 // FIXME: Sharing loads & stores with 32-bit is complicated by the absence 6679 // of an Align parameter here. 
6680 case NEON::BI__builtin_neon_vld1_x2_v: 6681 case NEON::BI__builtin_neon_vld1q_x2_v: 6682 case NEON::BI__builtin_neon_vld1_x3_v: 6683 case NEON::BI__builtin_neon_vld1q_x3_v: 6684 case NEON::BI__builtin_neon_vld1_x4_v: 6685 case NEON::BI__builtin_neon_vld1q_x4_v: { 6686 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); 6687 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6688 llvm::Type *Tys[2] = { VTy, PTy }; 6689 unsigned Int; 6690 switch (BuiltinID) { 6691 case NEON::BI__builtin_neon_vld1_x2_v: 6692 case NEON::BI__builtin_neon_vld1q_x2_v: 6693 Int = Intrinsic::aarch64_neon_ld1x2; 6694 break; 6695 case NEON::BI__builtin_neon_vld1_x3_v: 6696 case NEON::BI__builtin_neon_vld1q_x3_v: 6697 Int = Intrinsic::aarch64_neon_ld1x3; 6698 break; 6699 case NEON::BI__builtin_neon_vld1_x4_v: 6700 case NEON::BI__builtin_neon_vld1q_x4_v: 6701 Int = Intrinsic::aarch64_neon_ld1x4; 6702 break; 6703 } 6704 Function *F = CGM.getIntrinsic(Int, Tys); 6705 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN"); 6706 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6707 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6708 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6709 } 6710 case NEON::BI__builtin_neon_vst1_x2_v: 6711 case NEON::BI__builtin_neon_vst1q_x2_v: 6712 case NEON::BI__builtin_neon_vst1_x3_v: 6713 case NEON::BI__builtin_neon_vst1q_x3_v: 6714 case NEON::BI__builtin_neon_vst1_x4_v: 6715 case NEON::BI__builtin_neon_vst1q_x4_v: { 6716 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); 6717 llvm::Type *Tys[2] = { VTy, PTy }; 6718 unsigned Int; 6719 switch (BuiltinID) { 6720 case NEON::BI__builtin_neon_vst1_x2_v: 6721 case NEON::BI__builtin_neon_vst1q_x2_v: 6722 Int = Intrinsic::aarch64_neon_st1x2; 6723 break; 6724 case NEON::BI__builtin_neon_vst1_x3_v: 6725 case NEON::BI__builtin_neon_vst1q_x3_v: 6726 Int = Intrinsic::aarch64_neon_st1x3; 6727 break; 6728 case NEON::BI__builtin_neon_vst1_x4_v: 6729 case 
NEON::BI__builtin_neon_vst1q_x4_v: 6730 Int = Intrinsic::aarch64_neon_st1x4; 6731 break; 6732 } 6733 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); 6734 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, ""); 6735 } 6736 case NEON::BI__builtin_neon_vld1_v: 6737 case NEON::BI__builtin_neon_vld1q_v: { 6738 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 6739 auto Alignment = CharUnits::fromQuantity( 6740 BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16); 6741 return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment); 6742 } 6743 case NEON::BI__builtin_neon_vst1_v: 6744 case NEON::BI__builtin_neon_vst1q_v: 6745 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 6746 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 6747 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6748 case NEON::BI__builtin_neon_vld1_lane_v: 6749 case NEON::BI__builtin_neon_vld1q_lane_v: { 6750 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6751 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 6752 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6753 auto Alignment = CharUnits::fromQuantity( 6754 BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16); 6755 Ops[0] = 6756 Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment); 6757 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane"); 6758 } 6759 case NEON::BI__builtin_neon_vld1_dup_v: 6760 case NEON::BI__builtin_neon_vld1q_dup_v: { 6761 Value *V = UndefValue::get(Ty); 6762 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 6763 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6764 auto Alignment = CharUnits::fromQuantity( 6765 BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 
8 : 16); 6766 Ops[0] = 6767 Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment); 6768 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 6769 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI); 6770 return EmitNeonSplat(Ops[0], CI); 6771 } 6772 case NEON::BI__builtin_neon_vst1_lane_v: 6773 case NEON::BI__builtin_neon_vst1q_lane_v: 6774 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6775 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 6776 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6777 return Builder.CreateDefaultAlignedStore(Ops[1], 6778 Builder.CreateBitCast(Ops[0], Ty)); 6779 case NEON::BI__builtin_neon_vld2_v: 6780 case NEON::BI__builtin_neon_vld2q_v: { 6781 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 6782 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6783 llvm::Type *Tys[2] = { VTy, PTy }; 6784 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys); 6785 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 6786 Ops[0] = Builder.CreateBitCast(Ops[0], 6787 llvm::PointerType::getUnqual(Ops[1]->getType())); 6788 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6789 } 6790 case NEON::BI__builtin_neon_vld3_v: 6791 case NEON::BI__builtin_neon_vld3q_v: { 6792 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 6793 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6794 llvm::Type *Tys[2] = { VTy, PTy }; 6795 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys); 6796 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 6797 Ops[0] = Builder.CreateBitCast(Ops[0], 6798 llvm::PointerType::getUnqual(Ops[1]->getType())); 6799 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6800 } 6801 case NEON::BI__builtin_neon_vld4_v: 6802 case NEON::BI__builtin_neon_vld4q_v: { 6803 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 6804 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6805 llvm::Type *Tys[2] = { VTy, PTy }; 6806 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys); 6807 Ops[1] = 
Builder.CreateCall(F, Ops[1], "vld4"); 6808 Ops[0] = Builder.CreateBitCast(Ops[0], 6809 llvm::PointerType::getUnqual(Ops[1]->getType())); 6810 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6811 } 6812 case NEON::BI__builtin_neon_vld2_dup_v: 6813 case NEON::BI__builtin_neon_vld2q_dup_v: { 6814 llvm::Type *PTy = 6815 llvm::PointerType::getUnqual(VTy->getElementType()); 6816 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6817 llvm::Type *Tys[2] = { VTy, PTy }; 6818 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys); 6819 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 6820 Ops[0] = Builder.CreateBitCast(Ops[0], 6821 llvm::PointerType::getUnqual(Ops[1]->getType())); 6822 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6823 } 6824 case NEON::BI__builtin_neon_vld3_dup_v: 6825 case NEON::BI__builtin_neon_vld3q_dup_v: { 6826 llvm::Type *PTy = 6827 llvm::PointerType::getUnqual(VTy->getElementType()); 6828 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6829 llvm::Type *Tys[2] = { VTy, PTy }; 6830 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys); 6831 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 6832 Ops[0] = Builder.CreateBitCast(Ops[0], 6833 llvm::PointerType::getUnqual(Ops[1]->getType())); 6834 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6835 } 6836 case NEON::BI__builtin_neon_vld4_dup_v: 6837 case NEON::BI__builtin_neon_vld4q_dup_v: { 6838 llvm::Type *PTy = 6839 llvm::PointerType::getUnqual(VTy->getElementType()); 6840 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6841 llvm::Type *Tys[2] = { VTy, PTy }; 6842 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys); 6843 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); 6844 Ops[0] = Builder.CreateBitCast(Ops[0], 6845 llvm::PointerType::getUnqual(Ops[1]->getType())); 6846 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6847 } 6848 case NEON::BI__builtin_neon_vld2_lane_v: 6849 case NEON::BI__builtin_neon_vld2q_lane_v: { 6850 llvm::Type 
*Tys[2] = { VTy, Ops[1]->getType() }; 6851 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys); 6852 Ops.push_back(Ops[1]); 6853 Ops.erase(Ops.begin()+1); 6854 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6855 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 6856 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); 6857 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane"); 6858 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6859 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6860 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6861 } 6862 case NEON::BI__builtin_neon_vld3_lane_v: 6863 case NEON::BI__builtin_neon_vld3q_lane_v: { 6864 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 6865 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys); 6866 Ops.push_back(Ops[1]); 6867 Ops.erase(Ops.begin()+1); 6868 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6869 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 6870 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 6871 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); 6872 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane"); 6873 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6874 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6875 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6876 } 6877 case NEON::BI__builtin_neon_vld4_lane_v: 6878 case NEON::BI__builtin_neon_vld4q_lane_v: { 6879 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 6880 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys); 6881 Ops.push_back(Ops[1]); 6882 Ops.erase(Ops.begin()+1); 6883 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6884 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 6885 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 6886 Ops[4] = Builder.CreateBitCast(Ops[4], Ty); 6887 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty); 6888 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane"); 6889 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6890 Ops[0] = 
Builder.CreateBitCast(Ops[0], Ty); 6891 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6892 } 6893 case NEON::BI__builtin_neon_vst2_v: 6894 case NEON::BI__builtin_neon_vst2q_v: { 6895 Ops.push_back(Ops[0]); 6896 Ops.erase(Ops.begin()); 6897 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() }; 6898 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys), 6899 Ops, ""); 6900 } 6901 case NEON::BI__builtin_neon_vst2_lane_v: 6902 case NEON::BI__builtin_neon_vst2q_lane_v: { 6903 Ops.push_back(Ops[0]); 6904 Ops.erase(Ops.begin()); 6905 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); 6906 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; 6907 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys), 6908 Ops, ""); 6909 } 6910 case NEON::BI__builtin_neon_vst3_v: 6911 case NEON::BI__builtin_neon_vst3q_v: { 6912 Ops.push_back(Ops[0]); 6913 Ops.erase(Ops.begin()); 6914 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; 6915 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys), 6916 Ops, ""); 6917 } 6918 case NEON::BI__builtin_neon_vst3_lane_v: 6919 case NEON::BI__builtin_neon_vst3q_lane_v: { 6920 Ops.push_back(Ops[0]); 6921 Ops.erase(Ops.begin()); 6922 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); 6923 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 6924 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys), 6925 Ops, ""); 6926 } 6927 case NEON::BI__builtin_neon_vst4_v: 6928 case NEON::BI__builtin_neon_vst4q_v: { 6929 Ops.push_back(Ops[0]); 6930 Ops.erase(Ops.begin()); 6931 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 6932 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys), 6933 Ops, ""); 6934 } 6935 case NEON::BI__builtin_neon_vst4_lane_v: 6936 case NEON::BI__builtin_neon_vst4q_lane_v: { 6937 Ops.push_back(Ops[0]); 6938 Ops.erase(Ops.begin()); 6939 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); 6940 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() }; 6941 return 
EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys), 6942 Ops, ""); 6943 } 6944 case NEON::BI__builtin_neon_vtrn_v: 6945 case NEON::BI__builtin_neon_vtrnq_v: { 6946 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 6947 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6948 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 6949 Value *SV = nullptr; 6950 6951 for (unsigned vi = 0; vi != 2; ++vi) { 6952 SmallVector<uint32_t, 16> Indices; 6953 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 6954 Indices.push_back(i+vi); 6955 Indices.push_back(i+e+vi); 6956 } 6957 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 6958 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn"); 6959 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 6960 } 6961 return SV; 6962 } 6963 case NEON::BI__builtin_neon_vuzp_v: 6964 case NEON::BI__builtin_neon_vuzpq_v: { 6965 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 6966 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6967 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 6968 Value *SV = nullptr; 6969 6970 for (unsigned vi = 0; vi != 2; ++vi) { 6971 SmallVector<uint32_t, 16> Indices; 6972 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 6973 Indices.push_back(2*i+vi); 6974 6975 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 6976 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp"); 6977 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 6978 } 6979 return SV; 6980 } 6981 case NEON::BI__builtin_neon_vzip_v: 6982 case NEON::BI__builtin_neon_vzipq_v: { 6983 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 6984 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6985 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 6986 Value *SV = nullptr; 6987 6988 for (unsigned vi = 0; vi != 2; ++vi) { 6989 SmallVector<uint32_t, 16> Indices; 6990 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 6991 
Indices.push_back((i + vi*e) >> 1); 6992 Indices.push_back(((i + vi*e) >> 1)+e); 6993 } 6994 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 6995 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip"); 6996 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 6997 } 6998 return SV; 6999 } 7000 case NEON::BI__builtin_neon_vqtbl1q_v: { 7001 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty), 7002 Ops, "vtbl1"); 7003 } 7004 case NEON::BI__builtin_neon_vqtbl2q_v: { 7005 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty), 7006 Ops, "vtbl2"); 7007 } 7008 case NEON::BI__builtin_neon_vqtbl3q_v: { 7009 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty), 7010 Ops, "vtbl3"); 7011 } 7012 case NEON::BI__builtin_neon_vqtbl4q_v: { 7013 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty), 7014 Ops, "vtbl4"); 7015 } 7016 case NEON::BI__builtin_neon_vqtbx1q_v: { 7017 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty), 7018 Ops, "vtbx1"); 7019 } 7020 case NEON::BI__builtin_neon_vqtbx2q_v: { 7021 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty), 7022 Ops, "vtbx2"); 7023 } 7024 case NEON::BI__builtin_neon_vqtbx3q_v: { 7025 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty), 7026 Ops, "vtbx3"); 7027 } 7028 case NEON::BI__builtin_neon_vqtbx4q_v: { 7029 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty), 7030 Ops, "vtbx4"); 7031 } 7032 case NEON::BI__builtin_neon_vsqadd_v: 7033 case NEON::BI__builtin_neon_vsqaddq_v: { 7034 Int = Intrinsic::aarch64_neon_usqadd; 7035 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd"); 7036 } 7037 case NEON::BI__builtin_neon_vuqadd_v: 7038 case NEON::BI__builtin_neon_vuqaddq_v: { 7039 Int = Intrinsic::aarch64_neon_suqadd; 7040 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); 7041 } 7042 } 7043 } 7044 7045 llvm::Value *CodeGenFunction:: 7046 
BuildVector(ArrayRef<llvm::Value*> Ops) { 7047 assert((Ops.size() & (Ops.size() - 1)) == 0 && 7048 "Not a power-of-two sized vector!"); 7049 bool AllConstants = true; 7050 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i) 7051 AllConstants &= isa<Constant>(Ops[i]); 7052 7053 // If this is a constant vector, create a ConstantVector. 7054 if (AllConstants) { 7055 SmallVector<llvm::Constant*, 16> CstOps; 7056 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 7057 CstOps.push_back(cast<Constant>(Ops[i])); 7058 return llvm::ConstantVector::get(CstOps); 7059 } 7060 7061 // Otherwise, insertelement the values to build the vector. 7062 Value *Result = 7063 llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size())); 7064 7065 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 7066 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i)); 7067 7068 return Result; 7069 } 7070 7071 // Convert the mask from an integer type to a vector of i1. 7072 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask, 7073 unsigned NumElts) { 7074 7075 llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(), 7076 cast<IntegerType>(Mask->getType())->getBitWidth()); 7077 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy); 7078 7079 // If we have less than 8 elements, then the starting mask was an i8 and 7080 // we need to extract down to the right number of elements. 7081 if (NumElts < 8) { 7082 uint32_t Indices[4]; 7083 for (unsigned i = 0; i != NumElts; ++i) 7084 Indices[i] = i; 7085 MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec, 7086 makeArrayRef(Indices, NumElts), 7087 "extract"); 7088 } 7089 return MaskVec; 7090 } 7091 7092 static Value *EmitX86MaskedStore(CodeGenFunction &CGF, 7093 SmallVectorImpl<Value *> &Ops, 7094 unsigned Align) { 7095 // Cast the pointer to right type. 
7096 Ops[0] = CGF.Builder.CreateBitCast(Ops[0], 7097 llvm::PointerType::getUnqual(Ops[1]->getType())); 7098 7099 // If the mask is all ones just emit a regular store. 7100 if (const auto *C = dyn_cast<Constant>(Ops[2])) 7101 if (C->isAllOnesValue()) 7102 return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align); 7103 7104 Value *MaskVec = getMaskVecValue(CGF, Ops[2], 7105 Ops[1]->getType()->getVectorNumElements()); 7106 7107 return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec); 7108 } 7109 7110 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, 7111 SmallVectorImpl<Value *> &Ops, unsigned Align) { 7112 // Cast the pointer to right type. 7113 Ops[0] = CGF.Builder.CreateBitCast(Ops[0], 7114 llvm::PointerType::getUnqual(Ops[1]->getType())); 7115 7116 // If the mask is all ones just emit a regular store. 7117 if (const auto *C = dyn_cast<Constant>(Ops[2])) 7118 if (C->isAllOnesValue()) 7119 return CGF.Builder.CreateAlignedLoad(Ops[0], Align); 7120 7121 Value *MaskVec = getMaskVecValue(CGF, Ops[2], 7122 Ops[1]->getType()->getVectorNumElements()); 7123 7124 return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]); 7125 } 7126 7127 static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF, 7128 SmallVectorImpl<Value *> &Ops, 7129 llvm::Type *DstTy, 7130 unsigned SrcSizeInBits, 7131 unsigned Align) { 7132 // Load the subvector. 7133 Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align); 7134 7135 // Create broadcast mask. 
7136 unsigned NumDstElts = DstTy->getVectorNumElements(); 7137 unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits(); 7138 7139 SmallVector<uint32_t, 8> Mask; 7140 for (unsigned i = 0; i != NumDstElts; i += NumSrcElts) 7141 for (unsigned j = 0; j != NumSrcElts; ++j) 7142 Mask.push_back(j); 7143 7144 return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst"); 7145 } 7146 7147 static Value *EmitX86Select(CodeGenFunction &CGF, 7148 Value *Mask, Value *Op0, Value *Op1) { 7149 7150 // If the mask is all ones just return first argument. 7151 if (const auto *C = dyn_cast<Constant>(Mask)) 7152 if (C->isAllOnesValue()) 7153 return Op0; 7154 7155 Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements()); 7156 7157 return CGF.Builder.CreateSelect(Mask, Op0, Op1); 7158 } 7159 7160 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, 7161 bool Signed, SmallVectorImpl<Value *> &Ops) { 7162 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 7163 Value *Cmp; 7164 7165 if (CC == 3) { 7166 Cmp = Constant::getNullValue( 7167 llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts)); 7168 } else if (CC == 7) { 7169 Cmp = Constant::getAllOnesValue( 7170 llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts)); 7171 } else { 7172 ICmpInst::Predicate Pred; 7173 switch (CC) { 7174 default: llvm_unreachable("Unknown condition code"); 7175 case 0: Pred = ICmpInst::ICMP_EQ; break; 7176 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; 7177 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; 7178 case 4: Pred = ICmpInst::ICMP_NE; break; 7179 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; 7180 case 6: Pred = Signed ? 
ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; 7181 } 7182 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); 7183 } 7184 7185 const auto *C = dyn_cast<Constant>(Ops.back()); 7186 if (!C || !C->isAllOnesValue()) 7187 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts)); 7188 7189 if (NumElts < 8) { 7190 uint32_t Indices[8]; 7191 for (unsigned i = 0; i != NumElts; ++i) 7192 Indices[i] = i; 7193 for (unsigned i = NumElts; i != 8; ++i) 7194 Indices[i] = i % NumElts + NumElts; 7195 Cmp = CGF.Builder.CreateShuffleVector( 7196 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices); 7197 } 7198 return CGF.Builder.CreateBitCast(Cmp, 7199 IntegerType::get(CGF.getLLVMContext(), 7200 std::max(NumElts, 8U))); 7201 } 7202 7203 static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred, 7204 ArrayRef<Value *> Ops) { 7205 Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); 7206 Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]); 7207 7208 if (Ops.size() == 2) 7209 return Res; 7210 7211 assert(Ops.size() == 4); 7212 return EmitX86Select(CGF, Ops[3], Res, Ops[2]); 7213 } 7214 7215 static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, 7216 llvm::Type *DstTy) { 7217 unsigned NumberOfElements = DstTy->getVectorNumElements(); 7218 Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements); 7219 return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2"); 7220 } 7221 7222 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, 7223 const CallExpr *E) { 7224 if (BuiltinID == X86::BI__builtin_ms_va_start || 7225 BuiltinID == X86::BI__builtin_ms_va_end) 7226 return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(), 7227 BuiltinID == X86::BI__builtin_ms_va_start); 7228 if (BuiltinID == X86::BI__builtin_ms_va_copy) { 7229 // Lower this manually. 
We can't reliably determine whether or not any 7230 // given va_copy() is for a Win64 va_list from the calling convention 7231 // alone, because it's legal to do this from a System V ABI function. 7232 // With opaque pointer types, we won't have enough information in LLVM 7233 // IR to determine this from the argument types, either. Best to do it 7234 // now, while we have enough information. 7235 Address DestAddr = EmitMSVAListRef(E->getArg(0)); 7236 Address SrcAddr = EmitMSVAListRef(E->getArg(1)); 7237 7238 llvm::Type *BPP = Int8PtrPtrTy; 7239 7240 DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"), 7241 DestAddr.getAlignment()); 7242 SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"), 7243 SrcAddr.getAlignment()); 7244 7245 Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val"); 7246 return Builder.CreateStore(ArgPtr, DestAddr); 7247 } 7248 7249 SmallVector<Value*, 4> Ops; 7250 7251 // Find out if any arguments are required to be integer constant expressions. 7252 unsigned ICEArguments = 0; 7253 ASTContext::GetBuiltinTypeError Error; 7254 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 7255 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 7256 7257 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { 7258 // If this is a normal argument, just emit it as a scalar. 7259 if ((ICEArguments & (1 << i)) == 0) { 7260 Ops.push_back(EmitScalarExpr(E->getArg(i))); 7261 continue; 7262 } 7263 7264 // If this is required to be a constant, constant fold it so that we know 7265 // that the generated intrinsic gets a ConstantInt. 
7266 llvm::APSInt Result; 7267 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 7268 assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; 7269 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 7270 } 7271 7272 // These exist so that the builtin that takes an immediate can be bounds 7273 // checked by clang to avoid passing bad immediates to the backend. Since 7274 // AVX has a larger immediate than SSE we would need separate builtins to 7275 // do the different bounds checking. Rather than create a clang specific 7276 // SSE only builtin, this implements eight separate builtins to match gcc 7277 // implementation. 7278 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) { 7279 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm)); 7280 llvm::Function *F = CGM.getIntrinsic(ID); 7281 return Builder.CreateCall(F, Ops); 7282 }; 7283 7284 // For the vector forms of FP comparisons, translate the builtins directly to 7285 // IR. 7286 // TODO: The builtins could be removed if the SSE header files used vector 7287 // extension comparisons directly (vector ordered/unordered may need 7288 // additional support via __builtin_isnan()). 7289 auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) { 7290 Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]); 7291 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType()); 7292 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy); 7293 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy); 7294 return Builder.CreateBitCast(Sext, FPVecTy); 7295 }; 7296 7297 switch (BuiltinID) { 7298 default: return nullptr; 7299 case X86::BI__builtin_cpu_supports: { 7300 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts(); 7301 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString(); 7302 7303 // TODO: When/if this becomes more than x86 specific then use a TargetInfo 7304 // based mapping. 
7305 // Processor features and mapping to processor feature value. 7306 enum X86Features { 7307 CMOV = 0, 7308 MMX, 7309 POPCNT, 7310 SSE, 7311 SSE2, 7312 SSE3, 7313 SSSE3, 7314 SSE4_1, 7315 SSE4_2, 7316 AVX, 7317 AVX2, 7318 SSE4_A, 7319 FMA4, 7320 XOP, 7321 FMA, 7322 AVX512F, 7323 BMI, 7324 BMI2, 7325 AES, 7326 PCLMUL, 7327 AVX512VL, 7328 AVX512BW, 7329 AVX512DQ, 7330 AVX512CD, 7331 AVX512ER, 7332 AVX512PF, 7333 AVX512VBMI, 7334 AVX512IFMA, 7335 AVX512VPOPCNTDQ, 7336 MAX 7337 }; 7338 7339 X86Features Feature = 7340 StringSwitch<X86Features>(FeatureStr) 7341 .Case("cmov", X86Features::CMOV) 7342 .Case("mmx", X86Features::MMX) 7343 .Case("popcnt", X86Features::POPCNT) 7344 .Case("sse", X86Features::SSE) 7345 .Case("sse2", X86Features::SSE2) 7346 .Case("sse3", X86Features::SSE3) 7347 .Case("ssse3", X86Features::SSSE3) 7348 .Case("sse4.1", X86Features::SSE4_1) 7349 .Case("sse4.2", X86Features::SSE4_2) 7350 .Case("avx", X86Features::AVX) 7351 .Case("avx2", X86Features::AVX2) 7352 .Case("sse4a", X86Features::SSE4_A) 7353 .Case("fma4", X86Features::FMA4) 7354 .Case("xop", X86Features::XOP) 7355 .Case("fma", X86Features::FMA) 7356 .Case("avx512f", X86Features::AVX512F) 7357 .Case("bmi", X86Features::BMI) 7358 .Case("bmi2", X86Features::BMI2) 7359 .Case("aes", X86Features::AES) 7360 .Case("pclmul", X86Features::PCLMUL) 7361 .Case("avx512vl", X86Features::AVX512VL) 7362 .Case("avx512bw", X86Features::AVX512BW) 7363 .Case("avx512dq", X86Features::AVX512DQ) 7364 .Case("avx512cd", X86Features::AVX512CD) 7365 .Case("avx512er", X86Features::AVX512ER) 7366 .Case("avx512pf", X86Features::AVX512PF) 7367 .Case("avx512vbmi", X86Features::AVX512VBMI) 7368 .Case("avx512ifma", X86Features::AVX512IFMA) 7369 .Case("avx512vpopcntdq", X86Features::AVX512VPOPCNTDQ) 7370 .Default(X86Features::MAX); 7371 assert(Feature != X86Features::MAX && "Invalid feature!"); 7372 7373 // Matching the struct layout from the compiler-rt/libgcc structure that is 7374 // filled in: 7375 // unsigned int 
__cpu_vendor; 7376 // unsigned int __cpu_type; 7377 // unsigned int __cpu_subtype; 7378 // unsigned int __cpu_features[1]; 7379 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, 7380 llvm::ArrayType::get(Int32Ty, 1)); 7381 7382 // Grab the global __cpu_model. 7383 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); 7384 7385 // Grab the first (0th) element from the field __cpu_features off of the 7386 // global in the struct STy. 7387 Value *Idxs[] = { 7388 ConstantInt::get(Int32Ty, 0), 7389 ConstantInt::get(Int32Ty, 3), 7390 ConstantInt::get(Int32Ty, 0) 7391 }; 7392 Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs); 7393 Value *Features = Builder.CreateAlignedLoad(CpuFeatures, 7394 CharUnits::fromQuantity(4)); 7395 7396 // Check the value of the bit corresponding to the feature requested. 7397 Value *Bitset = Builder.CreateAnd( 7398 Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature)); 7399 return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0)); 7400 } 7401 case X86::BI_mm_prefetch: { 7402 Value *Address = Ops[0]; 7403 Value *RW = ConstantInt::get(Int32Ty, 0); 7404 Value *Locality = Ops[1]; 7405 Value *Data = ConstantInt::get(Int32Ty, 1); 7406 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 7407 return Builder.CreateCall(F, {Address, RW, Locality, Data}); 7408 } 7409 case X86::BI_mm_clflush: { 7410 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush), 7411 Ops[0]); 7412 } 7413 case X86::BI_mm_lfence: { 7414 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence)); 7415 } 7416 case X86::BI_mm_mfence: { 7417 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence)); 7418 } 7419 case X86::BI_mm_sfence: { 7420 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence)); 7421 } 7422 case X86::BI_mm_pause: { 7423 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause)); 7424 } 7425 case X86::BI__rdtsc: { 7426 return 
Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc)); 7427 } 7428 case X86::BI__builtin_ia32_undef128: 7429 case X86::BI__builtin_ia32_undef256: 7430 case X86::BI__builtin_ia32_undef512: 7431 // The x86 definition of "undef" is not the same as the LLVM definition 7432 // (PR32176). We leave optimizing away an unnecessary zero constant to the 7433 // IR optimizer and backend. 7434 // TODO: If we had a "freeze" IR instruction to generate a fixed undef 7435 // value, we should use that here instead of a zero. 7436 return llvm::Constant::getNullValue(ConvertType(E->getType())); 7437 case X86::BI__builtin_ia32_vec_init_v8qi: 7438 case X86::BI__builtin_ia32_vec_init_v4hi: 7439 case X86::BI__builtin_ia32_vec_init_v2si: 7440 return Builder.CreateBitCast(BuildVector(Ops), 7441 llvm::Type::getX86_MMXTy(getLLVMContext())); 7442 case X86::BI__builtin_ia32_vec_ext_v2si: 7443 return Builder.CreateExtractElement(Ops[0], 7444 llvm::ConstantInt::get(Ops[1]->getType(), 0)); 7445 case X86::BI_mm_setcsr: 7446 case X86::BI__builtin_ia32_ldmxcsr: { 7447 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 7448 Builder.CreateStore(Ops[0], Tmp); 7449 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr), 7450 Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy)); 7451 } 7452 case X86::BI_mm_getcsr: 7453 case X86::BI__builtin_ia32_stmxcsr: { 7454 Address Tmp = CreateMemTemp(E->getType()); 7455 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr), 7456 Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy)); 7457 return Builder.CreateLoad(Tmp, "stmxcsr"); 7458 } 7459 case X86::BI__builtin_ia32_xsave: 7460 case X86::BI__builtin_ia32_xsave64: 7461 case X86::BI__builtin_ia32_xrstor: 7462 case X86::BI__builtin_ia32_xrstor64: 7463 case X86::BI__builtin_ia32_xsaveopt: 7464 case X86::BI__builtin_ia32_xsaveopt64: 7465 case X86::BI__builtin_ia32_xrstors: 7466 case X86::BI__builtin_ia32_xrstors64: 7467 case X86::BI__builtin_ia32_xsavec: 7468 case 
X86::BI__builtin_ia32_xsavec64: 7469 case X86::BI__builtin_ia32_xsaves: 7470 case X86::BI__builtin_ia32_xsaves64: { 7471 Intrinsic::ID ID; 7472 #define INTRINSIC_X86_XSAVE_ID(NAME) \ 7473 case X86::BI__builtin_ia32_##NAME: \ 7474 ID = Intrinsic::x86_##NAME; \ 7475 break 7476 switch (BuiltinID) { 7477 default: llvm_unreachable("Unsupported intrinsic!"); 7478 INTRINSIC_X86_XSAVE_ID(xsave); 7479 INTRINSIC_X86_XSAVE_ID(xsave64); 7480 INTRINSIC_X86_XSAVE_ID(xrstor); 7481 INTRINSIC_X86_XSAVE_ID(xrstor64); 7482 INTRINSIC_X86_XSAVE_ID(xsaveopt); 7483 INTRINSIC_X86_XSAVE_ID(xsaveopt64); 7484 INTRINSIC_X86_XSAVE_ID(xrstors); 7485 INTRINSIC_X86_XSAVE_ID(xrstors64); 7486 INTRINSIC_X86_XSAVE_ID(xsavec); 7487 INTRINSIC_X86_XSAVE_ID(xsavec64); 7488 INTRINSIC_X86_XSAVE_ID(xsaves); 7489 INTRINSIC_X86_XSAVE_ID(xsaves64); 7490 } 7491 #undef INTRINSIC_X86_XSAVE_ID 7492 Value *Mhi = Builder.CreateTrunc( 7493 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty); 7494 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty); 7495 Ops[1] = Mhi; 7496 Ops.push_back(Mlo); 7497 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 7498 } 7499 case X86::BI__builtin_ia32_storedqudi128_mask: 7500 case X86::BI__builtin_ia32_storedqusi128_mask: 7501 case X86::BI__builtin_ia32_storedquhi128_mask: 7502 case X86::BI__builtin_ia32_storedquqi128_mask: 7503 case X86::BI__builtin_ia32_storeupd128_mask: 7504 case X86::BI__builtin_ia32_storeups128_mask: 7505 case X86::BI__builtin_ia32_storedqudi256_mask: 7506 case X86::BI__builtin_ia32_storedqusi256_mask: 7507 case X86::BI__builtin_ia32_storedquhi256_mask: 7508 case X86::BI__builtin_ia32_storedquqi256_mask: 7509 case X86::BI__builtin_ia32_storeupd256_mask: 7510 case X86::BI__builtin_ia32_storeups256_mask: 7511 case X86::BI__builtin_ia32_storedqudi512_mask: 7512 case X86::BI__builtin_ia32_storedqusi512_mask: 7513 case X86::BI__builtin_ia32_storedquhi512_mask: 7514 case X86::BI__builtin_ia32_storedquqi512_mask: 7515 case 
X86::BI__builtin_ia32_storeupd512_mask: 7516 case X86::BI__builtin_ia32_storeups512_mask: 7517 return EmitX86MaskedStore(*this, Ops, 1); 7518 7519 case X86::BI__builtin_ia32_storess128_mask: 7520 case X86::BI__builtin_ia32_storesd128_mask: { 7521 return EmitX86MaskedStore(*this, Ops, 16); 7522 } 7523 case X86::BI__builtin_ia32_vpopcntd_512: 7524 case X86::BI__builtin_ia32_vpopcntq_512: { 7525 llvm::Type *ResultType = ConvertType(E->getType()); 7526 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); 7527 return Builder.CreateCall(F, Ops); 7528 } 7529 case X86::BI__builtin_ia32_cvtmask2b128: 7530 case X86::BI__builtin_ia32_cvtmask2b256: 7531 case X86::BI__builtin_ia32_cvtmask2b512: 7532 case X86::BI__builtin_ia32_cvtmask2w128: 7533 case X86::BI__builtin_ia32_cvtmask2w256: 7534 case X86::BI__builtin_ia32_cvtmask2w512: 7535 case X86::BI__builtin_ia32_cvtmask2d128: 7536 case X86::BI__builtin_ia32_cvtmask2d256: 7537 case X86::BI__builtin_ia32_cvtmask2d512: 7538 case X86::BI__builtin_ia32_cvtmask2q128: 7539 case X86::BI__builtin_ia32_cvtmask2q256: 7540 case X86::BI__builtin_ia32_cvtmask2q512: 7541 return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType())); 7542 7543 case X86::BI__builtin_ia32_movdqa32store128_mask: 7544 case X86::BI__builtin_ia32_movdqa64store128_mask: 7545 case X86::BI__builtin_ia32_storeaps128_mask: 7546 case X86::BI__builtin_ia32_storeapd128_mask: 7547 case X86::BI__builtin_ia32_movdqa32store256_mask: 7548 case X86::BI__builtin_ia32_movdqa64store256_mask: 7549 case X86::BI__builtin_ia32_storeaps256_mask: 7550 case X86::BI__builtin_ia32_storeapd256_mask: 7551 case X86::BI__builtin_ia32_movdqa32store512_mask: 7552 case X86::BI__builtin_ia32_movdqa64store512_mask: 7553 case X86::BI__builtin_ia32_storeaps512_mask: 7554 case X86::BI__builtin_ia32_storeapd512_mask: { 7555 unsigned Align = 7556 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); 7557 return EmitX86MaskedStore(*this, Ops, Align); 7558 } 7559 case 
X86::BI__builtin_ia32_loadups128_mask: 7560 case X86::BI__builtin_ia32_loadups256_mask: 7561 case X86::BI__builtin_ia32_loadups512_mask: 7562 case X86::BI__builtin_ia32_loadupd128_mask: 7563 case X86::BI__builtin_ia32_loadupd256_mask: 7564 case X86::BI__builtin_ia32_loadupd512_mask: 7565 case X86::BI__builtin_ia32_loaddquqi128_mask: 7566 case X86::BI__builtin_ia32_loaddquqi256_mask: 7567 case X86::BI__builtin_ia32_loaddquqi512_mask: 7568 case X86::BI__builtin_ia32_loaddquhi128_mask: 7569 case X86::BI__builtin_ia32_loaddquhi256_mask: 7570 case X86::BI__builtin_ia32_loaddquhi512_mask: 7571 case X86::BI__builtin_ia32_loaddqusi128_mask: 7572 case X86::BI__builtin_ia32_loaddqusi256_mask: 7573 case X86::BI__builtin_ia32_loaddqusi512_mask: 7574 case X86::BI__builtin_ia32_loaddqudi128_mask: 7575 case X86::BI__builtin_ia32_loaddqudi256_mask: 7576 case X86::BI__builtin_ia32_loaddqudi512_mask: 7577 return EmitX86MaskedLoad(*this, Ops, 1); 7578 7579 case X86::BI__builtin_ia32_loadss128_mask: 7580 case X86::BI__builtin_ia32_loadsd128_mask: 7581 return EmitX86MaskedLoad(*this, Ops, 16); 7582 7583 case X86::BI__builtin_ia32_loadaps128_mask: 7584 case X86::BI__builtin_ia32_loadaps256_mask: 7585 case X86::BI__builtin_ia32_loadaps512_mask: 7586 case X86::BI__builtin_ia32_loadapd128_mask: 7587 case X86::BI__builtin_ia32_loadapd256_mask: 7588 case X86::BI__builtin_ia32_loadapd512_mask: 7589 case X86::BI__builtin_ia32_movdqa32load128_mask: 7590 case X86::BI__builtin_ia32_movdqa32load256_mask: 7591 case X86::BI__builtin_ia32_movdqa32load512_mask: 7592 case X86::BI__builtin_ia32_movdqa64load128_mask: 7593 case X86::BI__builtin_ia32_movdqa64load256_mask: 7594 case X86::BI__builtin_ia32_movdqa64load512_mask: { 7595 unsigned Align = 7596 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); 7597 return EmitX86MaskedLoad(*this, Ops, Align); 7598 } 7599 7600 case X86::BI__builtin_ia32_vbroadcastf128_pd256: 7601 case X86::BI__builtin_ia32_vbroadcastf128_ps256: { 7602 
llvm::Type *DstTy = ConvertType(E->getType()); 7603 return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1); 7604 } 7605 7606 case X86::BI__builtin_ia32_storehps: 7607 case X86::BI__builtin_ia32_storelps: { 7608 llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty); 7609 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2); 7610 7611 // cast val v2i64 7612 Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast"); 7613 7614 // extract (0, 1) 7615 unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1; 7616 llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index); 7617 Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract"); 7618 7619 // cast pointer to i64 & store 7620 Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy); 7621 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7622 } 7623 case X86::BI__builtin_ia32_palignr128: 7624 case X86::BI__builtin_ia32_palignr256: 7625 case X86::BI__builtin_ia32_palignr512_mask: { 7626 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 7627 7628 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 7629 assert(NumElts % 16 == 0); 7630 7631 // If palignr is shifting the pair of vectors more than the size of two 7632 // lanes, emit zero. 7633 if (ShiftVal >= 32) 7634 return llvm::Constant::getNullValue(ConvertType(E->getType())); 7635 7636 // If palignr is shifting the pair of input vectors more than one lane, 7637 // but less than two lanes, convert to shifting in zeroes. 7638 if (ShiftVal > 16) { 7639 ShiftVal -= 16; 7640 Ops[1] = Ops[0]; 7641 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType()); 7642 } 7643 7644 uint32_t Indices[64]; 7645 // 256-bit palignr operates on 128-bit lanes so we need to handle that 7646 for (unsigned l = 0; l != NumElts; l += 16) { 7647 for (unsigned i = 0; i != 16; ++i) { 7648 unsigned Idx = ShiftVal + i; 7649 if (Idx >= 16) 7650 Idx += NumElts - 16; // End of lane, switch operand. 
7651 Indices[l + i] = Idx + l; 7652 } 7653 } 7654 7655 Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0], 7656 makeArrayRef(Indices, NumElts), 7657 "palignr"); 7658 7659 // If this isn't a masked builtin, just return the align operation. 7660 if (Ops.size() == 3) 7661 return Align; 7662 7663 return EmitX86Select(*this, Ops[4], Align, Ops[3]); 7664 } 7665 7666 case X86::BI__builtin_ia32_movnti: 7667 case X86::BI__builtin_ia32_movnti64: 7668 case X86::BI__builtin_ia32_movntsd: 7669 case X86::BI__builtin_ia32_movntss: { 7670 llvm::MDNode *Node = llvm::MDNode::get( 7671 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); 7672 7673 Value *Ptr = Ops[0]; 7674 Value *Src = Ops[1]; 7675 7676 // Extract the 0'th element of the source vector. 7677 if (BuiltinID == X86::BI__builtin_ia32_movntsd || 7678 BuiltinID == X86::BI__builtin_ia32_movntss) 7679 Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract"); 7680 7681 // Convert the type of the pointer to a pointer to the stored type. 7682 Value *BC = Builder.CreateBitCast( 7683 Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast"); 7684 7685 // Unaligned nontemporal store of the scalar value. 
7686 StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC); 7687 SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); 7688 SI->setAlignment(1); 7689 return SI; 7690 } 7691 7692 case X86::BI__builtin_ia32_selectb_128: 7693 case X86::BI__builtin_ia32_selectb_256: 7694 case X86::BI__builtin_ia32_selectb_512: 7695 case X86::BI__builtin_ia32_selectw_128: 7696 case X86::BI__builtin_ia32_selectw_256: 7697 case X86::BI__builtin_ia32_selectw_512: 7698 case X86::BI__builtin_ia32_selectd_128: 7699 case X86::BI__builtin_ia32_selectd_256: 7700 case X86::BI__builtin_ia32_selectd_512: 7701 case X86::BI__builtin_ia32_selectq_128: 7702 case X86::BI__builtin_ia32_selectq_256: 7703 case X86::BI__builtin_ia32_selectq_512: 7704 case X86::BI__builtin_ia32_selectps_128: 7705 case X86::BI__builtin_ia32_selectps_256: 7706 case X86::BI__builtin_ia32_selectps_512: 7707 case X86::BI__builtin_ia32_selectpd_128: 7708 case X86::BI__builtin_ia32_selectpd_256: 7709 case X86::BI__builtin_ia32_selectpd_512: 7710 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]); 7711 case X86::BI__builtin_ia32_pcmpeqb128_mask: 7712 case X86::BI__builtin_ia32_pcmpeqb256_mask: 7713 case X86::BI__builtin_ia32_pcmpeqb512_mask: 7714 case X86::BI__builtin_ia32_pcmpeqw128_mask: 7715 case X86::BI__builtin_ia32_pcmpeqw256_mask: 7716 case X86::BI__builtin_ia32_pcmpeqw512_mask: 7717 case X86::BI__builtin_ia32_pcmpeqd128_mask: 7718 case X86::BI__builtin_ia32_pcmpeqd256_mask: 7719 case X86::BI__builtin_ia32_pcmpeqd512_mask: 7720 case X86::BI__builtin_ia32_pcmpeqq128_mask: 7721 case X86::BI__builtin_ia32_pcmpeqq256_mask: 7722 case X86::BI__builtin_ia32_pcmpeqq512_mask: 7723 return EmitX86MaskedCompare(*this, 0, false, Ops); 7724 case X86::BI__builtin_ia32_pcmpgtb128_mask: 7725 case X86::BI__builtin_ia32_pcmpgtb256_mask: 7726 case X86::BI__builtin_ia32_pcmpgtb512_mask: 7727 case X86::BI__builtin_ia32_pcmpgtw128_mask: 7728 case X86::BI__builtin_ia32_pcmpgtw256_mask: 7729 case 
X86::BI__builtin_ia32_pcmpgtw512_mask: 7730 case X86::BI__builtin_ia32_pcmpgtd128_mask: 7731 case X86::BI__builtin_ia32_pcmpgtd256_mask: 7732 case X86::BI__builtin_ia32_pcmpgtd512_mask: 7733 case X86::BI__builtin_ia32_pcmpgtq128_mask: 7734 case X86::BI__builtin_ia32_pcmpgtq256_mask: 7735 case X86::BI__builtin_ia32_pcmpgtq512_mask: 7736 return EmitX86MaskedCompare(*this, 6, true, Ops); 7737 case X86::BI__builtin_ia32_cmpb128_mask: 7738 case X86::BI__builtin_ia32_cmpb256_mask: 7739 case X86::BI__builtin_ia32_cmpb512_mask: 7740 case X86::BI__builtin_ia32_cmpw128_mask: 7741 case X86::BI__builtin_ia32_cmpw256_mask: 7742 case X86::BI__builtin_ia32_cmpw512_mask: 7743 case X86::BI__builtin_ia32_cmpd128_mask: 7744 case X86::BI__builtin_ia32_cmpd256_mask: 7745 case X86::BI__builtin_ia32_cmpd512_mask: 7746 case X86::BI__builtin_ia32_cmpq128_mask: 7747 case X86::BI__builtin_ia32_cmpq256_mask: 7748 case X86::BI__builtin_ia32_cmpq512_mask: { 7749 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; 7750 return EmitX86MaskedCompare(*this, CC, true, Ops); 7751 } 7752 case X86::BI__builtin_ia32_ucmpb128_mask: 7753 case X86::BI__builtin_ia32_ucmpb256_mask: 7754 case X86::BI__builtin_ia32_ucmpb512_mask: 7755 case X86::BI__builtin_ia32_ucmpw128_mask: 7756 case X86::BI__builtin_ia32_ucmpw256_mask: 7757 case X86::BI__builtin_ia32_ucmpw512_mask: 7758 case X86::BI__builtin_ia32_ucmpd128_mask: 7759 case X86::BI__builtin_ia32_ucmpd256_mask: 7760 case X86::BI__builtin_ia32_ucmpd512_mask: 7761 case X86::BI__builtin_ia32_ucmpq128_mask: 7762 case X86::BI__builtin_ia32_ucmpq256_mask: 7763 case X86::BI__builtin_ia32_ucmpq512_mask: { 7764 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; 7765 return EmitX86MaskedCompare(*this, CC, false, Ops); 7766 } 7767 7768 case X86::BI__builtin_ia32_vplzcntd_128_mask: 7769 case X86::BI__builtin_ia32_vplzcntd_256_mask: 7770 case X86::BI__builtin_ia32_vplzcntd_512_mask: 7771 case X86::BI__builtin_ia32_vplzcntq_128_mask: 7772 
case X86::BI__builtin_ia32_vplzcntq_256_mask: 7773 case X86::BI__builtin_ia32_vplzcntq_512_mask: { 7774 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); 7775 return EmitX86Select(*this, Ops[2], 7776 Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}), 7777 Ops[1]); 7778 } 7779 7780 case X86::BI__builtin_ia32_pmaxsb128: 7781 case X86::BI__builtin_ia32_pmaxsw128: 7782 case X86::BI__builtin_ia32_pmaxsd128: 7783 case X86::BI__builtin_ia32_pmaxsq128_mask: 7784 case X86::BI__builtin_ia32_pmaxsb256: 7785 case X86::BI__builtin_ia32_pmaxsw256: 7786 case X86::BI__builtin_ia32_pmaxsd256: 7787 case X86::BI__builtin_ia32_pmaxsq256_mask: 7788 case X86::BI__builtin_ia32_pmaxsb512_mask: 7789 case X86::BI__builtin_ia32_pmaxsw512_mask: 7790 case X86::BI__builtin_ia32_pmaxsd512_mask: 7791 case X86::BI__builtin_ia32_pmaxsq512_mask: 7792 return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops); 7793 case X86::BI__builtin_ia32_pmaxub128: 7794 case X86::BI__builtin_ia32_pmaxuw128: 7795 case X86::BI__builtin_ia32_pmaxud128: 7796 case X86::BI__builtin_ia32_pmaxuq128_mask: 7797 case X86::BI__builtin_ia32_pmaxub256: 7798 case X86::BI__builtin_ia32_pmaxuw256: 7799 case X86::BI__builtin_ia32_pmaxud256: 7800 case X86::BI__builtin_ia32_pmaxuq256_mask: 7801 case X86::BI__builtin_ia32_pmaxub512_mask: 7802 case X86::BI__builtin_ia32_pmaxuw512_mask: 7803 case X86::BI__builtin_ia32_pmaxud512_mask: 7804 case X86::BI__builtin_ia32_pmaxuq512_mask: 7805 return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops); 7806 case X86::BI__builtin_ia32_pminsb128: 7807 case X86::BI__builtin_ia32_pminsw128: 7808 case X86::BI__builtin_ia32_pminsd128: 7809 case X86::BI__builtin_ia32_pminsq128_mask: 7810 case X86::BI__builtin_ia32_pminsb256: 7811 case X86::BI__builtin_ia32_pminsw256: 7812 case X86::BI__builtin_ia32_pminsd256: 7813 case X86::BI__builtin_ia32_pminsq256_mask: 7814 case X86::BI__builtin_ia32_pminsb512_mask: 7815 case X86::BI__builtin_ia32_pminsw512_mask: 7816 case 
X86::BI__builtin_ia32_pminsd512_mask: 7817 case X86::BI__builtin_ia32_pminsq512_mask: 7818 return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops); 7819 case X86::BI__builtin_ia32_pminub128: 7820 case X86::BI__builtin_ia32_pminuw128: 7821 case X86::BI__builtin_ia32_pminud128: 7822 case X86::BI__builtin_ia32_pminuq128_mask: 7823 case X86::BI__builtin_ia32_pminub256: 7824 case X86::BI__builtin_ia32_pminuw256: 7825 case X86::BI__builtin_ia32_pminud256: 7826 case X86::BI__builtin_ia32_pminuq256_mask: 7827 case X86::BI__builtin_ia32_pminub512_mask: 7828 case X86::BI__builtin_ia32_pminuw512_mask: 7829 case X86::BI__builtin_ia32_pminud512_mask: 7830 case X86::BI__builtin_ia32_pminuq512_mask: 7831 return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops); 7832 7833 // 3DNow! 7834 case X86::BI__builtin_ia32_pswapdsf: 7835 case X86::BI__builtin_ia32_pswapdsi: { 7836 llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext()); 7837 Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast"); 7838 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd); 7839 return Builder.CreateCall(F, Ops, "pswapd"); 7840 } 7841 case X86::BI__builtin_ia32_rdrand16_step: 7842 case X86::BI__builtin_ia32_rdrand32_step: 7843 case X86::BI__builtin_ia32_rdrand64_step: 7844 case X86::BI__builtin_ia32_rdseed16_step: 7845 case X86::BI__builtin_ia32_rdseed32_step: 7846 case X86::BI__builtin_ia32_rdseed64_step: { 7847 Intrinsic::ID ID; 7848 switch (BuiltinID) { 7849 default: llvm_unreachable("Unsupported intrinsic!"); 7850 case X86::BI__builtin_ia32_rdrand16_step: 7851 ID = Intrinsic::x86_rdrand_16; 7852 break; 7853 case X86::BI__builtin_ia32_rdrand32_step: 7854 ID = Intrinsic::x86_rdrand_32; 7855 break; 7856 case X86::BI__builtin_ia32_rdrand64_step: 7857 ID = Intrinsic::x86_rdrand_64; 7858 break; 7859 case X86::BI__builtin_ia32_rdseed16_step: 7860 ID = Intrinsic::x86_rdseed_16; 7861 break; 7862 case X86::BI__builtin_ia32_rdseed32_step: 7863 ID = Intrinsic::x86_rdseed_32; 7864 break; 7865 case 
X86::BI__builtin_ia32_rdseed64_step: 7866 ID = Intrinsic::x86_rdseed_64; 7867 break; 7868 } 7869 7870 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID)); 7871 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0), 7872 Ops[0]); 7873 return Builder.CreateExtractValue(Call, 1); 7874 } 7875 7876 // SSE packed comparison intrinsics 7877 case X86::BI__builtin_ia32_cmpeqps: 7878 case X86::BI__builtin_ia32_cmpeqpd: 7879 return getVectorFCmpIR(CmpInst::FCMP_OEQ); 7880 case X86::BI__builtin_ia32_cmpltps: 7881 case X86::BI__builtin_ia32_cmpltpd: 7882 return getVectorFCmpIR(CmpInst::FCMP_OLT); 7883 case X86::BI__builtin_ia32_cmpleps: 7884 case X86::BI__builtin_ia32_cmplepd: 7885 return getVectorFCmpIR(CmpInst::FCMP_OLE); 7886 case X86::BI__builtin_ia32_cmpunordps: 7887 case X86::BI__builtin_ia32_cmpunordpd: 7888 return getVectorFCmpIR(CmpInst::FCMP_UNO); 7889 case X86::BI__builtin_ia32_cmpneqps: 7890 case X86::BI__builtin_ia32_cmpneqpd: 7891 return getVectorFCmpIR(CmpInst::FCMP_UNE); 7892 case X86::BI__builtin_ia32_cmpnltps: 7893 case X86::BI__builtin_ia32_cmpnltpd: 7894 return getVectorFCmpIR(CmpInst::FCMP_UGE); 7895 case X86::BI__builtin_ia32_cmpnleps: 7896 case X86::BI__builtin_ia32_cmpnlepd: 7897 return getVectorFCmpIR(CmpInst::FCMP_UGT); 7898 case X86::BI__builtin_ia32_cmpordps: 7899 case X86::BI__builtin_ia32_cmpordpd: 7900 return getVectorFCmpIR(CmpInst::FCMP_ORD); 7901 case X86::BI__builtin_ia32_cmpps: 7902 case X86::BI__builtin_ia32_cmpps256: 7903 case X86::BI__builtin_ia32_cmppd: 7904 case X86::BI__builtin_ia32_cmppd256: { 7905 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 7906 // If this one of the SSE immediates, we can use native IR. 
7907 if (CC < 8) { 7908 FCmpInst::Predicate Pred; 7909 switch (CC) { 7910 case 0: Pred = FCmpInst::FCMP_OEQ; break; 7911 case 1: Pred = FCmpInst::FCMP_OLT; break; 7912 case 2: Pred = FCmpInst::FCMP_OLE; break; 7913 case 3: Pred = FCmpInst::FCMP_UNO; break; 7914 case 4: Pred = FCmpInst::FCMP_UNE; break; 7915 case 5: Pred = FCmpInst::FCMP_UGE; break; 7916 case 6: Pred = FCmpInst::FCMP_UGT; break; 7917 case 7: Pred = FCmpInst::FCMP_ORD; break; 7918 } 7919 return getVectorFCmpIR(Pred); 7920 } 7921 7922 // We can't handle 8-31 immediates with native IR, use the intrinsic. 7923 Intrinsic::ID ID; 7924 switch (BuiltinID) { 7925 default: llvm_unreachable("Unsupported intrinsic!"); 7926 case X86::BI__builtin_ia32_cmpps: 7927 ID = Intrinsic::x86_sse_cmp_ps; 7928 break; 7929 case X86::BI__builtin_ia32_cmpps256: 7930 ID = Intrinsic::x86_avx_cmp_ps_256; 7931 break; 7932 case X86::BI__builtin_ia32_cmppd: 7933 ID = Intrinsic::x86_sse2_cmp_pd; 7934 break; 7935 case X86::BI__builtin_ia32_cmppd256: 7936 ID = Intrinsic::x86_avx_cmp_pd_256; 7937 break; 7938 } 7939 7940 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 7941 } 7942 7943 // SSE scalar comparison intrinsics 7944 case X86::BI__builtin_ia32_cmpeqss: 7945 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0); 7946 case X86::BI__builtin_ia32_cmpltss: 7947 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1); 7948 case X86::BI__builtin_ia32_cmpless: 7949 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2); 7950 case X86::BI__builtin_ia32_cmpunordss: 7951 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3); 7952 case X86::BI__builtin_ia32_cmpneqss: 7953 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4); 7954 case X86::BI__builtin_ia32_cmpnltss: 7955 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5); 7956 case X86::BI__builtin_ia32_cmpnless: 7957 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6); 7958 case X86::BI__builtin_ia32_cmpordss: 7959 return 
getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7); 7960 case X86::BI__builtin_ia32_cmpeqsd: 7961 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0); 7962 case X86::BI__builtin_ia32_cmpltsd: 7963 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1); 7964 case X86::BI__builtin_ia32_cmplesd: 7965 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2); 7966 case X86::BI__builtin_ia32_cmpunordsd: 7967 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3); 7968 case X86::BI__builtin_ia32_cmpneqsd: 7969 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4); 7970 case X86::BI__builtin_ia32_cmpnltsd: 7971 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5); 7972 case X86::BI__builtin_ia32_cmpnlesd: 7973 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6); 7974 case X86::BI__builtin_ia32_cmpordsd: 7975 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7); 7976 7977 case X86::BI__emul: 7978 case X86::BI__emulu: { 7979 llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64); 7980 bool isSigned = (BuiltinID == X86::BI__emul); 7981 Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned); 7982 Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned); 7983 return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned); 7984 } 7985 case X86::BI__mulh: 7986 case X86::BI__umulh: 7987 case X86::BI_mul128: 7988 case X86::BI_umul128: { 7989 llvm::Type *ResType = ConvertType(E->getType()); 7990 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); 7991 7992 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128); 7993 Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned); 7994 Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned); 7995 7996 Value *MulResult, *HigherBits; 7997 if (IsSigned) { 7998 MulResult = Builder.CreateNSWMul(LHS, RHS); 7999 HigherBits = Builder.CreateAShr(MulResult, 64); 8000 } else { 8001 MulResult = Builder.CreateNUWMul(LHS, RHS); 8002 HigherBits = 
Builder.CreateLShr(MulResult, 64); 8003 } 8004 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned); 8005 8006 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh) 8007 return HigherBits; 8008 8009 Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2)); 8010 Builder.CreateStore(HigherBits, HighBitsAddress); 8011 return Builder.CreateIntCast(MulResult, ResType, IsSigned); 8012 } 8013 8014 case X86::BI__faststorefence: { 8015 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, 8016 llvm::CrossThread); 8017 } 8018 case X86::BI_ReadWriteBarrier: 8019 case X86::BI_ReadBarrier: 8020 case X86::BI_WriteBarrier: { 8021 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, 8022 llvm::SingleThread); 8023 } 8024 case X86::BI_BitScanForward: 8025 case X86::BI_BitScanForward64: 8026 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); 8027 case X86::BI_BitScanReverse: 8028 case X86::BI_BitScanReverse64: 8029 return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E); 8030 8031 case X86::BI_InterlockedAnd64: 8032 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E); 8033 case X86::BI_InterlockedExchange64: 8034 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E); 8035 case X86::BI_InterlockedExchangeAdd64: 8036 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E); 8037 case X86::BI_InterlockedExchangeSub64: 8038 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E); 8039 case X86::BI_InterlockedOr64: 8040 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E); 8041 case X86::BI_InterlockedXor64: 8042 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E); 8043 case X86::BI_InterlockedDecrement64: 8044 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); 8045 case X86::BI_InterlockedIncrement64: 8046 return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); 8047 8048 case X86::BI_AddressOfReturnAddress: { 8049 Value *F = 
CGM.getIntrinsic(Intrinsic::addressofreturnaddress); 8050 return Builder.CreateCall(F); 8051 } 8052 case X86::BI__stosb: { 8053 // We treat __stosb as a volatile memset - it may not generate "rep stosb" 8054 // instruction, but it will create a memset that won't be optimized away. 8055 return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true); 8056 } 8057 case X86::BI__ud2: 8058 // llvm.trap makes a ud2a instruction on x86. 8059 return EmitTrapCall(Intrinsic::trap); 8060 case X86::BI__int2c: { 8061 // This syscall signals a driver assertion failure in x86 NT kernels. 8062 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); 8063 llvm::InlineAsm *IA = 8064 llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true); 8065 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( 8066 getLLVMContext(), llvm::AttributeList::FunctionIndex, 8067 llvm::Attribute::NoReturn); 8068 CallSite CS = Builder.CreateCall(IA); 8069 CS.setAttributes(NoReturnAttr); 8070 return CS.getInstruction(); 8071 } 8072 case X86::BI__readfsbyte: 8073 case X86::BI__readfsword: 8074 case X86::BI__readfsdword: 8075 case X86::BI__readfsqword: { 8076 llvm::Type *IntTy = ConvertType(E->getType()); 8077 Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), 8078 llvm::PointerType::get(IntTy, 257)); 8079 LoadInst *Load = Builder.CreateAlignedLoad( 8080 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType())); 8081 Load->setVolatile(true); 8082 return Load; 8083 } 8084 case X86::BI__readgsbyte: 8085 case X86::BI__readgsword: 8086 case X86::BI__readgsdword: 8087 case X86::BI__readgsqword: { 8088 llvm::Type *IntTy = ConvertType(E->getType()); 8089 Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), 8090 llvm::PointerType::get(IntTy, 256)); 8091 LoadInst *Load = Builder.CreateAlignedLoad( 8092 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType())); 8093 Load->setVolatile(true); 8094 return Load; 8095 } 8096 } 8097 } 8098 8099 8100 Value 
*CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, 8101 const CallExpr *E) { 8102 SmallVector<Value*, 4> Ops; 8103 8104 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) 8105 Ops.push_back(EmitScalarExpr(E->getArg(i))); 8106 8107 Intrinsic::ID ID = Intrinsic::not_intrinsic; 8108 8109 switch (BuiltinID) { 8110 default: return nullptr; 8111 8112 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we 8113 // call __builtin_readcyclecounter. 8114 case PPC::BI__builtin_ppc_get_timebase: 8115 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter)); 8116 8117 // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr 8118 case PPC::BI__builtin_altivec_lvx: 8119 case PPC::BI__builtin_altivec_lvxl: 8120 case PPC::BI__builtin_altivec_lvebx: 8121 case PPC::BI__builtin_altivec_lvehx: 8122 case PPC::BI__builtin_altivec_lvewx: 8123 case PPC::BI__builtin_altivec_lvsl: 8124 case PPC::BI__builtin_altivec_lvsr: 8125 case PPC::BI__builtin_vsx_lxvd2x: 8126 case PPC::BI__builtin_vsx_lxvw4x: 8127 case PPC::BI__builtin_vsx_lxvd2x_be: 8128 case PPC::BI__builtin_vsx_lxvw4x_be: 8129 case PPC::BI__builtin_vsx_lxvl: 8130 case PPC::BI__builtin_vsx_lxvll: 8131 { 8132 if(BuiltinID == PPC::BI__builtin_vsx_lxvl || 8133 BuiltinID == PPC::BI__builtin_vsx_lxvll){ 8134 Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy); 8135 }else { 8136 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); 8137 Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]); 8138 Ops.pop_back(); 8139 } 8140 8141 switch (BuiltinID) { 8142 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!"); 8143 case PPC::BI__builtin_altivec_lvx: 8144 ID = Intrinsic::ppc_altivec_lvx; 8145 break; 8146 case PPC::BI__builtin_altivec_lvxl: 8147 ID = Intrinsic::ppc_altivec_lvxl; 8148 break; 8149 case PPC::BI__builtin_altivec_lvebx: 8150 ID = Intrinsic::ppc_altivec_lvebx; 8151 break; 8152 case PPC::BI__builtin_altivec_lvehx: 8153 ID = Intrinsic::ppc_altivec_lvehx; 8154 break; 8155 case PPC::BI__builtin_altivec_lvewx: 
8156 ID = Intrinsic::ppc_altivec_lvewx; 8157 break; 8158 case PPC::BI__builtin_altivec_lvsl: 8159 ID = Intrinsic::ppc_altivec_lvsl; 8160 break; 8161 case PPC::BI__builtin_altivec_lvsr: 8162 ID = Intrinsic::ppc_altivec_lvsr; 8163 break; 8164 case PPC::BI__builtin_vsx_lxvd2x: 8165 ID = Intrinsic::ppc_vsx_lxvd2x; 8166 break; 8167 case PPC::BI__builtin_vsx_lxvw4x: 8168 ID = Intrinsic::ppc_vsx_lxvw4x; 8169 break; 8170 case PPC::BI__builtin_vsx_lxvd2x_be: 8171 ID = Intrinsic::ppc_vsx_lxvd2x_be; 8172 break; 8173 case PPC::BI__builtin_vsx_lxvw4x_be: 8174 ID = Intrinsic::ppc_vsx_lxvw4x_be; 8175 break; 8176 case PPC::BI__builtin_vsx_lxvl: 8177 ID = Intrinsic::ppc_vsx_lxvl; 8178 break; 8179 case PPC::BI__builtin_vsx_lxvll: 8180 ID = Intrinsic::ppc_vsx_lxvll; 8181 break; 8182 } 8183 llvm::Function *F = CGM.getIntrinsic(ID); 8184 return Builder.CreateCall(F, Ops, ""); 8185 } 8186 8187 // vec_st, vec_xst_be 8188 case PPC::BI__builtin_altivec_stvx: 8189 case PPC::BI__builtin_altivec_stvxl: 8190 case PPC::BI__builtin_altivec_stvebx: 8191 case PPC::BI__builtin_altivec_stvehx: 8192 case PPC::BI__builtin_altivec_stvewx: 8193 case PPC::BI__builtin_vsx_stxvd2x: 8194 case PPC::BI__builtin_vsx_stxvw4x: 8195 case PPC::BI__builtin_vsx_stxvd2x_be: 8196 case PPC::BI__builtin_vsx_stxvw4x_be: 8197 case PPC::BI__builtin_vsx_stxvl: 8198 case PPC::BI__builtin_vsx_stxvll: 8199 { 8200 if(BuiltinID == PPC::BI__builtin_vsx_stxvl || 8201 BuiltinID == PPC::BI__builtin_vsx_stxvll ){ 8202 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); 8203 }else { 8204 Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy); 8205 Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]); 8206 Ops.pop_back(); 8207 } 8208 8209 switch (BuiltinID) { 8210 default: llvm_unreachable("Unsupported st intrinsic!"); 8211 case PPC::BI__builtin_altivec_stvx: 8212 ID = Intrinsic::ppc_altivec_stvx; 8213 break; 8214 case PPC::BI__builtin_altivec_stvxl: 8215 ID = Intrinsic::ppc_altivec_stvxl; 8216 break; 8217 case PPC::BI__builtin_altivec_stvebx: 8218 ID 
= Intrinsic::ppc_altivec_stvebx; 8219 break; 8220 case PPC::BI__builtin_altivec_stvehx: 8221 ID = Intrinsic::ppc_altivec_stvehx; 8222 break; 8223 case PPC::BI__builtin_altivec_stvewx: 8224 ID = Intrinsic::ppc_altivec_stvewx; 8225 break; 8226 case PPC::BI__builtin_vsx_stxvd2x: 8227 ID = Intrinsic::ppc_vsx_stxvd2x; 8228 break; 8229 case PPC::BI__builtin_vsx_stxvw4x: 8230 ID = Intrinsic::ppc_vsx_stxvw4x; 8231 break; 8232 case PPC::BI__builtin_vsx_stxvd2x_be: 8233 ID = Intrinsic::ppc_vsx_stxvd2x_be; 8234 break; 8235 case PPC::BI__builtin_vsx_stxvw4x_be: 8236 ID = Intrinsic::ppc_vsx_stxvw4x_be; 8237 break; 8238 case PPC::BI__builtin_vsx_stxvl: 8239 ID = Intrinsic::ppc_vsx_stxvl; 8240 break; 8241 case PPC::BI__builtin_vsx_stxvll: 8242 ID = Intrinsic::ppc_vsx_stxvll; 8243 break; 8244 } 8245 llvm::Function *F = CGM.getIntrinsic(ID); 8246 return Builder.CreateCall(F, Ops, ""); 8247 } 8248 // Square root 8249 case PPC::BI__builtin_vsx_xvsqrtsp: 8250 case PPC::BI__builtin_vsx_xvsqrtdp: { 8251 llvm::Type *ResultType = ConvertType(E->getType()); 8252 Value *X = EmitScalarExpr(E->getArg(0)); 8253 ID = Intrinsic::sqrt; 8254 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 8255 return Builder.CreateCall(F, X); 8256 } 8257 // Count leading zeros 8258 case PPC::BI__builtin_altivec_vclzb: 8259 case PPC::BI__builtin_altivec_vclzh: 8260 case PPC::BI__builtin_altivec_vclzw: 8261 case PPC::BI__builtin_altivec_vclzd: { 8262 llvm::Type *ResultType = ConvertType(E->getType()); 8263 Value *X = EmitScalarExpr(E->getArg(0)); 8264 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 8265 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); 8266 return Builder.CreateCall(F, {X, Undef}); 8267 } 8268 case PPC::BI__builtin_altivec_vctzb: 8269 case PPC::BI__builtin_altivec_vctzh: 8270 case PPC::BI__builtin_altivec_vctzw: 8271 case PPC::BI__builtin_altivec_vctzd: { 8272 llvm::Type *ResultType = ConvertType(E->getType()); 8273 Value *X = EmitScalarExpr(E->getArg(0)); 8274 Value 
*Undef = ConstantInt::get(Builder.getInt1Ty(), false); 8275 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); 8276 return Builder.CreateCall(F, {X, Undef}); 8277 } 8278 case PPC::BI__builtin_altivec_vpopcntb: 8279 case PPC::BI__builtin_altivec_vpopcnth: 8280 case PPC::BI__builtin_altivec_vpopcntw: 8281 case PPC::BI__builtin_altivec_vpopcntd: { 8282 llvm::Type *ResultType = ConvertType(E->getType()); 8283 Value *X = EmitScalarExpr(E->getArg(0)); 8284 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); 8285 return Builder.CreateCall(F, X); 8286 } 8287 // Copy sign 8288 case PPC::BI__builtin_vsx_xvcpsgnsp: 8289 case PPC::BI__builtin_vsx_xvcpsgndp: { 8290 llvm::Type *ResultType = ConvertType(E->getType()); 8291 Value *X = EmitScalarExpr(E->getArg(0)); 8292 Value *Y = EmitScalarExpr(E->getArg(1)); 8293 ID = Intrinsic::copysign; 8294 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 8295 return Builder.CreateCall(F, {X, Y}); 8296 } 8297 // Rounding/truncation 8298 case PPC::BI__builtin_vsx_xvrspip: 8299 case PPC::BI__builtin_vsx_xvrdpip: 8300 case PPC::BI__builtin_vsx_xvrdpim: 8301 case PPC::BI__builtin_vsx_xvrspim: 8302 case PPC::BI__builtin_vsx_xvrdpi: 8303 case PPC::BI__builtin_vsx_xvrspi: 8304 case PPC::BI__builtin_vsx_xvrdpic: 8305 case PPC::BI__builtin_vsx_xvrspic: 8306 case PPC::BI__builtin_vsx_xvrdpiz: 8307 case PPC::BI__builtin_vsx_xvrspiz: { 8308 llvm::Type *ResultType = ConvertType(E->getType()); 8309 Value *X = EmitScalarExpr(E->getArg(0)); 8310 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim || 8311 BuiltinID == PPC::BI__builtin_vsx_xvrspim) 8312 ID = Intrinsic::floor; 8313 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi || 8314 BuiltinID == PPC::BI__builtin_vsx_xvrspi) 8315 ID = Intrinsic::round; 8316 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic || 8317 BuiltinID == PPC::BI__builtin_vsx_xvrspic) 8318 ID = Intrinsic::nearbyint; 8319 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip || 8320 BuiltinID == 
PPC::BI__builtin_vsx_xvrspip) 8321 ID = Intrinsic::ceil; 8322 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz || 8323 BuiltinID == PPC::BI__builtin_vsx_xvrspiz) 8324 ID = Intrinsic::trunc; 8325 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 8326 return Builder.CreateCall(F, X); 8327 } 8328 8329 // Absolute value 8330 case PPC::BI__builtin_vsx_xvabsdp: 8331 case PPC::BI__builtin_vsx_xvabssp: { 8332 llvm::Type *ResultType = ConvertType(E->getType()); 8333 Value *X = EmitScalarExpr(E->getArg(0)); 8334 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 8335 return Builder.CreateCall(F, X); 8336 } 8337 8338 // FMA variations 8339 case PPC::BI__builtin_vsx_xvmaddadp: 8340 case PPC::BI__builtin_vsx_xvmaddasp: 8341 case PPC::BI__builtin_vsx_xvnmaddadp: 8342 case PPC::BI__builtin_vsx_xvnmaddasp: 8343 case PPC::BI__builtin_vsx_xvmsubadp: 8344 case PPC::BI__builtin_vsx_xvmsubasp: 8345 case PPC::BI__builtin_vsx_xvnmsubadp: 8346 case PPC::BI__builtin_vsx_xvnmsubasp: { 8347 llvm::Type *ResultType = ConvertType(E->getType()); 8348 Value *X = EmitScalarExpr(E->getArg(0)); 8349 Value *Y = EmitScalarExpr(E->getArg(1)); 8350 Value *Z = EmitScalarExpr(E->getArg(2)); 8351 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 8352 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 8353 switch (BuiltinID) { 8354 case PPC::BI__builtin_vsx_xvmaddadp: 8355 case PPC::BI__builtin_vsx_xvmaddasp: 8356 return Builder.CreateCall(F, {X, Y, Z}); 8357 case PPC::BI__builtin_vsx_xvnmaddadp: 8358 case PPC::BI__builtin_vsx_xvnmaddasp: 8359 return Builder.CreateFSub(Zero, 8360 Builder.CreateCall(F, {X, Y, Z}), "sub"); 8361 case PPC::BI__builtin_vsx_xvmsubadp: 8362 case PPC::BI__builtin_vsx_xvmsubasp: 8363 return Builder.CreateCall(F, 8364 {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 8365 case PPC::BI__builtin_vsx_xvnmsubadp: 8366 case PPC::BI__builtin_vsx_xvnmsubasp: 8367 Value *FsubRes = 8368 Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, 
"sub")}); 8369 return Builder.CreateFSub(Zero, FsubRes, "sub"); 8370 } 8371 llvm_unreachable("Unknown FMA operation"); 8372 return nullptr; // Suppress no-return warning 8373 } 8374 8375 case PPC::BI__builtin_vsx_insertword: { 8376 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw); 8377 8378 // Third argument is a compile time constant int. It must be clamped to 8379 // to the range [0, 12]. 8380 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); 8381 assert(ArgCI && 8382 "Third arg to xxinsertw intrinsic must be constant integer"); 8383 const int64_t MaxIndex = 12; 8384 int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex); 8385 8386 // The builtin semantics don't exactly match the xxinsertw instructions 8387 // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the 8388 // word from the first argument, and inserts it in the second argument. The 8389 // instruction extracts the word from its second input register and inserts 8390 // it into its first input register, so swap the first and second arguments. 8391 std::swap(Ops[0], Ops[1]); 8392 8393 // Need to cast the second argument from a vector of unsigned int to a 8394 // vector of long long. 8395 Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); 8396 8397 if (getTarget().isLittleEndian()) { 8398 // Create a shuffle mask of (1, 0) 8399 Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1), 8400 ConstantInt::get(Int32Ty, 0) 8401 }; 8402 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 8403 8404 // Reverse the double words in the vector we will extract from. 8405 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 8406 Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask); 8407 8408 // Reverse the index. 8409 Index = MaxIndex - Index; 8410 } 8411 8412 // Intrinsic expects the first arg to be a vector of int. 
8413 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); 8414 Ops[2] = ConstantInt::getSigned(Int32Ty, Index); 8415 return Builder.CreateCall(F, Ops); 8416 } 8417 8418 case PPC::BI__builtin_vsx_extractuword: { 8419 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw); 8420 8421 // Intrinsic expects the first argument to be a vector of doublewords. 8422 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 8423 8424 // The second argument is a compile time constant int that needs to 8425 // be clamped to the range [0, 12]. 8426 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]); 8427 assert(ArgCI && 8428 "Second Arg to xxextractuw intrinsic must be a constant integer!"); 8429 const int64_t MaxIndex = 12; 8430 int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex); 8431 8432 if (getTarget().isLittleEndian()) { 8433 // Reverse the index. 8434 Index = MaxIndex - Index; 8435 Ops[1] = ConstantInt::getSigned(Int32Ty, Index); 8436 8437 // Emit the call, then reverse the double words of the results vector. 
8438 Value *Call = Builder.CreateCall(F, Ops); 8439 8440 // Create a shuffle mask of (1, 0) 8441 Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1), 8442 ConstantInt::get(Int32Ty, 0) 8443 }; 8444 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 8445 8446 Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask); 8447 return ShuffleCall; 8448 } else { 8449 Ops[1] = ConstantInt::getSigned(Int32Ty, Index); 8450 return Builder.CreateCall(F, Ops); 8451 } 8452 } 8453 8454 case PPC::BI__builtin_vsx_xxpermdi: { 8455 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); 8456 assert(ArgCI && "Third arg must be constant integer!"); 8457 8458 unsigned Index = ArgCI->getZExtValue(); 8459 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 8460 Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); 8461 8462 // Element zero comes from the first input vector and element one comes from 8463 // the second. The element indices within each vector are numbered in big 8464 // endian order so the shuffle mask must be adjusted for this on little 8465 // endian platforms (i.e. index is complemented and source vector reversed). 
8466 unsigned ElemIdx0; 8467 unsigned ElemIdx1; 8468 if (getTarget().isLittleEndian()) { 8469 ElemIdx0 = (~Index & 1) + 2; 8470 ElemIdx1 = (~Index & 2) >> 1; 8471 } else { // BigEndian 8472 ElemIdx0 = (Index & 2) >> 1; 8473 ElemIdx1 = 2 + (Index & 1); 8474 } 8475 8476 Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0), 8477 ConstantInt::get(Int32Ty, ElemIdx1)}; 8478 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 8479 8480 Value *ShuffleCall = 8481 Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask); 8482 QualType BIRetType = E->getType(); 8483 auto RetTy = ConvertType(BIRetType); 8484 return Builder.CreateBitCast(ShuffleCall, RetTy); 8485 } 8486 8487 case PPC::BI__builtin_vsx_xxsldwi: { 8488 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); 8489 assert(ArgCI && "Third argument must be a compile time constant"); 8490 unsigned Index = ArgCI->getZExtValue() & 0x3; 8491 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); 8492 Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4)); 8493 8494 // Create a shuffle mask 8495 unsigned ElemIdx0; 8496 unsigned ElemIdx1; 8497 unsigned ElemIdx2; 8498 unsigned ElemIdx3; 8499 if (getTarget().isLittleEndian()) { 8500 // Little endian element N comes from element 8+N-Index of the 8501 // concatenated wide vector (of course, using modulo arithmetic on 8502 // the total number of elements). 
8503 ElemIdx0 = (8 - Index) % 8; 8504 ElemIdx1 = (9 - Index) % 8; 8505 ElemIdx2 = (10 - Index) % 8; 8506 ElemIdx3 = (11 - Index) % 8; 8507 } else { 8508 // Big endian ElemIdx<N> = Index + N 8509 ElemIdx0 = Index; 8510 ElemIdx1 = Index + 1; 8511 ElemIdx2 = Index + 2; 8512 ElemIdx3 = Index + 3; 8513 } 8514 8515 Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0), 8516 ConstantInt::get(Int32Ty, ElemIdx1), 8517 ConstantInt::get(Int32Ty, ElemIdx2), 8518 ConstantInt::get(Int32Ty, ElemIdx3)}; 8519 8520 Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); 8521 Value *ShuffleCall = 8522 Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask); 8523 QualType BIRetType = E->getType(); 8524 auto RetTy = ConvertType(BIRetType); 8525 return Builder.CreateBitCast(ShuffleCall, RetTy); 8526 } 8527 } 8528 } 8529 8530 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, 8531 const CallExpr *E) { 8532 switch (BuiltinID) { 8533 case AMDGPU::BI__builtin_amdgcn_div_scale: 8534 case AMDGPU::BI__builtin_amdgcn_div_scalef: { 8535 // Translate from the intrinsics's struct return to the builtin's out 8536 // argument. 
8537 8538 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3)); 8539 8540 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 8541 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 8542 llvm::Value *Z = EmitScalarExpr(E->getArg(2)); 8543 8544 llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale, 8545 X->getType()); 8546 8547 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z}); 8548 8549 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0); 8550 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1); 8551 8552 llvm::Type *RealFlagType 8553 = FlagOutPtr.getPointer()->getType()->getPointerElementType(); 8554 8555 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType); 8556 Builder.CreateStore(FlagExt, FlagOutPtr); 8557 return Result; 8558 } 8559 case AMDGPU::BI__builtin_amdgcn_div_fmas: 8560 case AMDGPU::BI__builtin_amdgcn_div_fmasf: { 8561 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); 8562 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); 8563 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); 8564 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3)); 8565 8566 llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas, 8567 Src0->getType()); 8568 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3); 8569 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool}); 8570 } 8571 8572 case AMDGPU::BI__builtin_amdgcn_ds_swizzle: 8573 return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle); 8574 case AMDGPU::BI__builtin_amdgcn_mov_dpp: { 8575 llvm::SmallVector<llvm::Value *, 5> Args; 8576 for (unsigned I = 0; I != 5; ++I) 8577 Args.push_back(EmitScalarExpr(E->getArg(I))); 8578 Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp, 8579 Args[0]->getType()); 8580 return Builder.CreateCall(F, Args); 8581 } 8582 case AMDGPU::BI__builtin_amdgcn_div_fixup: 8583 case AMDGPU::BI__builtin_amdgcn_div_fixupf: 8584 case AMDGPU::BI__builtin_amdgcn_div_fixuph: 8585 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup); 8586 case 
AMDGPU::BI__builtin_amdgcn_trig_preop: 8587 case AMDGPU::BI__builtin_amdgcn_trig_preopf: 8588 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop); 8589 case AMDGPU::BI__builtin_amdgcn_rcp: 8590 case AMDGPU::BI__builtin_amdgcn_rcpf: 8591 case AMDGPU::BI__builtin_amdgcn_rcph: 8592 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp); 8593 case AMDGPU::BI__builtin_amdgcn_rsq: 8594 case AMDGPU::BI__builtin_amdgcn_rsqf: 8595 case AMDGPU::BI__builtin_amdgcn_rsqh: 8596 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq); 8597 case AMDGPU::BI__builtin_amdgcn_rsq_clamp: 8598 case AMDGPU::BI__builtin_amdgcn_rsq_clampf: 8599 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp); 8600 case AMDGPU::BI__builtin_amdgcn_sinf: 8601 case AMDGPU::BI__builtin_amdgcn_sinh: 8602 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin); 8603 case AMDGPU::BI__builtin_amdgcn_cosf: 8604 case AMDGPU::BI__builtin_amdgcn_cosh: 8605 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos); 8606 case AMDGPU::BI__builtin_amdgcn_log_clampf: 8607 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp); 8608 case AMDGPU::BI__builtin_amdgcn_ldexp: 8609 case AMDGPU::BI__builtin_amdgcn_ldexpf: 8610 case AMDGPU::BI__builtin_amdgcn_ldexph: 8611 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp); 8612 case AMDGPU::BI__builtin_amdgcn_frexp_mant: 8613 case AMDGPU::BI__builtin_amdgcn_frexp_mantf: 8614 case AMDGPU::BI__builtin_amdgcn_frexp_manth: 8615 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant); 8616 case AMDGPU::BI__builtin_amdgcn_frexp_exp: 8617 case AMDGPU::BI__builtin_amdgcn_frexp_expf: { 8618 Value *Src0 = EmitScalarExpr(E->getArg(0)); 8619 Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, 8620 { Builder.getInt32Ty(), Src0->getType() }); 8621 return Builder.CreateCall(F, Src0); 8622 } 8623 case AMDGPU::BI__builtin_amdgcn_frexp_exph: { 8624 Value *Src0 = EmitScalarExpr(E->getArg(0)); 8625 Value *F = 
CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, 8626 { Builder.getInt16Ty(), Src0->getType() }); 8627 return Builder.CreateCall(F, Src0); 8628 } 8629 case AMDGPU::BI__builtin_amdgcn_fract: 8630 case AMDGPU::BI__builtin_amdgcn_fractf: 8631 case AMDGPU::BI__builtin_amdgcn_fracth: 8632 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract); 8633 case AMDGPU::BI__builtin_amdgcn_lerp: 8634 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp); 8635 case AMDGPU::BI__builtin_amdgcn_uicmp: 8636 case AMDGPU::BI__builtin_amdgcn_uicmpl: 8637 case AMDGPU::BI__builtin_amdgcn_sicmp: 8638 case AMDGPU::BI__builtin_amdgcn_sicmpl: 8639 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp); 8640 case AMDGPU::BI__builtin_amdgcn_fcmp: 8641 case AMDGPU::BI__builtin_amdgcn_fcmpf: 8642 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp); 8643 case AMDGPU::BI__builtin_amdgcn_class: 8644 case AMDGPU::BI__builtin_amdgcn_classf: 8645 case AMDGPU::BI__builtin_amdgcn_classh: 8646 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class); 8647 case AMDGPU::BI__builtin_amdgcn_fmed3f: 8648 case AMDGPU::BI__builtin_amdgcn_fmed3h: 8649 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3); 8650 case AMDGPU::BI__builtin_amdgcn_read_exec: { 8651 CallInst *CI = cast<CallInst>( 8652 EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec")); 8653 CI->setConvergent(); 8654 return CI; 8655 } 8656 8657 // amdgcn workitem 8658 case AMDGPU::BI__builtin_amdgcn_workitem_id_x: 8659 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024); 8660 case AMDGPU::BI__builtin_amdgcn_workitem_id_y: 8661 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024); 8662 case AMDGPU::BI__builtin_amdgcn_workitem_id_z: 8663 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024); 8664 8665 // r600 intrinsics 8666 case AMDGPU::BI__builtin_r600_recipsqrt_ieee: 8667 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef: 8668 return 
emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee); 8669 case AMDGPU::BI__builtin_r600_read_tidig_x: 8670 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024); 8671 case AMDGPU::BI__builtin_r600_read_tidig_y: 8672 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024); 8673 case AMDGPU::BI__builtin_r600_read_tidig_z: 8674 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024); 8675 default: 8676 return nullptr; 8677 } 8678 } 8679 8680 /// Handle a SystemZ function in which the final argument is a pointer 8681 /// to an int that receives the post-instruction CC value. At the LLVM level 8682 /// this is represented as a function that returns a {result, cc} pair. 8683 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, 8684 unsigned IntrinsicID, 8685 const CallExpr *E) { 8686 unsigned NumArgs = E->getNumArgs() - 1; 8687 SmallVector<Value *, 8> Args(NumArgs); 8688 for (unsigned I = 0; I < NumArgs; ++I) 8689 Args[I] = CGF.EmitScalarExpr(E->getArg(I)); 8690 Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs)); 8691 Value *F = CGF.CGM.getIntrinsic(IntrinsicID); 8692 Value *Call = CGF.Builder.CreateCall(F, Args); 8693 Value *CC = CGF.Builder.CreateExtractValue(Call, 1); 8694 CGF.Builder.CreateStore(CC, CCPtr); 8695 return CGF.Builder.CreateExtractValue(Call, 0); 8696 } 8697 8698 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, 8699 const CallExpr *E) { 8700 switch (BuiltinID) { 8701 case SystemZ::BI__builtin_tbegin: { 8702 Value *TDB = EmitScalarExpr(E->getArg(0)); 8703 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); 8704 Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin); 8705 return Builder.CreateCall(F, {TDB, Control}); 8706 } 8707 case SystemZ::BI__builtin_tbegin_nofloat: { 8708 Value *TDB = EmitScalarExpr(E->getArg(0)); 8709 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); 8710 Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat); 8711 return 
Builder.CreateCall(F, {TDB, Control}); 8712 } 8713 case SystemZ::BI__builtin_tbeginc: { 8714 Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy); 8715 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08); 8716 Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc); 8717 return Builder.CreateCall(F, {TDB, Control}); 8718 } 8719 case SystemZ::BI__builtin_tabort: { 8720 Value *Data = EmitScalarExpr(E->getArg(0)); 8721 Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort); 8722 return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort")); 8723 } 8724 case SystemZ::BI__builtin_non_tx_store: { 8725 Value *Address = EmitScalarExpr(E->getArg(0)); 8726 Value *Data = EmitScalarExpr(E->getArg(1)); 8727 Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg); 8728 return Builder.CreateCall(F, {Data, Address}); 8729 } 8730 8731 // Vector builtins. Note that most vector builtins are mapped automatically 8732 // to target-specific LLVM intrinsics. The ones handled specially here can 8733 // be represented via standard LLVM IR, which is preferable to enable common 8734 // LLVM optimizations. 
8735 8736 case SystemZ::BI__builtin_s390_vpopctb: 8737 case SystemZ::BI__builtin_s390_vpopcth: 8738 case SystemZ::BI__builtin_s390_vpopctf: 8739 case SystemZ::BI__builtin_s390_vpopctg: { 8740 llvm::Type *ResultType = ConvertType(E->getType()); 8741 Value *X = EmitScalarExpr(E->getArg(0)); 8742 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); 8743 return Builder.CreateCall(F, X); 8744 } 8745 8746 case SystemZ::BI__builtin_s390_vclzb: 8747 case SystemZ::BI__builtin_s390_vclzh: 8748 case SystemZ::BI__builtin_s390_vclzf: 8749 case SystemZ::BI__builtin_s390_vclzg: { 8750 llvm::Type *ResultType = ConvertType(E->getType()); 8751 Value *X = EmitScalarExpr(E->getArg(0)); 8752 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 8753 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); 8754 return Builder.CreateCall(F, {X, Undef}); 8755 } 8756 8757 case SystemZ::BI__builtin_s390_vctzb: 8758 case SystemZ::BI__builtin_s390_vctzh: 8759 case SystemZ::BI__builtin_s390_vctzf: 8760 case SystemZ::BI__builtin_s390_vctzg: { 8761 llvm::Type *ResultType = ConvertType(E->getType()); 8762 Value *X = EmitScalarExpr(E->getArg(0)); 8763 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 8764 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); 8765 return Builder.CreateCall(F, {X, Undef}); 8766 } 8767 8768 case SystemZ::BI__builtin_s390_vfsqdb: { 8769 llvm::Type *ResultType = ConvertType(E->getType()); 8770 Value *X = EmitScalarExpr(E->getArg(0)); 8771 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType); 8772 return Builder.CreateCall(F, X); 8773 } 8774 case SystemZ::BI__builtin_s390_vfmadb: { 8775 llvm::Type *ResultType = ConvertType(E->getType()); 8776 Value *X = EmitScalarExpr(E->getArg(0)); 8777 Value *Y = EmitScalarExpr(E->getArg(1)); 8778 Value *Z = EmitScalarExpr(E->getArg(2)); 8779 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 8780 return Builder.CreateCall(F, {X, Y, Z}); 8781 } 8782 case 
SystemZ::BI__builtin_s390_vfmsdb: { 8783 llvm::Type *ResultType = ConvertType(E->getType()); 8784 Value *X = EmitScalarExpr(E->getArg(0)); 8785 Value *Y = EmitScalarExpr(E->getArg(1)); 8786 Value *Z = EmitScalarExpr(E->getArg(2)); 8787 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 8788 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 8789 return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 8790 } 8791 case SystemZ::BI__builtin_s390_vflpdb: { 8792 llvm::Type *ResultType = ConvertType(E->getType()); 8793 Value *X = EmitScalarExpr(E->getArg(0)); 8794 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 8795 return Builder.CreateCall(F, X); 8796 } 8797 case SystemZ::BI__builtin_s390_vflndb: { 8798 llvm::Type *ResultType = ConvertType(E->getType()); 8799 Value *X = EmitScalarExpr(E->getArg(0)); 8800 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 8801 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 8802 return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub"); 8803 } 8804 case SystemZ::BI__builtin_s390_vfidb: { 8805 llvm::Type *ResultType = ConvertType(E->getType()); 8806 Value *X = EmitScalarExpr(E->getArg(0)); 8807 // Constant-fold the M4 and M5 mask arguments. 8808 llvm::APSInt M4, M5; 8809 bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext()); 8810 bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext()); 8811 assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?"); 8812 (void)IsConstM4; (void)IsConstM5; 8813 // Check whether this instance of vfidb can be represented via a LLVM 8814 // standard intrinsic. We only support some combinations of M4 and M5. 
8815 Intrinsic::ID ID = Intrinsic::not_intrinsic; 8816 switch (M4.getZExtValue()) { 8817 default: break; 8818 case 0: // IEEE-inexact exception allowed 8819 switch (M5.getZExtValue()) { 8820 default: break; 8821 case 0: ID = Intrinsic::rint; break; 8822 } 8823 break; 8824 case 4: // IEEE-inexact exception suppressed 8825 switch (M5.getZExtValue()) { 8826 default: break; 8827 case 0: ID = Intrinsic::nearbyint; break; 8828 case 1: ID = Intrinsic::round; break; 8829 case 5: ID = Intrinsic::trunc; break; 8830 case 6: ID = Intrinsic::ceil; break; 8831 case 7: ID = Intrinsic::floor; break; 8832 } 8833 break; 8834 } 8835 if (ID != Intrinsic::not_intrinsic) { 8836 Function *F = CGM.getIntrinsic(ID, ResultType); 8837 return Builder.CreateCall(F, X); 8838 } 8839 Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb); 8840 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); 8841 Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5); 8842 return Builder.CreateCall(F, {X, M4Value, M5Value}); 8843 } 8844 8845 // Vector intrisincs that output the post-instruction CC value. 
8846 8847 #define INTRINSIC_WITH_CC(NAME) \ 8848 case SystemZ::BI__builtin_##NAME: \ 8849 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E) 8850 8851 INTRINSIC_WITH_CC(s390_vpkshs); 8852 INTRINSIC_WITH_CC(s390_vpksfs); 8853 INTRINSIC_WITH_CC(s390_vpksgs); 8854 8855 INTRINSIC_WITH_CC(s390_vpklshs); 8856 INTRINSIC_WITH_CC(s390_vpklsfs); 8857 INTRINSIC_WITH_CC(s390_vpklsgs); 8858 8859 INTRINSIC_WITH_CC(s390_vceqbs); 8860 INTRINSIC_WITH_CC(s390_vceqhs); 8861 INTRINSIC_WITH_CC(s390_vceqfs); 8862 INTRINSIC_WITH_CC(s390_vceqgs); 8863 8864 INTRINSIC_WITH_CC(s390_vchbs); 8865 INTRINSIC_WITH_CC(s390_vchhs); 8866 INTRINSIC_WITH_CC(s390_vchfs); 8867 INTRINSIC_WITH_CC(s390_vchgs); 8868 8869 INTRINSIC_WITH_CC(s390_vchlbs); 8870 INTRINSIC_WITH_CC(s390_vchlhs); 8871 INTRINSIC_WITH_CC(s390_vchlfs); 8872 INTRINSIC_WITH_CC(s390_vchlgs); 8873 8874 INTRINSIC_WITH_CC(s390_vfaebs); 8875 INTRINSIC_WITH_CC(s390_vfaehs); 8876 INTRINSIC_WITH_CC(s390_vfaefs); 8877 8878 INTRINSIC_WITH_CC(s390_vfaezbs); 8879 INTRINSIC_WITH_CC(s390_vfaezhs); 8880 INTRINSIC_WITH_CC(s390_vfaezfs); 8881 8882 INTRINSIC_WITH_CC(s390_vfeebs); 8883 INTRINSIC_WITH_CC(s390_vfeehs); 8884 INTRINSIC_WITH_CC(s390_vfeefs); 8885 8886 INTRINSIC_WITH_CC(s390_vfeezbs); 8887 INTRINSIC_WITH_CC(s390_vfeezhs); 8888 INTRINSIC_WITH_CC(s390_vfeezfs); 8889 8890 INTRINSIC_WITH_CC(s390_vfenebs); 8891 INTRINSIC_WITH_CC(s390_vfenehs); 8892 INTRINSIC_WITH_CC(s390_vfenefs); 8893 8894 INTRINSIC_WITH_CC(s390_vfenezbs); 8895 INTRINSIC_WITH_CC(s390_vfenezhs); 8896 INTRINSIC_WITH_CC(s390_vfenezfs); 8897 8898 INTRINSIC_WITH_CC(s390_vistrbs); 8899 INTRINSIC_WITH_CC(s390_vistrhs); 8900 INTRINSIC_WITH_CC(s390_vistrfs); 8901 8902 INTRINSIC_WITH_CC(s390_vstrcbs); 8903 INTRINSIC_WITH_CC(s390_vstrchs); 8904 INTRINSIC_WITH_CC(s390_vstrcfs); 8905 8906 INTRINSIC_WITH_CC(s390_vstrczbs); 8907 INTRINSIC_WITH_CC(s390_vstrczhs); 8908 INTRINSIC_WITH_CC(s390_vstrczfs); 8909 8910 INTRINSIC_WITH_CC(s390_vfcedbs); 8911 INTRINSIC_WITH_CC(s390_vfchdbs); 8912 
INTRINSIC_WITH_CC(s390_vfchedbs); 8913 8914 INTRINSIC_WITH_CC(s390_vftcidb); 8915 8916 #undef INTRINSIC_WITH_CC 8917 8918 default: 8919 return nullptr; 8920 } 8921 } 8922 8923 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, 8924 const CallExpr *E) { 8925 auto MakeLdg = [&](unsigned IntrinsicID) { 8926 Value *Ptr = EmitScalarExpr(E->getArg(0)); 8927 clang::CharUnits Align = 8928 getNaturalPointeeTypeAlignment(E->getArg(0)->getType()); 8929 return Builder.CreateCall( 8930 CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(), 8931 Ptr->getType()}), 8932 {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())}); 8933 }; 8934 auto MakeScopedAtomic = [&](unsigned IntrinsicID) { 8935 Value *Ptr = EmitScalarExpr(E->getArg(0)); 8936 return Builder.CreateCall( 8937 CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(), 8938 Ptr->getType()}), 8939 {Ptr, EmitScalarExpr(E->getArg(1))}); 8940 }; 8941 switch (BuiltinID) { 8942 case NVPTX::BI__nvvm_atom_add_gen_i: 8943 case NVPTX::BI__nvvm_atom_add_gen_l: 8944 case NVPTX::BI__nvvm_atom_add_gen_ll: 8945 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E); 8946 8947 case NVPTX::BI__nvvm_atom_sub_gen_i: 8948 case NVPTX::BI__nvvm_atom_sub_gen_l: 8949 case NVPTX::BI__nvvm_atom_sub_gen_ll: 8950 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E); 8951 8952 case NVPTX::BI__nvvm_atom_and_gen_i: 8953 case NVPTX::BI__nvvm_atom_and_gen_l: 8954 case NVPTX::BI__nvvm_atom_and_gen_ll: 8955 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E); 8956 8957 case NVPTX::BI__nvvm_atom_or_gen_i: 8958 case NVPTX::BI__nvvm_atom_or_gen_l: 8959 case NVPTX::BI__nvvm_atom_or_gen_ll: 8960 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E); 8961 8962 case NVPTX::BI__nvvm_atom_xor_gen_i: 8963 case NVPTX::BI__nvvm_atom_xor_gen_l: 8964 case NVPTX::BI__nvvm_atom_xor_gen_ll: 8965 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E); 8966 8967 
case NVPTX::BI__nvvm_atom_xchg_gen_i: 8968 case NVPTX::BI__nvvm_atom_xchg_gen_l: 8969 case NVPTX::BI__nvvm_atom_xchg_gen_ll: 8970 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E); 8971 8972 case NVPTX::BI__nvvm_atom_max_gen_i: 8973 case NVPTX::BI__nvvm_atom_max_gen_l: 8974 case NVPTX::BI__nvvm_atom_max_gen_ll: 8975 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E); 8976 8977 case NVPTX::BI__nvvm_atom_max_gen_ui: 8978 case NVPTX::BI__nvvm_atom_max_gen_ul: 8979 case NVPTX::BI__nvvm_atom_max_gen_ull: 8980 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E); 8981 8982 case NVPTX::BI__nvvm_atom_min_gen_i: 8983 case NVPTX::BI__nvvm_atom_min_gen_l: 8984 case NVPTX::BI__nvvm_atom_min_gen_ll: 8985 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E); 8986 8987 case NVPTX::BI__nvvm_atom_min_gen_ui: 8988 case NVPTX::BI__nvvm_atom_min_gen_ul: 8989 case NVPTX::BI__nvvm_atom_min_gen_ull: 8990 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E); 8991 8992 case NVPTX::BI__nvvm_atom_cas_gen_i: 8993 case NVPTX::BI__nvvm_atom_cas_gen_l: 8994 case NVPTX::BI__nvvm_atom_cas_gen_ll: 8995 // __nvvm_atom_cas_gen_* should return the old value rather than the 8996 // success flag. 8997 return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false); 8998 8999 case NVPTX::BI__nvvm_atom_add_gen_f: { 9000 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9001 Value *Val = EmitScalarExpr(E->getArg(1)); 9002 // atomicrmw only deals with integer arguments so we need to use 9003 // LLVM's nvvm_atomic_load_add_f32 intrinsic for that. 
9004 Value *FnALAF32 = 9005 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType()); 9006 return Builder.CreateCall(FnALAF32, {Ptr, Val}); 9007 } 9008 9009 case NVPTX::BI__nvvm_atom_inc_gen_ui: { 9010 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9011 Value *Val = EmitScalarExpr(E->getArg(1)); 9012 Value *FnALI32 = 9013 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType()); 9014 return Builder.CreateCall(FnALI32, {Ptr, Val}); 9015 } 9016 9017 case NVPTX::BI__nvvm_atom_dec_gen_ui: { 9018 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9019 Value *Val = EmitScalarExpr(E->getArg(1)); 9020 Value *FnALD32 = 9021 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType()); 9022 return Builder.CreateCall(FnALD32, {Ptr, Val}); 9023 } 9024 9025 case NVPTX::BI__nvvm_ldg_c: 9026 case NVPTX::BI__nvvm_ldg_c2: 9027 case NVPTX::BI__nvvm_ldg_c4: 9028 case NVPTX::BI__nvvm_ldg_s: 9029 case NVPTX::BI__nvvm_ldg_s2: 9030 case NVPTX::BI__nvvm_ldg_s4: 9031 case NVPTX::BI__nvvm_ldg_i: 9032 case NVPTX::BI__nvvm_ldg_i2: 9033 case NVPTX::BI__nvvm_ldg_i4: 9034 case NVPTX::BI__nvvm_ldg_l: 9035 case NVPTX::BI__nvvm_ldg_ll: 9036 case NVPTX::BI__nvvm_ldg_ll2: 9037 case NVPTX::BI__nvvm_ldg_uc: 9038 case NVPTX::BI__nvvm_ldg_uc2: 9039 case NVPTX::BI__nvvm_ldg_uc4: 9040 case NVPTX::BI__nvvm_ldg_us: 9041 case NVPTX::BI__nvvm_ldg_us2: 9042 case NVPTX::BI__nvvm_ldg_us4: 9043 case NVPTX::BI__nvvm_ldg_ui: 9044 case NVPTX::BI__nvvm_ldg_ui2: 9045 case NVPTX::BI__nvvm_ldg_ui4: 9046 case NVPTX::BI__nvvm_ldg_ul: 9047 case NVPTX::BI__nvvm_ldg_ull: 9048 case NVPTX::BI__nvvm_ldg_ull2: 9049 // PTX Interoperability section 2.2: "For a vector with an even number of 9050 // elements, its alignment is set to number of elements times the alignment 9051 // of its member: n*alignof(t)." 
9052 return MakeLdg(Intrinsic::nvvm_ldg_global_i); 9053 case NVPTX::BI__nvvm_ldg_f: 9054 case NVPTX::BI__nvvm_ldg_f2: 9055 case NVPTX::BI__nvvm_ldg_f4: 9056 case NVPTX::BI__nvvm_ldg_d: 9057 case NVPTX::BI__nvvm_ldg_d2: 9058 return MakeLdg(Intrinsic::nvvm_ldg_global_f); 9059 9060 case NVPTX::BI__nvvm_atom_cta_add_gen_i: 9061 case NVPTX::BI__nvvm_atom_cta_add_gen_l: 9062 case NVPTX::BI__nvvm_atom_cta_add_gen_ll: 9063 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta); 9064 case NVPTX::BI__nvvm_atom_sys_add_gen_i: 9065 case NVPTX::BI__nvvm_atom_sys_add_gen_l: 9066 case NVPTX::BI__nvvm_atom_sys_add_gen_ll: 9067 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys); 9068 case NVPTX::BI__nvvm_atom_cta_add_gen_f: 9069 case NVPTX::BI__nvvm_atom_cta_add_gen_d: 9070 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta); 9071 case NVPTX::BI__nvvm_atom_sys_add_gen_f: 9072 case NVPTX::BI__nvvm_atom_sys_add_gen_d: 9073 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys); 9074 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i: 9075 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l: 9076 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll: 9077 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta); 9078 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i: 9079 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l: 9080 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll: 9081 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys); 9082 case NVPTX::BI__nvvm_atom_cta_max_gen_i: 9083 case NVPTX::BI__nvvm_atom_cta_max_gen_ui: 9084 case NVPTX::BI__nvvm_atom_cta_max_gen_l: 9085 case NVPTX::BI__nvvm_atom_cta_max_gen_ul: 9086 case NVPTX::BI__nvvm_atom_cta_max_gen_ll: 9087 case NVPTX::BI__nvvm_atom_cta_max_gen_ull: 9088 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta); 9089 case NVPTX::BI__nvvm_atom_sys_max_gen_i: 9090 case NVPTX::BI__nvvm_atom_sys_max_gen_ui: 9091 case NVPTX::BI__nvvm_atom_sys_max_gen_l: 9092 case NVPTX::BI__nvvm_atom_sys_max_gen_ul: 9093 case 
NVPTX::BI__nvvm_atom_sys_max_gen_ll: 9094 case NVPTX::BI__nvvm_atom_sys_max_gen_ull: 9095 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys); 9096 case NVPTX::BI__nvvm_atom_cta_min_gen_i: 9097 case NVPTX::BI__nvvm_atom_cta_min_gen_ui: 9098 case NVPTX::BI__nvvm_atom_cta_min_gen_l: 9099 case NVPTX::BI__nvvm_atom_cta_min_gen_ul: 9100 case NVPTX::BI__nvvm_atom_cta_min_gen_ll: 9101 case NVPTX::BI__nvvm_atom_cta_min_gen_ull: 9102 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta); 9103 case NVPTX::BI__nvvm_atom_sys_min_gen_i: 9104 case NVPTX::BI__nvvm_atom_sys_min_gen_ui: 9105 case NVPTX::BI__nvvm_atom_sys_min_gen_l: 9106 case NVPTX::BI__nvvm_atom_sys_min_gen_ul: 9107 case NVPTX::BI__nvvm_atom_sys_min_gen_ll: 9108 case NVPTX::BI__nvvm_atom_sys_min_gen_ull: 9109 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys); 9110 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui: 9111 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta); 9112 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui: 9113 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta); 9114 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui: 9115 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys); 9116 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui: 9117 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys); 9118 case NVPTX::BI__nvvm_atom_cta_and_gen_i: 9119 case NVPTX::BI__nvvm_atom_cta_and_gen_l: 9120 case NVPTX::BI__nvvm_atom_cta_and_gen_ll: 9121 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta); 9122 case NVPTX::BI__nvvm_atom_sys_and_gen_i: 9123 case NVPTX::BI__nvvm_atom_sys_and_gen_l: 9124 case NVPTX::BI__nvvm_atom_sys_and_gen_ll: 9125 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys); 9126 case NVPTX::BI__nvvm_atom_cta_or_gen_i: 9127 case NVPTX::BI__nvvm_atom_cta_or_gen_l: 9128 case NVPTX::BI__nvvm_atom_cta_or_gen_ll: 9129 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta); 9130 case NVPTX::BI__nvvm_atom_sys_or_gen_i: 9131 case 
NVPTX::BI__nvvm_atom_sys_or_gen_l: 9132 case NVPTX::BI__nvvm_atom_sys_or_gen_ll: 9133 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys); 9134 case NVPTX::BI__nvvm_atom_cta_xor_gen_i: 9135 case NVPTX::BI__nvvm_atom_cta_xor_gen_l: 9136 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll: 9137 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta); 9138 case NVPTX::BI__nvvm_atom_sys_xor_gen_i: 9139 case NVPTX::BI__nvvm_atom_sys_xor_gen_l: 9140 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll: 9141 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys); 9142 case NVPTX::BI__nvvm_atom_cta_cas_gen_i: 9143 case NVPTX::BI__nvvm_atom_cta_cas_gen_l: 9144 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: { 9145 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9146 return Builder.CreateCall( 9147 CGM.getIntrinsic( 9148 Intrinsic::nvvm_atomic_cas_gen_i_cta, 9149 {Ptr->getType()->getPointerElementType(), Ptr->getType()}), 9150 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); 9151 } 9152 case NVPTX::BI__nvvm_atom_sys_cas_gen_i: 9153 case NVPTX::BI__nvvm_atom_sys_cas_gen_l: 9154 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: { 9155 Value *Ptr = EmitScalarExpr(E->getArg(0)); 9156 return Builder.CreateCall( 9157 CGM.getIntrinsic( 9158 Intrinsic::nvvm_atomic_cas_gen_i_sys, 9159 {Ptr->getType()->getPointerElementType(), Ptr->getType()}), 9160 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); 9161 } 9162 default: 9163 return nullptr; 9164 } 9165 } 9166 9167 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, 9168 const CallExpr *E) { 9169 switch (BuiltinID) { 9170 case WebAssembly::BI__builtin_wasm_current_memory: { 9171 llvm::Type *ResultType = ConvertType(E->getType()); 9172 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType); 9173 return Builder.CreateCall(Callee); 9174 } 9175 case WebAssembly::BI__builtin_wasm_grow_memory: { 9176 Value *X = EmitScalarExpr(E->getArg(0)); 9177 Value *Callee = 
CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType()); 9178 return Builder.CreateCall(Callee, X); 9179 } 9180 9181 default: 9182 return nullptr; 9183 } 9184 } 9185